Skip to content

Commit

Permalink
Merge branch 'codespell-project:master' into master
Browse files (browse the repository at this point in the history)
  • Loading branch information
polluks authored Dec 22, 2023
2 parents f431ff9 + 640e878 commit f5d4cbb
Show file tree
Hide file tree
Showing 14 changed files with 2,637 additions and 163 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/codespell-private.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ jobs:
RUFF_FORMAT: github
# Make sure we're using the latest aspell dictionary
runs-on: ubuntu-22.04
timeout-minutes: 10
strategy:
fail-fast: false
matrix:
Expand All @@ -35,7 +36,7 @@ jobs:
with:
persist-credentials: false
- name: Setup python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- run: sudo apt-get install libaspell-dev aspell-en
Expand All @@ -60,9 +61,10 @@ jobs:

make-check-dictionaries:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: 3.x
- uses: actions/checkout@v4
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/codespell-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@ jobs:
test-windows:
name: Test Windows
runs-on: windows-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- name: Setup python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.8'
- name: Install dependencies
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Install dependencies
Expand All @@ -36,7 +36,7 @@ jobs:
- name: Check env vars
run: |
echo "Triggered by: ${{ github.event_name }}"
- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
with:
name: dist
path: dist
Expand All @@ -47,7 +47,7 @@ jobs:
runs-on: ubuntu-latest
if: github.event_name == 'release'
steps:
- uses: actions/download-artifact@v3
- uses: actions/download-artifact@v4
with:
name: dist
path: dist
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ codespell.egg-info
.cache/
.mypy_cache/
.pytest_cache/
.ruff_cache/
codespell_lib/_version.py
junit-results.xml
*.egg-info/
2 changes: 2 additions & 0 deletions .mailmap
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Dimitri Papadopoulos Orfanos <[email protected]>
luzpaz <luzpaz@@users.noreply.github.com> <[email protected]>
17 changes: 5 additions & 12 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,25 +50,18 @@ repos:
files: dictionary.*\.txt$|\.wordlist$
args: [--ignore-case]
- repo: https://github.com/adrienverge/yamllint.git
rev: v1.32.0
rev: v1.33.0
hooks:
- id: yamllint
args:
- --no-warnings
- -d
- '{extends: relaxed, rules: {line-length: {max: 90}}}'
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.292
rev: v0.1.8
hooks:
- id: ruff
- repo: https://github.com/PyCQA/autoflake
rev: v2.2.1
hooks:
- id: autoflake
- repo: https://github.com/psf/black
rev: 23.9.1
hooks:
- id: black
- id: ruff-format
- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
hooks:
Expand All @@ -77,11 +70,11 @@ repos:
additional_dependencies:
- tomli
- repo: https://github.com/abravalheri/validate-pyproject
rev: v0.14
rev: v0.15
hooks:
- id: validate-pyproject
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.5.1
rev: v1.7.1
hooks:
- id: mypy
args: ["--config-file", "pyproject.toml"]
Expand Down
7 changes: 6 additions & 1 deletion codespell_lib/__main__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
+import sys
+
 from ._codespell import _script_main
 
 if __name__ == "__main__":
-    _script_main()
+    try:
+        sys.exit(_script_main())
+    except KeyboardInterrupt:
+        pass
70 changes: 43 additions & 27 deletions codespell_lib/_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
# Pass all misspellings through this translation table to generate
# alternative misspellings and fixes.
alt_chars = (("'", "’"),)
encodings = ("utf-8", "iso-8859-1")
USAGE = """
\t%prog [OPTIONS] [file1 file2 ... fileN]
"""
Expand All @@ -60,7 +59,7 @@
(
"rare",
"for rare (but valid) words that are likely to be errors",
"_rare", # noqa: E501
"_rare",
None,
None,
None,
Expand Down Expand Up @@ -105,7 +104,7 @@
(
"en-GB_to_en-US",
"for corrections from en-GB to en-US",
"_en-GB_to_en-US", # noqa: E501
"_en-GB_to_en-US",
True,
True,
("en_GB",),
Expand All @@ -119,6 +118,7 @@
EX_OK = 0
EX_USAGE = 64
EX_DATAERR = 65
EX_CONFIG = 78

# OPTIONS:
#
Expand Down Expand Up @@ -204,12 +204,13 @@ def __init__(self, use_chardet: bool, quiet_level: int) -> None:
def init_chardet(self) -> None:
try:
from chardet.universaldetector import UniversalDetector
except ImportError:
raise ImportError(
except ImportError as e:
msg = (
"There's no chardet installed to import from. "
"Please, install it and check your PYTHONPATH "
"environment variable"
)
raise ImportError(msg) from e

self.encdetector = UniversalDetector()

Expand Down Expand Up @@ -248,7 +249,7 @@ def open_with_chardet(self, filename: str) -> Tuple[List[str], str]:
def open_with_internal(self, filename: str) -> Tuple[List[str], str]:
encoding = None
first_try = True
for encoding in encodings:
for encoding in ("utf-8", "iso-8859-1"):
if first_try:
first_try = False
elif not self.quiet_level & QuietLevels.ENCODING:
Expand All @@ -266,7 +267,8 @@ def open_with_internal(self, filename: str) -> Tuple[List[str], str]:
else:
break
else:
raise Exception("Unknown encoding")
msg = "Unknown encoding"
raise Exception(msg)

return lines, encoding

Expand Down Expand Up @@ -477,7 +479,7 @@ def parse_options(
"- 1: disable warnings about wrong encoding.\n"
"- 2: disable warnings about binary files.\n"
"- 4: omit warnings about automatic fixes that were disabled in the dictionary.\n" # noqa: E501
"- 8: don't print anything for non-automatic fixes.\n" # noqa: E501
"- 8: don't print anything for non-automatic fixes.\n"
"- 16: don't print the list of fixed files.\n"
"- 32: don't print configuration files.\n"
"As usual with bitmasks, these levels can be "
Expand Down Expand Up @@ -510,7 +512,7 @@ def parse_options(
"--check-hidden",
action="store_true",
default=False,
help="check hidden files and directories (those " 'starting with ".") as well.',
help='check hidden files and directories (those starting with ".") as well.',
)
parser.add_argument(
"-A",
Expand All @@ -533,6 +535,11 @@ def parse_options(
metavar="LINES",
help="print LINES of surrounding context",
)
parser.add_argument(
"--stdin-single-line",
action="store_true",
help="output just a single line for each misspelling in stdin mode",
)
parser.add_argument("--config", type=str, help="path to config file.")
parser.add_argument("--toml", type=str, help="path to a pyproject.toml file.")
parser.add_argument("files", nargs="*", help="files or directories to check")
Expand Down Expand Up @@ -562,10 +569,11 @@ def parse_options(
import tomli as tomllib # type: ignore[no-redef]
except ImportError as e:
if tomllib_raise_error:
raise ImportError(
msg = (
f"tomllib or tomli are required to read pyproject.toml "
f"but could not be imported, got: {e}"
) from None
)
raise ImportError(msg) from None
tomllib = None # type: ignore[assignment]
if tomllib is not None:
for toml_file in toml_files:
Expand All @@ -584,7 +592,7 @@ def parse_options(
used_cfg_files.append(cfg_file)

# Use config files
config.read(cfg_files)
config.read(used_cfg_files)
if config.has_section("codespell"):
# Build a "fake" argv list using option name and value.
cfg_args = []
Expand All @@ -609,24 +617,23 @@ def parse_options(


 def parse_ignore_words_option(ignore_words_option: List[str]) -> Set[str]:
-    ignore_words = set()
+    ignore_words: Set[str] = set()
     if ignore_words_option:
         for comma_separated_words in ignore_words_option:
-            for word in comma_separated_words.split(","):
-                ignore_words.add(word.strip())
+            ignore_words.update(
+                word.strip() for word in comma_separated_words.split(",")
+            )
     return ignore_words
 
 
 def build_exclude_hashes(filename: str, exclude_lines: Set[str]) -> None:
     with open(filename, encoding="utf-8") as f:
-        for line in f:
-            exclude_lines.add(line)
+        exclude_lines.update(line.rstrip() for line in f)
 
 
 def build_ignore_words(filename: str, ignore_words: Set[str]) -> None:
     with open(filename, encoding="utf-8") as f:
-        for line in f:
-            ignore_words.add(line.strip())
+        ignore_words.update(line.strip() for line in f)


def add_misspelling(
Expand Down Expand Up @@ -803,7 +810,7 @@ def apply_uri_ignore_words(
) -> List[Match[str]]:
if not uri_ignore_words:
return check_matches
for uri in re.findall(uri_regex, line):
for uri in uri_regex.findall(line):
for uri_word in extract_words(uri, word_regex, ignore_word_regex):
if uri_word in uri_ignore_words:
# determine/remove only the first among matches
Expand Down Expand Up @@ -831,10 +838,10 @@ def parse_file(
bad_count = 0
lines = None
changed = False
encoding = encodings[0] # if not defined, use UTF-8

if filename == "-":
f = sys.stdin
encoding = "utf-8"
lines = f.readlines()
else:
if options.check_filenames:
Expand Down Expand Up @@ -888,7 +895,7 @@ def parse_file(
return bad_count

for i, line in enumerate(lines):
if line in exclude_lines:
if line.rstrip() in exclude_lines:
continue

fixed_words = set()
Expand Down Expand Up @@ -990,6 +997,8 @@ def parse_file(
f"{cfilename}:{cline}: {cwrongword} "
f"==> {crightword}{creason}"
)
elif options.stdin_single_line:
print(f"{cline}: {cwrongword} ==> {crightword}{creason}")
else:
print(
f"{cline}: {line.strip()}\n\t{cwrongword} "
Expand Down Expand Up @@ -1019,7 +1028,14 @@ def _script_main() -> int:

def main(*args: str) -> int:
"""Contains flow control"""
options, parser, used_cfg_files = parse_options(args)
try:
options, parser, used_cfg_files = parse_options(args)
except configparser.Error as e:
print(
f"ERROR: ill-formed config file: {e.message}",
file=sys.stderr,
)
return EX_CONFIG

# Report used config files
if not options.quiet_level & QuietLevels.CONFIG_FILES:
Expand Down Expand Up @@ -1080,7 +1096,7 @@ def main(*args: str) -> int:
return EX_USAGE
uri_ignore_words = parse_ignore_words_option(options.uri_ignore_words_list)

dictionaries = options.dictionary if options.dictionary else ["-"]
dictionaries = options.dictionary or ["-"]

use_dictionaries = []
for dictionary in dictionaries:
Expand Down Expand Up @@ -1161,19 +1177,19 @@ def main(*args: str) -> int:
return EX_USAGE

bad_count = 0
for filename in options.files:
for filename in sorted(options.files):
# ignore hidden files
if is_hidden(filename, options.check_hidden):
continue

if os.path.isdir(filename):
for root, dirs, files in os.walk(filename):
if glob_match.match(root): # skip (absolute) directories
del dirs[:]
dirs.clear()
continue
if is_hidden(root, options.check_hidden): # dir itself hidden
continue
for file_ in files:
for file_ in sorted(files):
# ignore hidden files in directories
if is_hidden(file_, options.check_hidden):
continue
Expand Down
Loading

0 comments on commit f5d4cbb

Please sign in to comment.