Skip to content

Commit

Permalink
Merge pull request #159 from roskakori/157-change-linting-to-ruff
Browse files Browse the repository at this point in the history
#157 Change linting to ruff
  • Loading branch information
roskakori authored Jul 6, 2024
2 parents e5f6bf6 + 1cbd894 commit 5997b6d
Show file tree
Hide file tree
Showing 18 changed files with 471 additions and 346 deletions.
3 changes: 0 additions & 3 deletions .flake8

This file was deleted.

27 changes: 9 additions & 18 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
exclude: "^.idea"

repos:
- repo: https://github.com/pre-commit/mirrors-isort
rev: v5.10.1
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.1
hooks:
- id: isort

- repo: https://github.com/ambv/black
rev: 24.4.2
hooks:
- id: black
- id: ruff
args: ["--fix"]
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.1.0
Expand All @@ -31,14 +28,8 @@ repos:
- id: check-yaml
- id: debug-statements

- repo: https://github.com/asottile/pyupgrade
rev: v3.15.2
hooks:
- id: pyupgrade
args: ["--py38-plus"]

- repo: https://github.com/PyCQA/flake8
rev: 7.0.0
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: flake8
additional_dependencies: ["pep8-naming==0.13.3"]
- id: no-commit-to-branch
args: ["--branch", "master"]
5 changes: 5 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ Changes

This chapter describes the changes coming with each new version of pygount.

Version 1.8.1, 2024-07-06

* Development: Change linter to ruff and in turn clean up code (issue
`#157 <https://github.com/roskakori/pygount/issues/157>`_).

Version 1.8.0, 2024-05-13

* Add all available counts and percentages to JSON format (issue
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# -- Project information -----------------------------------------------------

project = "pygount"
copyright = "2016-2024, Thomas Aglassinger"
copyright = "2016-2024, Thomas Aglassinger" # noqa: A001
author = "Thomas Aglassinger"

# The full version, including alpha/beta/rc tags
Expand Down
334 changes: 171 additions & 163 deletions poetry.lock

Large diffs are not rendered by default.

91 changes: 54 additions & 37 deletions pygount/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import pygount.common
import pygount.lexers
import pygount.xmldialect
from pygount.common import deprecated
from pygount.common import deprecated, mapped_repr
from pygount.git_storage import GitStorage, git_remote_url_and_revision_if_any

# Attempt to import chardet.
Expand Down Expand Up @@ -481,17 +481,24 @@ def is_countable(self) -> bool:
return self.state in (SourceState.analyzed, SourceState.duplicate)

def __repr__(self):
result = "{}(path={!r}, language={!r}, group={!r}, state={}".format(
self.__class__.__name__, self.path, self.language, self.group, self.state.name
)
name_to_value_map = {
"path": repr(self.path),
"language": repr(self.language),
"group": repr(self.group),
"state": self.state.name,
}
if self.state == SourceState.analyzed:
result += ", code_count={}, documentation_count={}, empty_count={}, string_count={}".format(
self.code_count, self.documentation_count, self.empty_count, self.string_count
name_to_value_map.update(
{
"code_count": self.code_count,
"documentation_count": self.documentation_count,
"empty_count": self.empty_count,
"string_count": self.string_count,
}
)
if self.state_info is not None:
result += f", state_info={self.state_info!r}"
result += ")"
return result
name_to_value_map["state_info"] = repr(self.state_info)
return mapped_repr(self, name_to_value_map)


class SourceScanner:
Expand Down Expand Up @@ -558,7 +565,7 @@ def name_regexps_to_skip(self, regexps_or_pattern_text):
self._name_regexps_to_skip = pygount.common.regexes_from(regexps_or_pattern_text, self.name_regexps_to_skip)

def _is_path_to_skip(self, name, is_folder) -> bool:
assert os.sep not in name, "name=%r" % name
assert os.sep not in name, f"name={name!r}"
regexps_to_skip = self._folder_regexps_to_skip if is_folder else self._name_regexps_to_skip
return any(path_name_to_skip_regex.match(name) is not None for path_name_to_skip_regex in regexps_to_skip)

Expand Down Expand Up @@ -604,8 +611,11 @@ def _paths_and_group_to_analyze(self, path_to_analyse_pattern, group=None) -> It
def _source_paths_and_groups_to_analyze(self, source_patterns_to_analyze) -> List[Tuple[str, str]]:
assert source_patterns_to_analyze is not None
result = []
for source_pattern_to_analyze in source_patterns_to_analyze:
try:
# NOTE: We could avoid initializing `source_pattern_to_analyze` here by moving the `try` inside
# the loop, but this would incur a performance overhead (ruff's PERF203).
source_pattern_to_analyze = None
try:
for source_pattern_to_analyze in source_patterns_to_analyze:
remote_url, revision = git_remote_url_and_revision_if_any(source_pattern_to_analyze)
if remote_url is not None:
git_storage = GitStorage(remote_url, revision)
Expand All @@ -615,8 +625,9 @@ def _source_paths_and_groups_to_analyze(self, source_patterns_to_analyze) -> Lis
result.extend(self._paths_and_group_to_analyze(git_storage.temp_folder))
else:
result.extend(self._paths_and_group_to_analyze(source_pattern_to_analyze))
except OSError as error:
raise OSError(f'cannot scan "{source_pattern_to_analyze}" for source files: {error}')
except OSError as error:
assert source_pattern_to_analyze is not None
raise OSError(f'cannot scan "{source_pattern_to_analyze}" for source files: {error}') from error
result = sorted(set(result))
return result

Expand All @@ -636,7 +647,7 @@ def source_paths(self) -> Iterator[str]:


_LANGUAGE_TO_WHITE_WORDS_MAP = {"batchfile": {"@"}, "python": {"pass"}, "sql": {"begin", "end"}}
for _language in _LANGUAGE_TO_WHITE_WORDS_MAP.keys():
for _language in _LANGUAGE_TO_WHITE_WORDS_MAP:
assert _language.islower()


Expand Down Expand Up @@ -686,32 +697,35 @@ def white_code_words(language_id: str) -> Set[str]:
return _LANGUAGE_TO_WHITE_WORDS_MAP.get(language_id, set())


def _delined_tokens(tokens: Sequence[Tuple[TokenType, str]]) -> Iterator[TokenType]:
def _delined_tokens(tokens: Iterator[Tuple[TokenType, str]]) -> Iterator[TokenType]:
for token_type, token_text in tokens:
newline_index = token_text.find("\n")
remaining_token_text = token_text
newline_index = remaining_token_text.find("\n")
while newline_index != -1:
yield token_type, token_text[: newline_index + 1]
token_text = token_text[newline_index + 1 :]
newline_index = token_text.find("\n")
if token_text != "":
yield token_type, token_text
yield token_type, remaining_token_text[: newline_index + 1]
remaining_token_text = remaining_token_text[newline_index + 1 :]
newline_index = remaining_token_text.find("\n")
if remaining_token_text != "":
yield token_type, remaining_token_text


def _pythonized_comments(tokens: Sequence[Tuple[TokenType, str]]) -> Iterator[TokenType]:
def _pythonized_comments(tokens: Iterator[Tuple[TokenType, str]]) -> Iterator[TokenType]:
"""
Similar to tokens but converts strings after a colon (`:`) to comments.
"""
is_after_colon = True
for token_type, token_text in tokens:
for token_type, result_token_text in tokens:
if is_after_colon and (token_type in pygments.token.String):
token_type = pygments.token.Comment
elif token_text == ":":
is_after_colon = True
elif token_type not in pygments.token.Comment:
is_whitespace = len(token_text.rstrip(" \f\n\r\t")) == 0
if not is_whitespace:
is_after_colon = False
yield token_type, token_text
result_token_type = pygments.token.Comment
else:
result_token_type = token_type
if result_token_text == ":":
is_after_colon = True
elif token_type not in pygments.token.Comment:
is_whitespace = len(result_token_text.rstrip(" \f\n\r\t")) == 0
if not is_whitespace:
is_after_colon = False
yield result_token_type, result_token_text


def _line_parts(lexer: pygments.lexer.Lexer, text: str) -> Iterator[Set[str]]:
Expand Down Expand Up @@ -785,11 +799,14 @@ def encoding_for(
# File is empty, assume a dummy encoding.
result = "utf-8"
if result is None:
# Check for known BOMs.
for bom, encoding in _BOM_TO_ENCODING_MAP.items():
if heading[: len(bom)] == bom:
result = encoding
break
result = next(
(
encoding_for_bom
for bom, encoding_for_bom in _BOM_TO_ENCODING_MAP.items()
if heading[: len(bom)] == bom
),
None,
)
if result is None:
# Look for common headings that indicate the encoding.
ascii_heading = heading.decode("ascii", errors="replace")
Expand Down
56 changes: 27 additions & 29 deletions pygount/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
existing default values."""

_HELP_FORMAT = 'output format, one of: {}; default: "%(default)s"'.format(
", ".join(['"' + format + '"' for format in VALID_OUTPUT_FORMATS])
", ".join(['"' + output_format + '"' for output_format in VALID_OUTPUT_FORMATS])
)

_HELP_GENERATED = """comma separated list of regular expressions to detect
Expand Down Expand Up @@ -86,11 +86,9 @@ def _check_encoding(name, encoding_to_check, alternative_encoding, source=None):
"".encode(encoding_to_check)
except LookupError:
raise pygount.common.OptionError(
'{} is "{}" but must be "{}" or a known Python encoding'.format(
name, encoding_to_check, alternative_encoding
),
f'{name} is "{encoding_to_check}" but must be "{alternative_encoding}" or a known Python encoding',
source,
)
) from None


class Command:
Expand Down Expand Up @@ -120,14 +118,13 @@ def set_encodings(self, encoding, source=None):
if encoding in ("automatic", "chardet"):
default_encoding = encoding
fallback_encoding = None
elif encoding.startswith(("automatic;", "chardet;")):
first_encoding_semicolon_index = encoding.find(";")
default_encoding = encoding[:first_encoding_semicolon_index]
fallback_encoding = encoding[first_encoding_semicolon_index + 1 :]
else:
if encoding.startswith("automatic;") or encoding.startswith("chardet;"):
first_encoding_semicolon_index = encoding.find(";")
default_encoding = encoding[:first_encoding_semicolon_index]
fallback_encoding = encoding[first_encoding_semicolon_index + 1 :]
else:
default_encoding = encoding
fallback_encoding = pygount.analysis.DEFAULT_FALLBACK_ENCODING
default_encoding = encoding
fallback_encoding = pygount.analysis.DEFAULT_FALLBACK_ENCODING
self.set_default_encoding(default_encoding, source)
self.set_fallback_encoding(fallback_encoding, source)

Expand Down Expand Up @@ -350,25 +347,26 @@ def execute(self):
target_context_manager = (
contextlib.nullcontext(sys.stdout)
if is_stdout
else open(self.output, "w", encoding="utf-8", newline="")
else open(self.output, "w", encoding="utf-8", newline="") # noqa: SIM115
)
with target_context_manager as target_file, writer_class(target_file) as writer:
with Progress(disable=not writer.has_to_track_progress, transient=True) as progress:
try:
for source_path, group in progress.track(source_paths_and_groups_to_analyze):
writer.add(
pygount.analysis.SourceAnalysis.from_file(
source_path,
group,
self.default_encoding,
self.fallback_encoding,
generated_regexes=self._generated_regexs,
duplicate_pool=duplicate_pool,
merge_embedded_language=self.has_to_merge_embedded_languages,
)
with target_context_manager as target_file, writer_class(target_file) as writer, Progress(
disable=not writer.has_to_track_progress, transient=True
) as progress:
try:
for source_path, group in progress.track(source_paths_and_groups_to_analyze):
writer.add(
pygount.analysis.SourceAnalysis.from_file(
source_path,
group,
self.default_encoding,
self.fallback_encoding,
generated_regexes=self._generated_regexs,
duplicate_pool=duplicate_pool,
merge_embedded_language=self.has_to_merge_embedded_languages,
)
finally:
progress.stop()
)
finally:
progress.stop()


def pygount_command(arguments=None):
Expand Down
35 changes: 15 additions & 20 deletions pygount/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ class Error(Exception):
Error to indicate that something went wrong during a pygount run.
"""

pass


class OptionError(Error):
"""
Expand Down Expand Up @@ -55,10 +53,7 @@ def as_list(items_or_text: Union[str, Sequence[str]]) -> List[str]:
def regex_from(pattern: Union[str, Pattern], is_shell_pattern=False) -> Pattern:
assert pattern is not None
if isinstance(pattern, str):
if is_shell_pattern:
result = re.compile(fnmatch.translate(pattern))
else:
result = re.compile(pattern)
result = re.compile(fnmatch.translate(pattern)) if is_shell_pattern else re.compile(pattern)
else:
result = pattern # Assume pattern already is a compiled regular expression
return result
Expand Down Expand Up @@ -86,20 +81,19 @@ def regexes_from(
patterns_text_without_prefixes = patterns_text_without_prefixes[len(ADDITIONAL_PATTERN) :]

patterns = as_list(patterns_text_without_prefixes)
for pattern in patterns:
result.append(regex_from(pattern, is_shell_pattern))
result = [regex_from(pattern, is_shell_pattern) for pattern in patterns]
else:
regexes = list(patterns_text)
if len(regexes) >= 1 and regexes[0] is None:
default_regexes = regexes_from(default_patterns_text)
regexes = regexes[1:]
for supposed_regex in regexes:
assert isinstance(supposed_regex, _REGEX_TYPE), (
"patterns_text must a text or sequence or regular expressions but contains: %a" % supposed_regex
)
assert isinstance(
supposed_regex, _REGEX_TYPE
), f"patterns_text must a text or sequence or regular expressions but contains: {supposed_regex}"
result.extend(regexes)
except re.error as error:
raise OptionError(f"cannot parse pattern for regular repression: {error}", source)
raise OptionError(f"cannot parse pattern for regular repression: {error}", source) from None
result.extend(default_regexes)
return result

Expand Down Expand Up @@ -157,7 +151,7 @@ def new_func1(*args, **kwargs):
return new_func1

return decorator
elif inspect.isclass(reason) or inspect.isfunction(reason):
if inspect.isclass(reason) or inspect.isfunction(reason):
# The @deprecated is used without any 'reason'.
#
# .. code-block:: python
Expand All @@ -167,11 +161,7 @@ def new_func1(*args, **kwargs):
# pass

func2 = reason

if inspect.isclass(func2):
fmt2 = "Call to deprecated class {name}."
else:
fmt2 = "Call to deprecated function {name}."
fmt2 = "Call to deprecated class {name}." if inspect.isclass(func2) else "Call to deprecated function {name}."

@functools.wraps(func2)
def new_func2(*args, **kwargs):
Expand All @@ -181,5 +171,10 @@ def new_func2(*args, **kwargs):
return func2(*args, **kwargs)

return new_func2
else:
raise TypeError(repr(type(reason)))
raise TypeError(repr(type(reason)))


def mapped_repr(type_, name_to_value_map) -> str:
    """
    Build a ``repr``-style string for the instance ``type_`` from the
    already formatted name/value pairs in ``name_to_value_map``, e.g.
    ``"SourceAnalysis(path='x.py', state=analyzed)"``.

    Values are inserted verbatim, so callers must pre-format them
    (e.g. apply ``repr()``) before passing them in.
    """
    # NOTE: despite the name, `type_` is an instance, not a class -- the
    # class name is obtained through its `__class__` attribute.
    joined_pairs = ", ".join(f"{pair_name}={pair_value}" for pair_name, pair_value in name_to_value_map.items())
    return f"{type_.__class__.__name__}({joined_pairs})"
Loading

0 comments on commit 5997b6d

Please sign in to comment.