Implement linting for html5lib-tests

This checks that we have the right headers, in the right order, and checks for both duplicate headers and duplicate tests.
html5lib · Apr 6, 2023 · 0d5c740 · 0d5c740
1 parent 4e82e3d
commit 0d5c740
Show file tree

Hide file tree

Showing 19 changed files with 1,891 additions and 0 deletions.
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -0,0 +1,25 @@
+name: lint
+
+concurrency:  # one active run per workflow + ref; a newer run cancels the one in progress
+  group: "${{github.workflow}}-${{github.ref}}"
+  cancel-in-progress: true
+
+on:
+  workflow_dispatch:  # allows starting the workflow manually
+  push:
+    branches:
+      - master
+  pull_request:
+    types: [opened, synchronize]
+    branches:
+      - '**'  # any base branch; a single '*' would not match names containing '/'
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+      - run: ./lint  # the repository's lint launcher script
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,79 @@
+# Copyright (c) 2014 GitHub, Inc.
+#
+# Permission is hereby granted,  free of charge,  to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to  use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+doc/_build/
+
+# PyBuilder
+target/
diff --git a/lint b/lint
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+import sys
+
+import lint_lib.lint as lint
+
+sys.exit(lint.main())  # whatever lint.main() returns is handed to sys.exit()
diff --git a/lint_lib/__init__.py b/lint_lib/__init__.py
diff --git a/lint_lib/_vendor-patches/funcparserlib.patch b/lint_lib/_vendor-patches/funcparserlib.patch
@@ -0,0 +1,24 @@
+diff --git a/lint_lib/_vendor/funcparserlib/parser.py b/lint_lib/_vendor/funcparserlib/parser.py
+index eb2f53f..0f86e6c 100644
+--- a/lint_lib/_vendor/funcparserlib/parser.py
++++ b/lint_lib/_vendor/funcparserlib/parser.py
+@@ -137,19 +137,6 @@ class Parser(object):
+         "('x', 'y')"
+
+         ```
+-
+-        !!! Note
+-
+-            You can enable the parsing log this way:
+-
+-            ```python
+-            import logging
+-            logging.basicConfig(level=logging.DEBUG)
+-            import funcparserlib.parser
+-            funcparserlib.parser.debug = True
+-            ```
+-
+-            The way to enable the parsing log may be changed in future versions.
+         """
+         self.name = name
+         return self
diff --git a/lint_lib/_vendor/__init__.py b/lint_lib/_vendor/__init__.py
diff --git a/lint_lib/_vendor/funcparserlib/LICENSE b/lint_lib/_vendor/funcparserlib/LICENSE
@@ -0,0 +1,18 @@
+Copyright © 2009/2021 Andrey Vlasovskikh
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this
+software and associated documentation files (the "Software"), to deal in the Software
+without restriction, including without limitation the rights to use, copy, modify,
+merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or
+substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
diff --git a/lint_lib/_vendor/funcparserlib/__init__.py b/lint_lib/_vendor/funcparserlib/__init__.py
diff --git a/lint_lib/_vendor/funcparserlib/lexer.py b/lint_lib/_vendor/funcparserlib/lexer.py
@@ -0,0 +1,211 @@
+# -*- coding: utf-8 -*-
+
+# Copyright © 2009/2021 Andrey Vlasovskikh
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this
+# software and associated documentation files (the "Software"), to deal in the Software
+# without restriction, including without limitation the rights to use, copy, modify,
+# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies
+# or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+# PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+from __future__ import unicode_literals
+
+__all__ = ["make_tokenizer", "TokenSpec", "Token", "LexerError"]
+
+import re
+
+
+class LexerError(Exception):  # raised by the tokenizer when no token spec matches the input
+    def __init__(self, place, msg):  # place: (line, pos) pair; msg: text quoted in the message
+        self.place = place
+        self.msg = msg
+
+    def __str__(self):  # renders as: cannot tokenize data: <line>,<pos>: "<msg>"
+        s = "cannot tokenize data"
+        line, pos = self.place  # place must unpack into exactly two integers (%d below)
+        return '%s: %d,%d: "%s"' % (s, line, pos, self.msg)
+
+
+class TokenSpec(object):
+    """A token specification for generating a lexer via `make_tokenizer()`."""
+
+    def __init__(self, type, pattern, flags=0):  # `type` shadows the builtin; it is the public parameter name
+        """Initialize a `TokenSpec` object.
+
+        Parameters:
+            type (str): User-defined type of the token (e.g. `"name"`, `"number"`,
+                `"operator"`)
+            pattern (str): Regexp for matching this token type
+            flags (int, optional): Regexp flags, the second argument of `re.compile()`
+        """
+        self.type = type
+        self.pattern = pattern  # stored uncompiled; make_tokenizer() passes it to re.compile()
+        self.flags = flags
+
+    def __repr__(self):  # e.g. TokenSpec('id', '\\w+', 0)
+        return "TokenSpec(%r, %r, %r)" % (self.type, self.pattern, self.flags)
+
+
+class Token(object):
+    """A token object that represents a substring of certain type in your text.
+
+    You can compare tokens for equality using the `==` operator. Tokens also define
+    custom `repr()` and `str()`.
+
+    Attributes:
+        type (str): User-defined type of the token (e.g. `"name"`, `"number"`,
+            `"operator"`)
+        value (str): Text value of the token
+        start (Optional[Tuple[int, int]]): Start position (_line_, _column_)
+        end (Optional[Tuple[int, int]]): End position (_line_, _column_)
+    """
+
+    def __init__(self, type, value, start=None, end=None):
+        """Initialize a `Token` object."""
+        self.type = type
+        self.value = value
+        self.start = start
+        self.end = end
+
+    def __repr__(self):  # positions are deliberately left out: Token('id', 'Hello')
+        return "Token(%r, %r)" % (self.type, self.value)
+
+    def __eq__(self, other):  # NOTE(review): no __hash__ is defined, so Token is unhashable on Python 3
+        # FIXME: Case sensitivity is assumed here
+        if other is None:
+            return False
+        else:
+            return self.type == other.type and self.value == other.value  # start/end ignored; AttributeError if `other` has no .type
+
+    def _pos_str(self):  # "" when either position is unknown, otherwise "sl,sp-el,ep:"
+        if self.start is None or self.end is None:
+            return ""
+        else:
+            sl, sp = self.start
+            el, ep = self.end
+            return "%d,%d-%d,%d:" % (sl, sp, el, ep)
+
+    def __str__(self):
+        s = "%s %s '%s'" % (self._pos_str(), self.type, self.value)
+        return s.strip()  # drops the leading space left behind by an empty position prefix
+
+    @property
+    def name(self):  # read-only alias for .value
+        return self.value
+
+    def pformat(self):  # like __str__, but position and type are padded into fixed-width columns
+        return "%s %s '%s'" % (
+            self._pos_str().ljust(20),  # noqa
+            self.type.ljust(14),
+            self.value,
+        )
+
+
+def make_tokenizer(specs):
+    # noinspection GrazieInspection
+    """Make a function that tokenizes text based on the regexp specs.
+
+    Type: `(Sequence[TokenSpec | Tuple]) -> Callable[[str], Iterable[Token]]`
+
+    A token spec is `TokenSpec` instance.
+
+    !!! Note
+
+        For legacy reasons, a token spec may also be a tuple of (_type_, _args_), where
+        _type_ sets the value of `Token.type` for the token, and _args_ are the
+        positional arguments for `re.compile()`: either just (_pattern_,) or
+        (_pattern_, _flags_).
+
+    It returns a tokenizer function that takes a string and returns an iterable of
+    `Token` objects, or raises `LexerError` if it cannot tokenize the string according
+    to its token specs.
+
+    Examples:
+
+    ```pycon
+    >>> tokenize = make_tokenizer([
+    ...     TokenSpec("space", r"\\s+"),
+    ...     TokenSpec("id", r"\\w+"),
+    ...     TokenSpec("op", r"[,!]"),
+    ... ])
+    >>> text = "Hello, World!"
+    >>> [t for t in tokenize(text) if t.type != "space"]  # noqa
+    [Token('id', 'Hello'), Token('op', ','), Token('id', 'World'), Token('op', '!')]
+    >>> text = "Bye?"
+    >>> list(tokenize(text))
+    Traceback (most recent call last):
+        ...
+    lexer.LexerError: cannot tokenize data: 1,4: "Bye?"
+
+    ```
+    """
+    compiled = []  # (type, compiled regexp) pairs, kept in the order the specs were given
+    for spec in specs:
+        if isinstance(spec, TokenSpec):
+            c = spec.type, re.compile(spec.pattern, spec.flags)
+        else:
+            name, args = spec  # legacy tuple form: (type, positional args for re.compile())
+            c = name, re.compile(*args)
+        compiled.append(c)
+
+    def match_specs(s, i, position):  # build the Token for the first spec that matches s at index i
+        line, pos = position
+        for type, regexp in compiled:
+            m = regexp.match(s, i)  # anchored at i; the first matching spec wins, not the longest match
+            if m is not None:
+                value = m.group()
+                nls = value.count("\n")
+                n_line = line + nls
+                if nls == 0:
+                    n_pos = pos + len(value)  # still on the same line: advance the column
+                else:
+                    n_pos = len(value) - value.rfind("\n") - 1  # characters after the token's last newline
+                return Token(type, value, (line, pos + 1), (n_line, n_pos))  # start column is reported 1-based
+        else:  # for-else: only reached when no spec matched at index i
+            err_line = s.splitlines()[line - 1]  # NOTE(review): splitlines() splits on more than "\n", which is all `line` counts
+            raise LexerError((line, pos + 1), err_line)
+
+    def f(s):  # generator: tokens are produced lazily, so LexerError surfaces during iteration
+        length = len(s)
+        line, pos = 1, 0  # line is 1-based; pos is the 0-based column
+        i = 0
+        while i < length:
+            t = match_specs(s, i, (line, pos))
+            yield t
+            line, pos = t.end  # the next token starts where this one ended
+            i += len(t.value)  # NOTE(review): a spec that matches the empty string never advances i
+
+    return f
+
+
+# Example token specs; order matters because the first matching spec wins. See
+# [this article][1] on matching multiline comments with regexps (including `*?`).
+#
+#   [1]: http://ostermiller.org/findcomment.html
+_example_token_specs = [
+    TokenSpec("COMMENT", r"\(\*(.|[\r\n])*?\*\)", re.MULTILINE),  # MULTILINE only affects ^/$, unused here; (.|[\r\n]) spans lines
+    TokenSpec("COMMENT", r"\{(.|[\r\n])*?\}", re.MULTILINE),
+    TokenSpec("COMMENT", r"//.*"),
+    TokenSpec("NL", r"[\r\n]+"),  # listed before SPACE, so a run starting with a newline becomes NL
+    TokenSpec("SPACE", r"[ \t\r\n]+"),
+    TokenSpec("NAME", r"[A-Za-z_][A-Za-z_0-9]*"),
+    TokenSpec("REAL", r"[0-9]+\.[0-9]*([Ee][+\-]?[0-9]+)*"),  # listed before INT so "1.5" is not split at the dot
+    TokenSpec("INT", r"[0-9]+"),
+    TokenSpec("INT", r"\$[0-9A-Fa-f]+"),
+    TokenSpec("OP", r"(\.\.)|(<>)|(<=)|(>=)|(:=)|[;,=\(\):\[\]\.+\-<>\*/@\^]"),  # two-character operators are tried first
+    TokenSpec("STRING", r"'([^']|(''))*'"),
+    TokenSpec("CHAR", r"#[0-9]+"),
+    TokenSpec("CHAR", r"#\$[0-9A-Fa-f]+"),
+]
+# Usage (left disabled): tokenize = make_tokenizer(_example_token_specs)