Skip to content

Commit

Permalink
Merge pull request #19 from n-takumasa/cleanup
Browse files Browse the repository at this point in the history
Cleanup
  • Loading branch information
n-takumasa authored Mar 6, 2024
2 parents a4b5b11 + c9ead2f commit 82bbc07
Show file tree
Hide file tree
Showing 3 changed files with 214 additions and 178 deletions.
198 changes: 20 additions & 178 deletions jsonc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,119 +13,30 @@

from __future__ import annotations

from copy import deepcopy
from io import StringIO
from tokenize import COMMENT, NL, STRING, TokenInfo, generate_tokens, untokenize
from typing import TYPE_CHECKING, Any, TextIO
from warnings import warn

__version__ = "0.0.0"
import json
import re
from json import JSONDecoder, JSONEncoder # for compatibility
import warnings
from json import JSONDecodeError, JSONDecoder, JSONEncoder
from typing import TYPE_CHECKING

from jsonc._add_comments import _add_comments
from jsonc._util import _add_trailing_comma, _remove_c_comment, _remove_trailing_comma

if TYPE_CHECKING:
from collections.abc import Callable
from typing import Any, TextIO

CommentsDict = dict[str, "Comments"] | dict[int, "Comments"]
Comments = str | CommentsDict | tuple[str, CommentsDict]


_REMOVE_C_COMMENT = r"""
( # String Literal
\"(?:\\.|[^\\\"])*?\"
)
|
( # Comment
\/\*.*?\*\/
|
\/\/[^\r\n]*?(?:[\r\n])
)
"""


_REMOVE_TRAILING_COMMA = r"""
( # String Literal
\"(?:\\.|[^\\\"])*?\"
)
| # Right Brace without Trailing Comma & Spaces
,\s*([\]}])
"""


_ADD_TRAILING_COMMA = r"""
( # String Literal
\"(?:\\.|[^\\\"])*?\"
)
| # Don't match opening braces to avoid {,}
((?<=\")|[^,\[{\s])
(?=\s*([\]}]))
"""


def _remove_c_comment(text: str) -> str:
if text[-1] != "\n":
text = text + "\n"
return re.sub(
_REMOVE_C_COMMENT,
lambda x: x.group(1),
text,
flags=re.DOTALL | re.VERBOSE,
)


def _remove_trailing_comma(text: str) -> str:
return re.sub(
_REMOVE_TRAILING_COMMA,
lambda x: x.group(1) or x.group(2),
text,
flags=re.DOTALL | re.VERBOSE,
)


def _add_trailing_comma(text: str) -> str:
return re.sub(
_ADD_TRAILING_COMMA,
lambda x: x.group(1) or x.group(2) + ",",
text,
flags=re.DOTALL | re.VERBOSE,
)


def _make_comment(text: str, indent=0) -> str:
return "\n".join(
" " * indent + "// " + line if line else "" for line in text.splitlines()
)

from jsonc._add_comments import Comments

def _get_comments(
comments: CommentsDict | None,
key: str | int,
) -> tuple[str | None, CommentsDict | None]:
if comments is not None:
comments = comments.pop(key, None)
if isinstance(comments, tuple):
comm, comments = comments
elif isinstance(comments, str):
comm = comments
comments = None
else:
comm = None
return comm, comments
return None, None


def _warn_unused(
comments: CommentsDict | None,
stack: list[tuple[CommentsDict | None, int | None, str | int]],
):
if not comments:
return
full_key = ".".join(str(key) for _, _, key in stack[1:])
if full_key:
full_key += "."
for k in comments:
warn("Unused comment with key: " + full_key + str(k)) # TODO # noqa: B028
# In-repo version placeholder — presumably stamped by release tooling;
# TODO confirm against the build configuration.
__version__ = "0.0.0"
# Names re-exported as the package's public, json-compatible API surface.
__all__ = [
    "dump",
    "dumps",
    "load",
    "loads",
    "JSONDecoder",
    "JSONDecodeError",
    "JSONEncoder",
]


def load(
Expand Down Expand Up @@ -184,75 +95,6 @@ def loads(
)


def add_comments(data: str, comments: Comments) -> str:
    """Insert ``//`` comments from *comments* into the indented JSON text *data*.

    *comments* follows the ``Comments`` alias: plain comment text, a
    mapping keyed by object keys / array indices, or a
    ``(header, mapping)`` pair whose header is placed above the whole
    document.

    Raises:
        ValueError: when the bracket/brace bookkeeping stack is not
            empty at the end, i.e. *data* was not a well-formed indented
            JSON document.  (Previously an ``assert``, which would be
            stripped under ``python -O``.)
    """
    # Wrap the spec under a synthetic key 0 so the document header is
    # extracted with the same machinery as any nested entry.
    header, cdict = _get_comments({0: deepcopy(comments)}, 0)
    header = _make_comment(header) + "\n" if header else ""
    result = []  # output token stream, with COMMENT/NL tokens spliced in
    stack = []  # (cdict, array_index, key) frames for enclosing containers
    line_shift = 0  # rows added so far by inserted comment lines
    array_index: int | None = None  # current array element index, if inside an array
    key: str | int | None = None
    for token in generate_tokens(StringIO(data).readline):
        # A string key (or any array element) right after a newline
        # starts a new value: emit its comment block above that line.
        if (
            token.type == STRING or (array_index is not None and token.string != "]")
        ) and result[-1].type == NL:
            key = array_index if array_index is not None else json.loads(token.string)
            stack.append((cdict, array_index, key))
            comm, cdict = _get_comments(cdict, key)
            if comm:
                comm = _make_comment(comm, token.start[1])
                comm_coord = (token.start[0] + line_shift, 0)
                result.append(TokenInfo(COMMENT, comm, comm_coord, comm_coord, ""))
                result.append(TokenInfo(NL, "\n", comm_coord, comm_coord, ""))
                line_shift += 1

        if token.string == ",":
            _warn_unused(cdict, stack)
            cdict, array_index, key = stack.pop()
            if array_index is not None:
                array_index += 1
        elif token.string == "[":
            stack.append((cdict, array_index, key))
            array_index = 0
        elif token.string == "{":
            stack.append((cdict, array_index, key))
            array_index = None
        elif token.string in {"]", "}"}:
            _warn_unused(cdict, stack)
            cdict, array_index, key = stack.pop()
            # Closing a container also ends its last element when that
            # element had no trailing comma: pop that frame as well.
            if result[-1].type == NL and result[-2].string != ",":
                _warn_unused(cdict, stack)
                cdict, array_index, key = stack.pop()

        # Re-emit the token shifted down by the comment lines inserted
        # above it.  Appending a fresh TokenInfo (instead of rebinding
        # the loop variable) resolves the PLW2901 TODO.
        result.append(
            TokenInfo(
                token.type,
                token.string,
                (token.start[0] + line_shift, token.start[1]),
                (token.end[0] + line_shift, token.end[1]),
                token.line,
            ),
        )

    if stack:
        msg = "Error when adding comments to JSON"
        raise ValueError(msg)
    return header + untokenize(result)


def dumps(
obj: Any,
*,
Expand Down Expand Up @@ -294,10 +136,10 @@ def dumps(
if comments is None:
return data
if indent is None:
warn("Can't add comments to non-indented JSON") # TODO # noqa: B028
warnings.warn("Can't add comments to non-indented JSON", stacklevel=2)
return data

return add_comments(data, comments)
return _add_comments(data, comments)


def dump(
Expand Down
131 changes: 131 additions & 0 deletions jsonc/_add_comments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
from __future__ import annotations

import copy
import io
import json
import sys
import warnings
from tokenize import COMMENT, NL, STRING, TokenInfo, generate_tokens, untokenize
from typing import TYPE_CHECKING

if TYPE_CHECKING:
CommentsDict = dict[str, "Comments"] | dict[int, "Comments"]
Comments = str | CommentsDict | tuple[str, CommentsDict]


def _make_comment(text: str, indent=0) -> str:
return "\n".join(
" " * indent + "// " + line if line else "" for line in text.splitlines()
)


def _get_comments(
comments: CommentsDict | None,
key: str | int,
) -> tuple[str | None, CommentsDict | None]:
if comments is not None:
cbody: Comments | None = comments.pop(key, None) # type: ignore[reportGeneralTypeIssues]
if isinstance(cbody, tuple):
chead, cbody = cbody
elif isinstance(cbody, str):
chead = cbody
cbody = None
else:
chead = None
return chead, cbody
return None, None


def _warn_unused(
comments: CommentsDict | None,
stack: list[tuple[CommentsDict | None, int | None, str | int]],
):
if not comments:
return
full_key = ".".join(str(key) for _, _, key in stack[1:])
if full_key:
full_key += "."
for k in comments:
f = sys._getframe() # noqa: SLF001
filename = f.f_code.co_filename
stacklevel = 2
while f := f.f_back:
if f.f_code.co_filename != filename:
break
stacklevel += 1
warnings.warn(
"Unused comment with key: " + full_key + str(k),
stacklevel=4,
)


def _add_comments(data: str, comments: Comments) -> str:  # noqa: C901
    """Insert ``//`` comments from *comments* into the indented JSON text *data*.

    *comments* follows the ``Comments`` alias: plain comment text, a
    mapping keyed by object keys / array indices, or a
    ``(header, mapping)`` pair whose header goes above the document.

    Raises:
        ValueError: when the bracket/brace bookkeeping stack is not
            empty at the end, i.e. *data* was not a well-formed
            indented JSON document.
    """
    # Wrap the spec under a synthetic key 0 so the document header is
    # extracted with the same machinery as any nested entry.
    header, cdict = _get_comments({0: copy.deepcopy(comments)}, 0)
    header = _make_comment(header) + "\n" if header else ""
    result = []  # output token stream, with COMMENT/NL tokens spliced in
    stack = []  # (cdict, array_index, key) frames for enclosing containers
    line_shift = 0  # rows added so far by inserted comment lines
    array_index: int | None = None  # current array element index, if inside an array
    key: str | int | None = None
    for token in generate_tokens(io.StringIO(data).readline):
        # A string key (or any array element) right after a newline
        # starts a new value: emit its comment block above that line.
        # NOTE(review): result[-1] assumes the first token is an opening
        # bracket/brace rather than a bare scalar — confirm for
        # top-level scalar documents.
        if (
            token.type == STRING or (array_index is not None and token.string != "]")
        ) and result[-1].type == NL:
            key = array_index if array_index is not None else json.loads(token.string)
            stack.append((cdict, array_index, key))
            comm, cdict = _get_comments(cdict, key)  # type: ignore[reportGeneralTypeIssues]
            if comm:
                # Indent the comment to the value's column; place it on
                # its own row at the value's (shifted) line.
                comm = _make_comment(comm, token.start[1])
                comm_coord = (token.start[0] + line_shift, 0)
                result.append(
                    TokenInfo(
                        COMMENT,
                        comm,
                        comm_coord,
                        comm_coord,
                        "",
                    ),
                )
                result.append(
                    TokenInfo(
                        NL,
                        "\n",
                        comm_coord,
                        comm_coord,
                        "",
                    ),
                )
                line_shift += 1

        # Container bookkeeping: a comma ends the current element; an
        # opening bracket/brace pushes a frame; a closer pops one.
        if token.string == ",":
            _warn_unused(cdict, stack)
            cdict, array_index, key = stack.pop()
            if array_index is not None:
                array_index += 1
        elif token.string == "[":
            stack.append((cdict, array_index, key))
            array_index = 0
        elif token.string == "{":
            stack.append((cdict, array_index, key))
            array_index = None
        elif token.string in {"]", "}"}:
            _warn_unused(cdict, stack)
            cdict, array_index, key = stack.pop()
            # Closing a container also ends its last element when that
            # element had no trailing comma: pop that frame as well.
            if result[-1].type == NL and result[-2].string != ",":
                _warn_unused(cdict, stack)
                cdict, array_index, key = stack.pop()

        # Re-emit the token shifted down by the comment lines inserted
        # above it.
        result.append(
            TokenInfo(
                token.type,
                token.string,
                (token.start[0] + line_shift, token.start[1]),
                (token.end[0] + line_shift, token.end[1]),
                token.line,
            ),
        )

    if stack:
        msg = "Error when adding comments to JSON"
        raise ValueError(msg)
    return header + untokenize(result)
Loading

0 comments on commit 82bbc07

Please sign in to comment.