Skip to content

Commit

Permalink
Merge pull request #19 from n-takumasa/cleanup
Browse files Browse the repository at this point in the history
Cleanup
  • Loading branch information
n-takumasa authored Mar 6, 2024
2 parents a4b5b11 + c9ead2f commit 82bbc07
Show file tree
Hide file tree
Showing 3 changed files with 214 additions and 178 deletions.
198 changes: 20 additions & 178 deletions jsonc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,119 +13,30 @@

from __future__ import annotations

from copy import deepcopy
from io import StringIO
from tokenize import COMMENT, NL, STRING, TokenInfo, generate_tokens, untokenize
from typing import TYPE_CHECKING, Any, TextIO
from warnings import warn

__version__ = "0.0.0"
import json
import re
from json import JSONDecoder, JSONEncoder # for compatibility
import warnings
from json import JSONDecodeError, JSONDecoder, JSONEncoder
from typing import TYPE_CHECKING

from jsonc._add_comments import _add_comments
from jsonc._util import _add_trailing_comma, _remove_c_comment, _remove_trailing_comma

if TYPE_CHECKING:
from collections.abc import Callable
from typing import Any, TextIO

CommentsDict = dict[str, "Comments"] | dict[int, "Comments"]
Comments = str | CommentsDict | tuple[str, CommentsDict]


_REMOVE_C_COMMENT = r"""
( # String Literal
\"(?:\\.|[^\\\"])*?\"
)
|
( # Comment
\/\*.*?\*\/
|
\/\/[^\r\n]*?(?:[\r\n])
)
"""


_REMOVE_TRAILING_COMMA = r"""
( # String Literal
\"(?:\\.|[^\\\"])*?\"
)
| # Right Brace without Trailing Comma & Spaces
,\s*([\]}])
"""


_ADD_TRAILING_COMMA = r"""
( # String Literal
\"(?:\\.|[^\\\"])*?\"
)
| # Don't match opening braces to avoid {,}
((?<=\")|[^,\[{\s])
(?=\s*([\]}]))
"""


def _remove_c_comment(text: str) -> str:
if text[-1] != "\n":
text = text + "\n"
return re.sub(
_REMOVE_C_COMMENT,
lambda x: x.group(1),
text,
flags=re.DOTALL | re.VERBOSE,
)


def _remove_trailing_comma(text: str) -> str:
return re.sub(
_REMOVE_TRAILING_COMMA,
lambda x: x.group(1) or x.group(2),
text,
flags=re.DOTALL | re.VERBOSE,
)


def _add_trailing_comma(text: str) -> str:
return re.sub(
_ADD_TRAILING_COMMA,
lambda x: x.group(1) or x.group(2) + ",",
text,
flags=re.DOTALL | re.VERBOSE,
)


def _make_comment(text: str, indent=0) -> str:
return "\n".join(
" " * indent + "// " + line if line else "" for line in text.splitlines()
)

from jsonc._add_comments import Comments

def _get_comments(
comments: CommentsDict | None,
key: str | int,
) -> tuple[str | None, CommentsDict | None]:
if comments is not None:
comments = comments.pop(key, None)
if isinstance(comments, tuple):
comm, comments = comments
elif isinstance(comments, str):
comm = comments
comments = None
else:
comm = None
return comm, comments
return None, None


def _warn_unused(
comments: CommentsDict | None,
stack: list[tuple[CommentsDict | None, int | None, str | int]],
):
if not comments:
return
full_key = ".".join(str(key) for _, _, key in stack[1:])
if full_key:
full_key += "."
for k in comments:
warn("Unused comment with key: " + full_key + str(k)) # TODO # noqa: B028
# In-repo version placeholder — presumably stamped by release tooling;
# TODO confirm against the build configuration.
__version__ = "0.0.0"
# Names re-exported as the package's public, json-compatible API surface.
__all__ = [
    "dump",
    "dumps",
    "load",
    "loads",
    "JSONDecoder",
    "JSONDecodeError",
    "JSONEncoder",
]


def load(
Expand Down Expand Up @@ -184,75 +95,6 @@ def loads(
)


def add_comments(data: str, comments: Comments) -> str:
    """Insert ``//`` comments from *comments* into the indented JSON text *data*.

    *comments* follows the ``Comments`` alias: plain comment text, a
    mapping keyed by object keys / array indices, or a
    ``(header, mapping)`` pair whose header is placed above the whole
    document.

    Raises:
        ValueError: when the bracket/brace bookkeeping stack is not
            empty at the end, i.e. *data* was not a well-formed indented
            JSON document.  (Previously an ``assert``, which would be
            stripped under ``python -O``.)
    """
    # Wrap the spec under a synthetic key 0 so the document header is
    # extracted with the same machinery as any nested entry.
    header, cdict = _get_comments({0: deepcopy(comments)}, 0)
    header = _make_comment(header) + "\n" if header else ""
    result = []  # output token stream, with COMMENT/NL tokens spliced in
    stack = []  # (cdict, array_index, key) frames for enclosing containers
    line_shift = 0  # rows added so far by inserted comment lines
    array_index: int | None = None  # current array element index, if inside an array
    key: str | int | None = None
    for token in generate_tokens(StringIO(data).readline):
        # A string key (or any array element) right after a newline
        # starts a new value: emit its comment block above that line.
        if (
            token.type == STRING or (array_index is not None and token.string != "]")
        ) and result[-1].type == NL:
            key = array_index if array_index is not None else json.loads(token.string)
            stack.append((cdict, array_index, key))
            comm, cdict = _get_comments(cdict, key)
            if comm:
                comm = _make_comment(comm, token.start[1])
                comm_coord = (token.start[0] + line_shift, 0)
                result.append(TokenInfo(COMMENT, comm, comm_coord, comm_coord, ""))
                result.append(TokenInfo(NL, "\n", comm_coord, comm_coord, ""))
                line_shift += 1

        if token.string == ",":
            _warn_unused(cdict, stack)
            cdict, array_index, key = stack.pop()
            if array_index is not None:
                array_index += 1
        elif token.string == "[":
            stack.append((cdict, array_index, key))
            array_index = 0
        elif token.string == "{":
            stack.append((cdict, array_index, key))
            array_index = None
        elif token.string in {"]", "}"}:
            _warn_unused(cdict, stack)
            cdict, array_index, key = stack.pop()
            # Closing a container also ends its last element when that
            # element had no trailing comma: pop that frame as well.
            if result[-1].type == NL and result[-2].string != ",":
                _warn_unused(cdict, stack)
                cdict, array_index, key = stack.pop()

        # Re-emit the token shifted down by the comment lines inserted
        # above it.  Appending a fresh TokenInfo (instead of rebinding
        # the loop variable) resolves the PLW2901 TODO.
        result.append(
            TokenInfo(
                token.type,
                token.string,
                (token.start[0] + line_shift, token.start[1]),
                (token.end[0] + line_shift, token.end[1]),
                token.line,
            ),
        )

    if stack:
        msg = "Error when adding comments to JSON"
        raise ValueError(msg)
    return header + untokenize(result)


def dumps(
obj: Any,
*,
Expand Down Expand Up @@ -294,10 +136,10 @@ def dumps(
if comments is None:
return data
if indent is None:
warn("Can't add comments to non-indented JSON") # TODO # noqa: B028
warnings.warn("Can't add comments to non-indented JSON", stacklevel=2)
return data

return add_comments(data, comments)
return _add_comments(data, comments)


def dump(
Expand Down
131 changes: 131 additions & 0 deletions jsonc/_add_comments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
from __future__ import annotations

import copy
import io
import json
import sys
import warnings
from tokenize import COMMENT, NL, STRING, TokenInfo, generate_tokens, untokenize
from typing import TYPE_CHECKING

if TYPE_CHECKING:
CommentsDict = dict[str, "Comments"] | dict[int, "Comments"]
Comments = str | CommentsDict | tuple[str, CommentsDict]


def _make_comment(text: str, indent=0) -> str:
return "\n".join(
" " * indent + "// " + line if line else "" for line in text.splitlines()
)


def _get_comments(
comments: CommentsDict | None,
key: str | int,
) -> tuple[str | None, CommentsDict | None]:
if comments is not None:
cbody: Comments | None = comments.pop(key, None) # type: ignore[reportGeneralTypeIssues]
if isinstance(cbody, tuple):
chead, cbody = cbody
elif isinstance(cbody, str):
chead = cbody
cbody = None
else:
chead = None
return chead, cbody
return None, None


def _warn_unused(
comments: CommentsDict | None,
stack: list[tuple[CommentsDict | None, int | None, str | int]],
):
if not comments:
return
full_key = ".".join(str(key) for _, _, key in stack[1:])
if full_key:
full_key += "."
for k in comments:
f = sys._getframe() # noqa: SLF001
filename = f.f_code.co_filename
stacklevel = 2
while f := f.f_back:
if f.f_code.co_filename != filename:
break
stacklevel += 1
warnings.warn(
"Unused comment with key: " + full_key + str(k),
stacklevel=4,
)


def _add_comments(data: str, comments: Comments) -> str:  # noqa: C901
    """Insert ``//`` comments from *comments* into the indented JSON text *data*.

    *comments* follows the ``Comments`` alias: plain comment text, a
    mapping keyed by object keys / array indices, or a
    ``(header, mapping)`` pair whose header goes above the document.

    Raises:
        ValueError: when the bracket/brace bookkeeping stack is not
            empty at the end, i.e. *data* was not a well-formed
            indented JSON document.
    """
    # Wrap the spec under a synthetic key 0 so the document header is
    # extracted with the same machinery as any nested entry.
    header, cdict = _get_comments({0: copy.deepcopy(comments)}, 0)
    header = _make_comment(header) + "\n" if header else ""
    result = []  # output token stream, with COMMENT/NL tokens spliced in
    stack = []  # (cdict, array_index, key) frames for enclosing containers
    line_shift = 0  # rows added so far by inserted comment lines
    array_index: int | None = None  # current array element index, if inside an array
    key: str | int | None = None
    for token in generate_tokens(io.StringIO(data).readline):
        # A string key (or any array element) right after a newline
        # starts a new value: emit its comment block above that line.
        # NOTE(review): result[-1] assumes the first token is an opening
        # bracket/brace rather than a bare scalar — confirm for
        # top-level scalar documents.
        if (
            token.type == STRING or (array_index is not None and token.string != "]")
        ) and result[-1].type == NL:
            key = array_index if array_index is not None else json.loads(token.string)
            stack.append((cdict, array_index, key))
            comm, cdict = _get_comments(cdict, key)  # type: ignore[reportGeneralTypeIssues]
            if comm:
                # Indent the comment to the value's column; place it on
                # its own row at the value's (shifted) line.
                comm = _make_comment(comm, token.start[1])
                comm_coord = (token.start[0] + line_shift, 0)
                result.append(
                    TokenInfo(
                        COMMENT,
                        comm,
                        comm_coord,
                        comm_coord,
                        "",
                    ),
                )
                result.append(
                    TokenInfo(
                        NL,
                        "\n",
                        comm_coord,
                        comm_coord,
                        "",
                    ),
                )
                line_shift += 1

        # Container bookkeeping: a comma ends the current element; an
        # opening bracket/brace pushes a frame; a closer pops one.
        if token.string == ",":
            _warn_unused(cdict, stack)
            cdict, array_index, key = stack.pop()
            if array_index is not None:
                array_index += 1
        elif token.string == "[":
            stack.append((cdict, array_index, key))
            array_index = 0
        elif token.string == "{":
            stack.append((cdict, array_index, key))
            array_index = None
        elif token.string in {"]", "}"}:
            _warn_unused(cdict, stack)
            cdict, array_index, key = stack.pop()
            # Closing a container also ends its last element when that
            # element had no trailing comma: pop that frame as well.
            if result[-1].type == NL and result[-2].string != ",":
                _warn_unused(cdict, stack)
                cdict, array_index, key = stack.pop()

        # Re-emit the token shifted down by the comment lines inserted
        # above it.
        result.append(
            TokenInfo(
                token.type,
                token.string,
                (token.start[0] + line_shift, token.start[1]),
                (token.end[0] + line_shift, token.end[1]),
                token.line,
            ),
        )

    if stack:
        msg = "Error when adding comments to JSON"
        raise ValueError(msg)
    return header + untokenize(result)
Loading

0 comments on commit 82bbc07

Please sign in to comment.