Cleanup #19

Merged 3 commits on Mar 6, 2024
198 changes: 20 additions & 178 deletions jsonc/__init__.py
@@ -13,119 +13,30 @@

from __future__ import annotations

from copy import deepcopy
from io import StringIO
from tokenize import COMMENT, NL, STRING, TokenInfo, generate_tokens, untokenize
from typing import TYPE_CHECKING, Any, TextIO
from warnings import warn

__version__ = "0.0.0"
import json
import re
from json import JSONDecoder, JSONEncoder # for compatibility
import warnings
from json import JSONDecodeError, JSONDecoder, JSONEncoder
from typing import TYPE_CHECKING

from jsonc._add_comments import _add_comments
from jsonc._util import _add_trailing_comma, _remove_c_comment, _remove_trailing_comma

if TYPE_CHECKING:
from collections.abc import Callable
from typing import Any, TextIO

CommentsDict = dict[str, "Comments"] | dict[int, "Comments"]
Comments = str | CommentsDict | tuple[str, CommentsDict]


_REMOVE_C_COMMENT = r"""
( # String Literal
\"(?:\\.|[^\\\"])*?\"
)
|
( # Comment
\/\*.*?\*\/
|
\/\/[^\r\n]*?(?:[\r\n])
)
"""


_REMOVE_TRAILING_COMMA = r"""
( # String Literal
\"(?:\\.|[^\\\"])*?\"
)
| # Right Brace without Trailing Comma & Spaces
,\s*([\]}])
"""


_ADD_TRAILING_COMMA = r"""
( # String Literal
\"(?:\\.|[^\\\"])*?\"
)
| # Don't match opening braces to avoid {,}
((?<=\")|[^,\[{\s])
(?=\s*([\]}]))
"""


def _remove_c_comment(text: str) -> str:
if text[-1] != "\n":
text = text + "\n"
return re.sub(
_REMOVE_C_COMMENT,
lambda x: x.group(1),
text,
flags=re.DOTALL | re.VERBOSE,
)


def _remove_trailing_comma(text: str) -> str:
return re.sub(
_REMOVE_TRAILING_COMMA,
lambda x: x.group(1) or x.group(2),
text,
flags=re.DOTALL | re.VERBOSE,
)


def _add_trailing_comma(text: str) -> str:
return re.sub(
_ADD_TRAILING_COMMA,
lambda x: x.group(1) or x.group(2) + ",",
text,
flags=re.DOTALL | re.VERBOSE,
)


def _make_comment(text: str, indent=0) -> str:
return "\n".join(
" " * indent + "// " + line if line else "" for line in text.splitlines()
)

from jsonc._add_comments import Comments

def _get_comments(
comments: CommentsDict | None,
key: str | int,
) -> tuple[str | None, CommentsDict | None]:
if comments is not None:
comments = comments.pop(key, None)
if isinstance(comments, tuple):
comm, comments = comments
elif isinstance(comments, str):
comm = comments
comments = None
else:
comm = None
return comm, comments
return None, None


def _warn_unused(
comments: CommentsDict | None,
stack: list[tuple[CommentsDict | None, int | None, str | int]],
):
if not comments:
return
full_key = ".".join(str(key) for _, _, key in stack[1:])
if full_key:
full_key += "."
for k in comments:
warn("Unused comment with key: " + full_key + str(k)) # TODO # noqa: B028
__version__ = "0.0.0"
__all__ = [
"dump",
"dumps",
"load",
"loads",
"JSONDecoder",
"JSONDecodeError",
"JSONEncoder",
]
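
For orientation, outside the diff itself: a minimal usage sketch of the public API listed in __all__. The exact signatures are collapsed in this view, so this assumes load/loads mirror json.load/json.loads while tolerating // and /* */ comments and trailing commas (the helpers imported from jsonc._util above) before delegating to the standard json parser.

import jsonc

text = """
{
    // server settings
    "host": "localhost",
    "port": 8080,  /* default */
    "tags": ["a", "b",],
}
"""
config = jsonc.loads(text)  # assumed: comments and trailing commas are stripped before parsing
# expected result: {"host": "localhost", "port": 8080, "tags": ["a", "b"]}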


def load(
@@ -184,75 +95,6 @@ def loads(
)


def add_comments(data: str, comments: Comments) -> str:
header, comments = _get_comments({0: deepcopy(comments)}, 0)
header = _make_comment(header) + "\n" if header else ""
result = []
stack = []
line_shift = 0
array_index: int | None = None
key: str | int | None = None
for token in generate_tokens(StringIO(data).readline):
if (
token.type == STRING or (array_index is not None and token.string != "]")
) and result[-1].type == NL:
key = array_index if array_index is not None else json.loads(token.string)
stack.append((comments, array_index, key))
comm, comments = _get_comments(comments, key)
if comm:
comm = _make_comment(comm, token.start[1])
comm_coord = (token.start[0] + line_shift, 0)
result.append(
TokenInfo(
COMMENT,
comm,
comm_coord,
comm_coord,
"",
),
)
result.append(
TokenInfo(
NL,
"\n",
comm_coord,
comm_coord,
"",
),
)
line_shift += 1

if token.string == ",":
_warn_unused(comments, stack)
comments, array_index, key = stack.pop()
if array_index is not None:
array_index += 1
elif token.string == "[":
stack.append((comments, array_index, key))
array_index = 0
elif token.string == "{":
stack.append((comments, array_index, key))
array_index = None
elif token.string in {"]", "}"}:
_warn_unused(comments, stack)
comments, array_index, key = stack.pop()
if result[-1].type == NL and result[-2].string != ",":
_warn_unused(comments, stack)
comments, array_index, key = stack.pop()

token = TokenInfo( # TODO # noqa: PLW2901
token.type,
token.string,
(token.start[0] + line_shift, token.start[1]),
(token.end[0] + line_shift, token.end[1]),
token.line,
)
result.append(token)

assert not stack, "Error when adding comments to JSON" # TODO # noqa: S101
return header + untokenize(result)


def dumps(
obj: Any,
*,
@@ -294,10 +136,10 @@ dumps(
if comments is None:
return data
if indent is None:
warn("Can't add comments to non-indented JSON") # TODO # noqa: B028
warnings.warn("Can't add comments to non-indented JSON", stacklevel=2)
return data

return add_comments(data, comments)
return _add_comments(data, comments)
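
A hedged illustration of the branch above (the example values are assumptions, not taken from the diff): comments are only attached when an indent is supplied; without one, dumps warns and falls back to plain JSON output.

import jsonc

obj = {"name": "demo", "items": [1, 2]}
comments = {"name": "human-readable label", "items": ("list of ids", {0: "first id"})}

print(jsonc.dumps(obj, indent=4, comments=comments))  # // comments emitted above the matching keys
jsonc.dumps(obj, comments=comments)  # no indent: warns "Can't add comments to non-indented JSON"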


def dump(
131 changes: 131 additions & 0 deletions jsonc/_add_comments.py
@@ -0,0 +1,131 @@
from __future__ import annotations

import copy
import io
import json
import sys
import warnings
from tokenize import COMMENT, NL, STRING, TokenInfo, generate_tokens, untokenize
from typing import TYPE_CHECKING

if TYPE_CHECKING:
CommentsDict = dict[str, "Comments"] | dict[int, "Comments"]
Comments = str | CommentsDict | tuple[str, CommentsDict]


def _make_comment(text: str, indent=0) -> str:
return "\n".join(
" " * indent + "// " + line if line else "" for line in text.splitlines()
)


def _get_comments(
comments: CommentsDict | None,
key: str | int,
) -> tuple[str | None, CommentsDict | None]:
if comments is not None:
cbody: Comments | None = comments.pop(key, None) # type: ignore[reportGeneralTypeIssues]
if isinstance(cbody, tuple):
chead, cbody = cbody
elif isinstance(cbody, str):
chead = cbody
cbody = None
else:
chead = None
return chead, cbody
return None, None


def _warn_unused(
comments: CommentsDict | None,
stack: list[tuple[CommentsDict | None, int | None, str | int]],
):
if not comments:
return
full_key = ".".join(str(key) for _, _, key in stack[1:])
if full_key:
full_key += "."
for k in comments:
f = sys._getframe() # noqa: SLF001
filename = f.f_code.co_filename
stacklevel = 2
while f := f.f_back:
if f.f_code.co_filename != filename:
break
stacklevel += 1
warnings.warn(
"Unused comment with key: " + full_key + str(k),
stacklevel=4,
)


def _add_comments(data: str, comments: Comments) -> str: # noqa: C901
header, cdict = _get_comments({0: copy.deepcopy(comments)}, 0)
header = _make_comment(header) + "\n" if header else ""
result = []
stack = []
line_shift = 0
array_index: int | None = None
key: str | int | None = None
for token in generate_tokens(io.StringIO(data).readline):
if (
token.type == STRING or (array_index is not None and token.string != "]")
) and result[-1].type == NL:
key = array_index if array_index is not None else json.loads(token.string)
stack.append((cdict, array_index, key))
comm, cdict = _get_comments(cdict, key) # type: ignore[reportGeneralTypeIssues]
if comm:
comm = _make_comment(comm, token.start[1])
comm_coord = (token.start[0] + line_shift, 0)
result.append(
TokenInfo(
COMMENT,
comm,
comm_coord,
comm_coord,
"",
),
)
result.append(
TokenInfo(
NL,
"\n",
comm_coord,
comm_coord,
"",
),
)
line_shift += 1

if token.string == ",":
_warn_unused(cdict, stack)
cdict, array_index, key = stack.pop()
if array_index is not None:
array_index += 1
elif token.string == "[":
stack.append((cdict, array_index, key))
array_index = 0
elif token.string == "{":
stack.append((cdict, array_index, key))
array_index = None
elif token.string in {"]", "}"}:
_warn_unused(cdict, stack)
cdict, array_index, key = stack.pop()
if result[-1].type == NL and result[-2].string != ",":
_warn_unused(cdict, stack)
cdict, array_index, key = stack.pop()

result.append(
TokenInfo(
token.type,
token.string,
(token.start[0] + line_shift, token.start[1]),
(token.end[0] + line_shift, token.end[1]),
token.line,
),
)

if stack:
msg = "Error when adding comments to JSON"
raise ValueError(msg)
return header + untokenize(result)
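
A sketch of how the Comments structure drives _add_comments (values and output are illustrative assumptions): a plain string is a leaf comment, a tuple pairs a comment for the key itself with nested comments, integer keys address array elements, and a top-level tuple produces a header comment before the document. Keys that never match any token end up in the _warn_unused warning.

import json
from jsonc._add_comments import _add_comments

data = json.dumps({"a": 1, "b": [10, 20]}, indent=4)
comments = (
    "file header",  # emitted as a // comment before the JSON body
    {
        "a": "leaf comment for a",
        "b": ("about the list", {1: "second element"}),
        "missing": "never matches, so a UserWarning about an unused comment is emitted",
    },
)
print(_add_comments(data, comments))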