_parser.py

# built-in
import tokenize
from enum import Enum
from typing import Iterator, NamedTuple
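
# Token-level analysis: turn tokens from the tokenize module into LineInfo
# reports so the checker can flag code lines (LN001) and comment/doc lines
# (LN002) that are too long.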


class Message(str, Enum):
    LN001 = 'code line is too long'
    LN002 = 'doc/comment line is too long'
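

# SKIP_PREFIXES: comment bodies (lowercased, leading '#' dropped) that mark
# linter directives; such comments are skipped entirely.
# SQL_PREFIXES: single-line strings starting with these keywords are treated
# as inline SQL and still reported as code lines.
# TRUNCATE_TO: cap on how much a long unbreakable trailing chunk (a URL,
# a path) adds to a line's measured length.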
SKIP_PREFIXES = ('noqa', 'n:', 'w:', 'e:', 'r:', 'pragma:')
SQL_PREFIXES = ('SELECT ', 'UPDATE ', 'DELETE ')
TRUNCATE_TO = 10
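
# Token types and exact (type, string) pairs that never trigger a report on
# their own; the line they sit on is still measured via its other tokens.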
EXCLUDED_TOKENS = frozenset({
    tokenize.NEWLINE,
    tokenize.NL,
    tokenize.ENCODING,
    tokenize.ENDMARKER,
    tokenize.ERRORTOKEN,
    tokenize.COMMA,
    tokenize.LBRACE,
    tokenize.RBRACE,
    tokenize.COLON,
})
EXCLUDED_PAIRS = frozenset({
    (tokenize.OP, '('),
    (tokenize.OP, ')'),
    (tokenize.OP, '['),
    (tokenize.OP, ']'),
    (tokenize.OP, '{'),
    (tokenize.OP, '}'),
    (tokenize.OP, ','),
    (tokenize.OP, ';'),
    (tokenize.OP, ':'),
})
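

# A single report: the check that fired, the 1-based row, the measured line
# length, and the offending line itself.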
class LineInfo(NamedTuple):
    message: Message
    row: int
    length: int
    line: str
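

# Effective length of a line: if the last whitespace-separated chunk is
# TRUNCATE_TO characters or longer (a URL, a path, one long word), count it
# as TRUNCATE_TO characters, on the assumption that it cannot be wrapped.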
def get_line_length(line: str) -> int:
    chunks = line.split()
    if not chunks:
        return len(line)
    last_chunk_size = len(chunks[-1])
    if last_chunk_size < TRUNCATE_TO:
        return len(line)
    return len(line) - last_chunk_size + TRUNCATE_TO
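

# Yield a LineInfo for everything in the token worth length-checking:
# ordinary code tokens report their line as LN001; comments and multiline
# strings report each of their lines as LN002; single-line strings are
# skipped unless they look like SQL, in which case they count as code.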
def get_lines_info(token: tokenize.TokenInfo) -> Iterator[LineInfo]:
    if token.type in EXCLUDED_TOKENS:
        return
    if (token.type, token.string) in EXCLUDED_PAIRS:
        return
    if token.type not in {tokenize.COMMENT, tokenize.STRING}:
        if token.end[1] > token.start[1]:
            yield LineInfo(
                message=Message.LN001,
                row=token.end[0],
                length=token.end[1],
                line=token.line,
            )
        else:
            yield LineInfo(
                message=Message.LN001,
                row=token.start[0],
                length=token.start[1],
                line=token.line,
            )
        return
    if token.type == tokenize.COMMENT:
        # skip shebang
        if token.string.startswith('#!'):
            return
        # skip noqa, pragma, and other special tokens
        if token.string.lower()[1:].lstrip().startswith(SKIP_PREFIXES):
            return
    # skip single-line strings
    if token.type == tokenize.STRING and '\n' not in token.string:
        # do not skip SQL queries
        if token.string.lstrip('brfu').lstrip('"\'').startswith(SQL_PREFIXES):
            yield LineInfo(
                message=Message.LN001,
                row=token.start[0],
                length=token.start[1] + get_line_length(token.string),
                line=token.line,
            )
        return
    # analyze every line of comments and multiline strings
    lines = token.string.splitlines()
    for offset, line in enumerate(lines):
        line_length = get_line_length(line)
        if offset == 0:
            line_length += token.start[1]
        yield LineInfo(
            message=Message.LN002,
            row=token.start[0] + offset,
            length=line_length,
            line=line,
        )
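

# A minimal usage sketch, not part of the original module: run this file
# directly to flag long lines in a Python source file. The 90-character
# limit and the fallback to checking this very file are arbitrary choices
# for illustration, not the checker's real configuration.
if __name__ == '__main__':
    import sys

    max_length = 90
    path = sys.argv[1] if len(sys.argv) > 1 else __file__
    seen = set()
    with tokenize.open(path) as stream:
        for tok in tokenize.generate_tokens(stream.readline):
            for info in get_lines_info(tok):
                if info.length <= max_length:
                    continue
                # report each (row, check) pair at most once
                if (info.row, info.message) in seen:
                    continue
                seen.add((info.row, info.message))
                print(f'{path}:{info.row}: {info.message.name} {info.message.value}')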