Skip to content

Commit

Permalink
Merge pull request #620 from tconbeer/fix/handle-reserved-kws
Browse files Browse the repository at this point in the history
fix: parse keywords as names if qualified by period
  • Loading branch information
tconbeer authored Jul 26, 2024
2 parents 45332bb + ed15319 commit 3ceb7e7
Show file tree
Hide file tree
Showing 11 changed files with 223 additions and 31 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

### Formatting Changes and Bug Fixes

- sqlfmt will now parse unquoted reserved keywords as names if they are qualified by a period, e.g., `foo.select` or `foo.case` ([#599](https://github.com/tconbeer/sqlfmt/issues/599) - thank you [@matthieucan](https://github.com/matthieucan)!).

## [0.22.0] - 2024-07-25

### Formatting Changes and Bug Fixes
Expand Down
44 changes: 42 additions & 2 deletions src/sqlfmt/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,41 @@ def handle_number(analyzer: "Analyzer", source_string: str, match: re.Match) ->
)


def handle_nonreserved_keyword(
def handle_reserved_keyword(
analyzer: "Analyzer",
source_string: str,
match: re.Match,
action: Callable[["Analyzer", str, re.Match], None],
) -> None:
"""
Reserved keywords can be used in most dialects as table or column
names without quoting if they are qualified.
https://github.com/tconbeer/sqlfmt/issues/599
This checks if the previous token is a period, and if so, lexes
this token as a name; otherwise this action executes the passed
action (which likely adds the node as some kind of keyword).
"""
if analyzer.previous_node is None:
action(analyzer, source_string, match)
return

previous_token, _ = get_previous_token(analyzer.previous_node)
if previous_token is not None and previous_token.type is TokenType.DOT:
token = Token.from_match(source_string, match, token_type=TokenType.NAME)
if not token.prefix:
node = analyzer.node_manager.create_node(
token=token, previous_node=analyzer.previous_node
)
analyzer.node_buffer.append(node)
analyzer.pos = token.epos
return

# in all other cases, this is a keyword.
action(analyzer, source_string, match)


def handle_nonreserved_top_level_keyword(
analyzer: "Analyzer",
source_string: str,
match: re.Match,
Expand All @@ -317,6 +351,10 @@ def handle_nonreserved_keyword(
"""
Checks to see if we're at depth 0 (assuming this is a name); if so, then take the
passed action, otherwise lex it as a name.
For example, this allows us to lex these differently:
explain select 1;
select explain, 1;
"""
token = Token.from_match(source_string, match, token_type=TokenType.NAME)
node = analyzer.node_manager.create_node(
Expand All @@ -326,7 +364,9 @@ def handle_nonreserved_keyword(
analyzer.node_buffer.append(node)
analyzer.pos = token.epos
else:
action(analyzer, source_string, match)
handle_reserved_keyword(
analyzer=analyzer, source_string=source_string, match=match, action=action
)


def lex_ruleset(
Expand Down
83 changes: 62 additions & 21 deletions src/sqlfmt/rules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,23 @@
priority=1000,
pattern=group(r"case") + group(r"\W", r"$"),
action=partial(
actions.add_node_to_buffer, token_type=TokenType.STATEMENT_START
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.STATEMENT_START
),
),
),
Rule(
name="statement_end",
priority=1010,
pattern=group(r"end") + group(r"\W", r"$"),
action=partial(
actions.safe_add_node_to_buffer,
token_type=TokenType.STATEMENT_END,
fallback_token_type=TokenType.NAME,
actions.handle_reserved_keyword,
action=partial(
actions.safe_add_node_to_buffer,
token_type=TokenType.STATEMENT_END,
fallback_token_type=TokenType.NAME,
),
),
),
Rule(
Expand Down Expand Up @@ -65,7 +71,12 @@
r"within\s+group",
)
+ group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR
),
),
),
Rule(
name="star_replace_exclude",
Expand All @@ -76,8 +87,10 @@
)
+ group(r"\s+\("),
action=partial(
actions.add_node_to_buffer,
token_type=TokenType.WORD_OPERATOR,
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR
),
),
),
Rule(
Expand All @@ -88,13 +101,21 @@
name="join_using",
priority=1110,
pattern=group(r"using") + group(r"\s*\("),
action=partial(actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR
),
),
),
Rule(
name="on",
priority=1120,
pattern=group(r"on") + group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.ON),
action=partial(
actions.handle_reserved_keyword,
action=partial(actions.add_node_to_buffer, token_type=TokenType.ON),
),
),
Rule(
name="boolean_operator",
Expand All @@ -106,8 +127,10 @@
)
+ group(r"\W", r"$"),
action=partial(
actions.add_node_to_buffer,
token_type=TokenType.BOOLEAN_OPERATOR,
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.BOOLEAN_OPERATOR
),
),
),
Rule(
Expand Down Expand Up @@ -154,15 +177,25 @@
r"returning",
)
+ group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD
),
),
),
Rule(
name="frame_clause",
priority=1305,
pattern=group(r"(range|rows|groups)\s+")
+ group(r"(between\s+)?((unbounded|\d+)\s+(preceding|following)|current\s+row)")
+ group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD
),
),
),
Rule(
# BQ arrays use an offset(n) function for
Expand All @@ -172,7 +205,12 @@
name="offset_keyword",
priority=1310,
pattern=group(r"offset") + group(r"\s+", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD
),
),
),
Rule(
name="set_operator",
Expand All @@ -181,15 +219,18 @@
r"(union|intersect|except|minus)(\s+(all|distinct))?(\s+by\s+name)?",
)
+ group(r"\W", r"$"),
action=actions.handle_set_operator,
action=partial(
actions.handle_reserved_keyword,
action=actions.handle_set_operator,
),
),
Rule(
name="explain",
priority=2000,
pattern=group(r"explain(\s+(analyze|verbose|using\s+(tabular|json|text)))?")
+ group(r"\W", r"$"),
action=partial(
actions.handle_nonreserved_keyword,
actions.handle_nonreserved_top_level_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD
),
Expand All @@ -200,7 +241,7 @@
priority=2010,
pattern=group(r"grant", r"revoke") + group(r"\W", r"$"),
action=partial(
actions.handle_nonreserved_keyword,
actions.handle_nonreserved_top_level_keyword,
action=partial(actions.lex_ruleset, new_ruleset=GRANT),
),
),
Expand All @@ -209,7 +250,7 @@
priority=2015,
pattern=group(CREATE_CLONABLE + r"\s+.+?\s+clone") + group(r"\W", r"$"),
action=partial(
actions.handle_nonreserved_keyword,
actions.handle_nonreserved_top_level_keyword,
action=partial(
actions.lex_ruleset,
new_ruleset=CLONE,
Expand All @@ -221,7 +262,7 @@
priority=2020,
pattern=group(CREATE_FUNCTION, ALTER_DROP_FUNCTION) + group(r"\W", r"$"),
action=partial(
actions.handle_nonreserved_keyword,
actions.handle_nonreserved_top_level_keyword,
action=partial(
actions.lex_ruleset,
new_ruleset=FUNCTION,
Expand All @@ -237,7 +278,7 @@
)
+ group(r"\W", r"$"),
action=partial(
actions.handle_nonreserved_keyword,
actions.handle_nonreserved_top_level_keyword,
action=partial(
actions.lex_ruleset,
new_ruleset=WAREHOUSE,
Expand Down Expand Up @@ -301,7 +342,7 @@
+ r"(?!\()"
+ group(r"\W", r"$"),
action=partial(
actions.handle_nonreserved_keyword,
actions.handle_nonreserved_top_level_keyword,
action=partial(actions.add_node_to_buffer, token_type=TokenType.FMT_OFF),
),
),
Expand Down
14 changes: 12 additions & 2 deletions src/sqlfmt/rules/clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@
name="unterm_keyword",
priority=1300,
pattern=group(CREATE_CLONABLE, r"clone") + group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD
),
),
),
Rule(
name="word_operator",
Expand All @@ -22,6 +27,11 @@
r"before",
)
+ group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR
),
),
),
]
19 changes: 16 additions & 3 deletions src/sqlfmt/rules/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
r"as",
)
+ group(r"\W", r"$"),
action=actions.handle_ddl_as,
action=partial(
actions.handle_reserved_keyword,
action=actions.handle_ddl_as,
),
),
Rule(
name="word_operator",
Expand All @@ -27,7 +30,12 @@
r"runtime_version",
)
+ group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR
),
),
),
Rule(
name="unterm_keyword",
Expand Down Expand Up @@ -85,6 +93,11 @@
r"(re)?set(\s+all)?",
)
+ group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD
),
),
),
]
7 changes: 6 additions & 1 deletion src/sqlfmt/rules/grant.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@
r"restrict",
)
+ group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD
),
),
),
]
7 changes: 6 additions & 1 deletion src/sqlfmt/rules/warehouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@
r"rename\s+to",
)
+ group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD
),
),
),
]
2 changes: 1 addition & 1 deletion src/sqlfmt_primer/primer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def get_projects() -> List[SQLProject]:
SQLProject(
name="gitlab",
git_url="https://github.com/tconbeer/gitlab-analytics-sqlfmt.git",
git_ref="b1935f4", # sqlfmt 54b8edd
git_ref="5cd49f6", # sqlfmt aed0f39
expected_changed=1,
expected_unchanged=2416,
expected_errored=0,
Expand Down
21 changes: 21 additions & 0 deletions tests/data/preformatted/008_reserved_names.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
with
"select" as (select * from foo.select),
"case" as (select * from foo.case),
"end" as (select * from foo.end),
"as" as (select * from foo.as),
"interval" as (select * from foo.interval),
"exclude" as (select * from foo.exclude),
"using" as (select * from foo.using),
"on" as (select * from foo.on),
"and" as (select * from foo.and),
"limit" as (select * from foo.limit),
"over" as (select * from foo.over),
"between" as (select * from foo.between),
"union" as (select * from foo.union),
"explain" as (select * from foo.explain),
"grant" as (select * from foo.grant),
"create" as (select * from foo.create),
"alter" as (select * from foo.alter),
"truncate" as (select * from foo.truncate),
"drop" as (select * from foo.drop)
select 1
Loading

0 comments on commit 3ceb7e7

Please sign in to comment.