Skip to content

Commit

Permalink
fix #482: support anti, semi, positional, asof joins
Browse files Browse the repository at this point in the history
  • Loading branch information
tconbeer committed Sep 25, 2023
1 parent 63c0433 commit d2ad8fe
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file.

- `any()` and `all()` will no longer get spaces between the function name and the parenthesis, unless they are a part of a `like any ()` or `like all ()` operator ([#483](https://github.com/tconbeer/sqlfmt/issues/483) - thank you [@damirbk](https://github.com/damirbk)!).
- Snowflake's `//` comment markers are now parsed as comments and rewritten to `--` on formatting ([#468](https://github.com/tconbeer/sqlfmt/issues/468) - thank you [@nilsonavp](https://github.com/nilsonavp)!).
- DuckDB's `semi`, `anti`, `positional`, and `asof` joins are now supported. ([#482](https://github.com/tconbeer/sqlfmt/issues/482)).

## [0.19.2] - 2023-07-31

Expand Down
6 changes: 2 additions & 4 deletions src/sqlfmt/rules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,8 @@
),
r"delete\s+from",
r"from",
(
r"(natural\s+)?"
r"((inner|cross|((left|right|full)(\s+outer)?))\s+)?join"
),
r"((cross|positional|semi|anti)\s+)?join",
(r"((natural|asof)\s+)?" r"((inner|(left|right|full)(\s+outer)?)\s+)?join"),
# this is the USING following DELETE, not the join operator
# (see above)
r"using",
Expand Down
86 changes: 86 additions & 0 deletions tests/data/unformatted/129_duckdb_joins.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
-- Source https://duckdb.org/docs/sql/query_syntax/from.html
-- Copyright DuckDB Foundation.

-- return a list of cars that have a valid region.
SELECT cars.name, cars.manufacturer
FROM cars SEMI JOIN region
ON cars.region = region.id;
-- return a list of cars with no recorded safety data.
SELECT cars.name, cars.manufacturer
FROM cars ANTI JOIN safety_data
ON cars.safety_report_id = safety_data.report_id;
SELECT *
FROM range(3) t(i), LATERAL (SELECT i + 1) t2(j);
SELECT *
FROM generate_series(0, 1) t(i), LATERAL (SELECT i + 10 UNION ALL SELECT i + 100) t2(j);
CREATE TABLE t1 AS SELECT * FROM range(3) t(i), LATERAL (SELECT i + 1) t2(j);
SELECT * FROM t1, LATERAL (SELECT i + j) t2(k);
-- treat two data frames as a single table
SELECT df1.*, df2.*
FROM df1 POSITIONAL JOIN df2;
-- attach prices to stock trades
SELECT t.*, p.price
FROM trades t ASOF JOIN prices p
ON t.symbol = p.symbol AND t.created_at >= p.created_at;
-- attach prices or NULLs to stock trades
SELECT *
FROM trades t ASOF LEFT JOIN prices p
ON t.symbol = p.symbol AND t.created_at >= p.created_at;
SELECT *
FROM trades t ASOF JOIN prices p USING (symbol, created_at);
-- Returns symbol, trades.created_at, price (but NOT prices.created_at)
SELECT t.symbol, t.created_at AS trade_when, p.created_at AS price_when, price
FROM trades t ASOF LEFT JOIN prices p USING (symbol, created_at);
)))))__SQLFMT_OUTPUT__(((((
-- Source https://duckdb.org/docs/sql/query_syntax/from.html
-- Copyright DuckDB Foundation.
-- return a list of cars that have a valid region.
select cars.name, cars.manufacturer
from cars
semi join region on cars.region = region.id
;
-- return a list of cars with no recorded safety data.
select cars.name, cars.manufacturer
from cars
anti join safety_data on cars.safety_report_id = safety_data.report_id
;
select *
from range(3) t(i), lateral(select i + 1) t2(j)
;
select *
from
generate_series(0, 1) t(i),
lateral(
select i + 10
union all
select i + 100
) t2(j)
;
CREATE TABLE t1 AS SELECT * FROM range(3) t(i), LATERAL (SELECT i + 1) t2(j);
select *
from t1, lateral(select i + j) t2(k)
;
-- treat two data frames as a single table
select df1.*, df2.*
from df1
positional join df2
;
-- attach prices to stock trades
select t.*, p.price
from trades t
asof join prices p on t.symbol = p.symbol and t.created_at >= p.created_at
;
-- attach prices or NULLs to stock trades
select *
from trades t
asof left join prices p on t.symbol = p.symbol and t.created_at >= p.created_at
;
select *
from trades t
asof join prices p using (symbol, created_at)
;
-- Returns symbol, trades.created_at, price (but NOT prices.created_at)
select t.symbol, t.created_at as trade_when, p.created_at as price_when, price
from trades t
asof left join prices p using (symbol, created_at)
;
1 change: 1 addition & 0 deletions tests/functional_tests/test_general_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
"unformatted/126_blank_lines.sql",
"unformatted/127_more_comments.sql",
"unformatted/128_double_slash_comments.sql",
"unformatted/129_duckdb_joins.sql",
"unformatted/200_base_model.sql",
"unformatted/201_basic_snapshot.sql",
"unformatted/202_unpivot_macro.sql",
Expand Down
6 changes: 6 additions & 0 deletions tests/unit_tests/test_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,12 @@ def get_rule(ruleset: List[Rule], rule_name: str) -> Rule:
(MAIN, "unterm_keyword", "natural\t full outer join"),
(MAIN, "unterm_keyword", "left join"),
(MAIN, "unterm_keyword", "cross join"),
(MAIN, "unterm_keyword", "positional join"),
(MAIN, "unterm_keyword", "asof join"),
(MAIN, "unterm_keyword", "asof left join"),
(MAIN, "unterm_keyword", "asof right\n outer\n join"),
(MAIN, "unterm_keyword", "semi join"),
(MAIN, "unterm_keyword", "anti join"),
(MAIN, "unterm_keyword", "join"),
(MAIN, "unterm_keyword", "values"),
(MAIN, "unterm_keyword", "cluster by"),
Expand Down

0 comments on commit d2ad8fe

Please sign in to comment.