From d2ad8fe192ffef8aa3f5fef1abd3c7279583fc8d Mon Sep 17 00:00:00 2001 From: Ted Conbeer Date: Wed, 20 Sep 2023 07:46:15 -0600 Subject: [PATCH] fix #482: support anti, semi, positional, asof joins --- CHANGELOG.md | 1 + src/sqlfmt/rules/__init__.py | 6 +- tests/data/unformatted/129_duckdb_joins.sql | 86 +++++++++++++++++++ .../test_general_formatting.py | 1 + tests/unit_tests/test_rule.py | 6 ++ 5 files changed, 96 insertions(+), 4 deletions(-) create mode 100644 tests/data/unformatted/129_duckdb_joins.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index 4eb8fc47..0d6f1c2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file. - `any()` and `all()` will no longer get spaces between the function name and the parenthesis, unless they are a part of a `like any ()` or `like all ()` operator ([#483](https://github.com/tconbeer/sqlfmt/issues/483) - thank you [@damirbk](https://github.com/damirbk)!). - Snowflake's `//` comment markers are now parsed as comments and rewritten to `--` on formatting ([#468](https://github.com/tconbeer/sqlfmt/issues/468) - thank you [@nilsonavp](https://github.com/nilsonavp)!). +- DuckDB's `semi`, `anti`, `positional`, and `asof` joins are now supported. ([#482](https://github.com/tconbeer/sqlfmt/issues/482)). ## [0.19.2] - 2023-07-31 diff --git a/src/sqlfmt/rules/__init__.py b/src/sqlfmt/rules/__init__.py index d97ee2f8..a714f02b 100644 --- a/src/sqlfmt/rules/__init__.py +++ b/src/sqlfmt/rules/__init__.py @@ -122,10 +122,8 @@ ), r"delete\s+from", r"from", - ( - r"(natural\s+)?" - r"((inner|cross|((left|right|full)(\s+outer)?))\s+)?join" - ), + r"((cross|positional|semi|anti)\s+)?join", + (r"((natural|asof)\s+)?" r"((inner|(left|right|full)(\s+outer)?)\s+)?join"), # this is the USING following DELETE, not the join operator # (see above) r"using", diff --git a/tests/data/unformatted/129_duckdb_joins.sql b/tests/data/unformatted/129_duckdb_joins.sql new file mode 100644 index 00000000..bf848a20 --- /dev/null +++ b/tests/data/unformatted/129_duckdb_joins.sql @@ -0,0 +1,86 @@ +-- Source https://duckdb.org/docs/sql/query_syntax/from.html +-- Copyright DuckDB Foundation. + +-- return a list of cars that have a valid region. +SELECT cars.name, cars.manufacturer +FROM cars SEMI JOIN region +ON cars.region = region.id; +-- return a list of cars with no recorded safety data. +SELECT cars.name, cars.manufacturer +FROM cars ANTI JOIN safety_data +ON cars.safety_report_id = safety_data.report_id; +SELECT * +FROM range(3) t(i), LATERAL (SELECT i + 1) t2(j); +SELECT * +FROM generate_series(0, 1) t(i), LATERAL (SELECT i + 10 UNION ALL SELECT i + 100) t2(j); +CREATE TABLE t1 AS SELECT * FROM range(3) t(i), LATERAL (SELECT i + 1) t2(j); +SELECT * FROM t1, LATERAL (SELECT i + j) t2(k); +-- treat two data frames as a single table +SELECT df1.*, df2.* +FROM df1 POSITIONAL JOIN df2; +-- attach prices to stock trades +SELECT t.*, p.price +FROM trades t ASOF JOIN prices p + ON t.symbol = p.symbol AND t.created_at >= p.created_at; + -- attach prices or NULLs to stock trades +SELECT * +FROM trades t ASOF LEFT JOIN prices p + ON t.symbol = p.symbol AND t.created_at >= p.created_at; +SELECT * +FROM trades t ASOF JOIN prices p USING (symbol, created_at); +-- Returns symbol, trades.created_at, price (but NOT prices.created_at) +SELECT t.symbol, t.created_at AS trade_when, p.created_at AS price_when, price +FROM trades t ASOF LEFT JOIN prices p USING (symbol, created_at); +)))))__SQLFMT_OUTPUT__((((( +-- Source https://duckdb.org/docs/sql/query_syntax/from.html +-- Copyright DuckDB Foundation. +-- return a list of cars that have a valid region. +select cars.name, cars.manufacturer +from cars +semi join region on cars.region = region.id +; +-- return a list of cars with no recorded safety data. +select cars.name, cars.manufacturer +from cars +anti join safety_data on cars.safety_report_id = safety_data.report_id +; +select * +from range(3) t(i), lateral(select i + 1) t2(j) +; +select * +from + generate_series(0, 1) t(i), + lateral( + select i + 10 + union all + select i + 100 + ) t2(j) +; +CREATE TABLE t1 AS SELECT * FROM range(3) t(i), LATERAL (SELECT i + 1) t2(j); +select * +from t1, lateral(select i + j) t2(k) +; +-- treat two data frames as a single table +select df1.*, df2.* +from df1 +positional join df2 +; +-- attach prices to stock trades +select t.*, p.price +from trades t +asof join prices p on t.symbol = p.symbol and t.created_at >= p.created_at +; +-- attach prices or NULLs to stock trades +select * +from trades t +asof left join prices p on t.symbol = p.symbol and t.created_at >= p.created_at +; +select * +from trades t +asof join prices p using (symbol, created_at) +; +-- Returns symbol, trades.created_at, price (but NOT prices.created_at) +select t.symbol, t.created_at as trade_when, p.created_at as price_when, price +from trades t +asof left join prices p using (symbol, created_at) +; diff --git a/tests/functional_tests/test_general_formatting.py b/tests/functional_tests/test_general_formatting.py index 2d5212ba..73f11a60 100644 --- a/tests/functional_tests/test_general_formatting.py +++ b/tests/functional_tests/test_general_formatting.py @@ -45,6 +45,7 @@ "unformatted/126_blank_lines.sql", "unformatted/127_more_comments.sql", "unformatted/128_double_slash_comments.sql", + "unformatted/129_duckdb_joins.sql", "unformatted/200_base_model.sql", "unformatted/201_basic_snapshot.sql", "unformatted/202_unpivot_macro.sql", diff --git a/tests/unit_tests/test_rule.py b/tests/unit_tests/test_rule.py index 9d4f3735..839f15b2 100644 --- a/tests/unit_tests/test_rule.py +++ b/tests/unit_tests/test_rule.py @@ -169,6 +169,12 @@ def get_rule(ruleset: List[Rule], rule_name: str) -> Rule: (MAIN, "unterm_keyword", "natural\t full outer join"), (MAIN, "unterm_keyword", "left join"), (MAIN, "unterm_keyword", "cross join"), + (MAIN, "unterm_keyword", "positional join"), + (MAIN, "unterm_keyword", "asof join"), + (MAIN, "unterm_keyword", "asof left join"), + (MAIN, "unterm_keyword", "asof right\n outer\n join"), + (MAIN, "unterm_keyword", "semi join"), + (MAIN, "unterm_keyword", "anti join"), (MAIN, "unterm_keyword", "join"), (MAIN, "unterm_keyword", "values"), (MAIN, "unterm_keyword", "cluster by"),