Skip to content

Commit

Permalink
Feat: transpile multi-arg DISTINCT expression (#2936)
Browse files Browse the repository at this point in the history
  • Loading branch information
georgesittas authored Feb 8, 2024
1 parent 08cd117 commit 85073d1
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 0 deletions.
1 change: 1 addition & 0 deletions sqlglot/dialects/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,7 @@ class Generator(generator.Generator):
IGNORE_NULLS_IN_FUNC = True
JSON_PATH_BRACKETED_KEY_SUPPORTED = False
SUPPORTS_CREATE_TABLE_LIKE = False
MULTI_ARG_DISTINCT = False

TRANSFORMS = {
**generator.Generator.TRANSFORMS,
Expand Down
1 change: 1 addition & 0 deletions sqlglot/dialects/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,7 @@ class Generator(generator.Generator):
JSON_TYPE_REQUIRED_FOR_EXTRACTION = True
SUPPORTS_UNLOGGED_TABLES = True
LIKE_PROPERTY_INSIDE_SCHEMA = True
MULTI_ARG_DISTINCT = False

SUPPORTED_JSON_PATH_PARTS = {
exp.JSONPathKey,
Expand Down
1 change: 1 addition & 0 deletions sqlglot/dialects/presto.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ class Generator(generator.Generator):
LIMIT_ONLY_LITERALS = True
SUPPORTS_SINGLE_ARG_CONCAT = False
LIKE_PROPERTY_INSIDE_SCHEMA = True
MULTI_ARG_DISTINCT = False

PROPERTIES_LOCATION = {
**generator.Generator.PROPERTIES_LOCATION,
Expand Down
11 changes: 11 additions & 0 deletions sqlglot/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,10 @@ class Generator(metaclass=_Generator):
# Whether or not the LikeProperty needs to be specified inside of the schema clause
LIKE_PROPERTY_INSIDE_SCHEMA = False

# Whether or not DISTINCT can be followed by multiple args in an AggFunc. If not, it will be
# transpiled into a series of CASE-WHEN-ELSE, ultimately using a tuple conseisting of the args
MULTI_ARG_DISTINCT = True

# Whether or not the JSON extraction operators expect a value of type JSON
JSON_TYPE_REQUIRED_FOR_EXTRACTION = False

Expand Down Expand Up @@ -2837,6 +2841,13 @@ def addconstraint_sql(self, expression: exp.AddConstraint) -> str:

def distinct_sql(self, expression: exp.Distinct) -> str:
this = self.expressions(expression, flat=True)

if not self.MULTI_ARG_DISTINCT and len(expression.expressions) > 1:
case = exp.case()
for arg in expression.expressions:
case = case.when(arg.is_(exp.null()), exp.null())
this = self.sql(case.else_(f"({this})"))

this = f" {this}" if this else ""

on = self.sql(expression, "on")
Expand Down
15 changes: 15 additions & 0 deletions tests/dialects/test_spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,21 @@ def test_spark(self):
"SELECT STR_TO_MAP('a:1,b:2,c:3', ',', ':')",
)

self.validate_all(
"WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl",
write={
"clickhouse": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl",
"databricks": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl",
"doris": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl",
"duckdb": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT CASE WHEN id IS NULL THEN NULL WHEN name IS NULL THEN NULL ELSE (id, name) END) AS cnt FROM tbl",
"hive": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl",
"mysql": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl",
"postgres": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT CASE WHEN id IS NULL THEN NULL WHEN name IS NULL THEN NULL ELSE (id, name) END) AS cnt FROM tbl",
"presto": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT CASE WHEN id IS NULL THEN NULL WHEN name IS NULL THEN NULL ELSE (id, name) END) AS cnt FROM tbl",
"snowflake": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl",
"spark": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl",
},
)
self.validate_all(
"SELECT TO_UTC_TIMESTAMP('2016-08-31', 'Asia/Seoul')",
write={
Expand Down

0 comments on commit 85073d1

Please sign in to comment.