From 7de03fd55365490b46fb8ee497b1aacd56f52f5c Mon Sep 17 00:00:00 2001
From: George Sittas
Date: Thu, 15 Jun 2023 18:34:31 +0300
Subject: [PATCH 1/8] Refactor!: move normalization logic into Dialect, handle
 BigQuery case-insensitivity

---
 sqlglot/dataframe/sql/readwriter.py        |  4 +--
 sqlglot/dialects/bigquery.py               |  1 +
 sqlglot/dialects/dialect.py                | 27 ++++++++++++-----
 sqlglot/dialects/snowflake.py              |  1 +
 sqlglot/generator.py                       |  4 +--
 sqlglot/helper.py                          | 16 ++++++----
 sqlglot/optimizer/normalize_identifiers.py | 25 +++++-----------
 sqlglot/schema.py                          |  7 ++---
 tests/fixtures/optimizer/optimizer.sql     | 35 ++++++++++++++++++++++
 tests/test_schema.py                       |  4 +++
 10 files changed, 85 insertions(+), 39 deletions(-)

diff --git a/sqlglot/dataframe/sql/readwriter.py b/sqlglot/dataframe/sql/readwriter.py
index cc2f181094..feddd15c01 100644
--- a/sqlglot/dataframe/sql/readwriter.py
+++ b/sqlglot/dataframe/sql/readwriter.py
@@ -4,7 +4,7 @@
 
 import sqlglot
 from sqlglot import expressions as exp
-from sqlglot.helper import object_to_dict, should_identify
+from sqlglot.helper import can_identify, object_to_dict
 
 if t.TYPE_CHECKING:
     from sqlglot.dataframe.sql.dataframe import DataFrame
@@ -26,7 +26,7 @@ def table(self, tableName: str) -> DataFrame:
             .from_(tableName)
             .select(
                 *(
-                    column if should_identify(column, "safe") else f'"{column}"'
+                    column if can_identify(column, "safe") else f'"{column}"'
                     for column in sqlglot.schema.column_names(tableName)
                 )
             ),
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py
index 2166e65ea5..7629c3f646 100644
--- a/sqlglot/dialects/bigquery.py
+++ b/sqlglot/dialects/bigquery.py
@@ -105,6 +105,7 @@ def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
 
 class BigQuery(Dialect):
     UNNEST_COLUMN_ONLY = True
+    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None
 
     TIME_MAPPING = {
         "%D": "%m/%d/%y",
diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py
index fb333345c0..db1a91b7e1 100644
--- a/sqlglot/dialects/dialect.py
+++ b/sqlglot/dialects/dialect.py
@@ -4,6 +4,7 @@
 from enum import Enum
 
 from sqlglot import exp
+from sqlglot._typing import E
 from sqlglot.generator import Generator
 from sqlglot.helper import flatten, seq_get
 from sqlglot.parser import Parser
@@ -11,14 +12,6 @@
 from sqlglot.tokens import Token, Tokenizer, TokenType
 from sqlglot.trie import new_trie
 
-if t.TYPE_CHECKING:
-    from sqlglot._typing import E
-
-
-# Only Snowflake is currently known to resolve unquoted identifiers as uppercase.
-# https://docs.snowflake.com/en/sql-reference/identifiers-syntax
-RESOLVES_IDENTIFIERS_AS_UPPERCASE = {"snowflake"}
-
 
 class Dialects(str, Enum):
     DIALECT = ""
@@ -142,6 +135,10 @@ class Dialect(metaclass=_Dialect):
     # Determines whether or not the table alias comes after tablesample
     ALIAS_POST_TABLESAMPLE = False
 
+    # Determines whether or not unquoted identifiers are resolved as uppercase
+    # When set to None, it means that the dialect treats all identifiers as case-insensitive
+    RESOLVES_IDENTIFIERS_AS_UPPERCASE: t.Optional[bool] = False
+
     # Determines whether or not an unquoted identifier can start with a digit
     IDENTIFIERS_CAN_START_WITH_DIGIT = False
 
@@ -216,6 +213,20 @@ def format_time(
 
         return expression
 
+    @classmethod
+    def normalize_identifier(cls, expression: E) -> E:
+        if isinstance(expression, exp.Identifier) and (
+            not expression.quoted or cls.RESOLVES_IDENTIFIERS_AS_UPPERCASE is None
+        ):
+            expression.set(
+                "this",
+                expression.this.upper()
+                if cls.RESOLVES_IDENTIFIERS_AS_UPPERCASE
+                else expression.this.lower(),
+            )
+
+        return expression
+
     def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
         return self.parser(**opts).parse(self.tokenize(sql), sql)
 
diff --git a/sqlglot/dialects/snowflake.py b/sqlglot/dialects/snowflake.py
index 148b6d82fd..a8bea8e6df 100644
--- a/sqlglot/dialects/snowflake.py
+++ b/sqlglot/dialects/snowflake.py
@@ -167,6 +167,7 @@ def _parse_convert_timezone(args: t.List) -> exp.Expression:
 
 
 class Snowflake(Dialect):
+    RESOLVES_IDENTIFIERS_AS_UPPERCASE = True
     NULL_ORDERING = "nulls_are_large"
     TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
 
diff --git a/sqlglot/generator.py b/sqlglot/generator.py
index add67f7cd1..55899042b2 100644
--- a/sqlglot/generator.py
+++ b/sqlglot/generator.py
@@ -5,7 +5,7 @@
 
 from sqlglot import exp
 from sqlglot.errors import ErrorLevel, UnsupportedError, concat_messages
-from sqlglot.helper import apply_index_offset, csv, seq_get, should_identify
+from sqlglot.helper import apply_index_offset, can_identify, csv, seq_get
 from sqlglot.time import format_time
 from sqlglot.tokens import TokenType
 
@@ -886,7 +886,7 @@ def identifier_sql(self, expression: exp.Identifier) -> str:
             text = text.replace(self.IDENTIFIER_END, self._escaped_identifier_end)
         if (
             expression.quoted
-            or should_identify(text, self.identify)
+            or can_identify(text, self.identify)
             or lower in self.RESERVED_KEYWORDS
             or (not self.IDENTIFIERS_CAN_START_WITH_DIGIT and text[:1].isdigit())
         ):
diff --git a/sqlglot/helper.py b/sqlglot/helper.py
index 2f48ab56af..6038eba7b9 100644
--- a/sqlglot/helper.py
+++ b/sqlglot/helper.py
@@ -434,24 +434,28 @@ def first(it: t.Iterable[T]) -> T:
 
 def case_sensitive(text: str, dialect: DialectType) -> bool:
     """Checks if text contains any case sensitive characters depending on dialect."""
-    from sqlglot.dialects.dialect import RESOLVES_IDENTIFIERS_AS_UPPERCASE
+    from sqlglot.dialects.dialect import Dialect
 
-    unsafe = str.islower if dialect in RESOLVES_IDENTIFIERS_AS_UPPERCASE else str.isupper
+    dialect = Dialect.get_or_raise(dialect)
+    if dialect.RESOLVES_IDENTIFIERS_AS_UPPERCASE is None:
+        return False
+
+    unsafe = str.islower if dialect.RESOLVES_IDENTIFIERS_AS_UPPERCASE else str.isupper
     return any(unsafe(char) for char in text)
 
 
-def should_identify(text: str, identify: str | bool, dialect: DialectType = None) -> bool:
-    """Checks if text should be identified given an identify option.
+def can_identify(text: str, identify: str | bool, dialect: DialectType = None) -> bool:
+    """Checks if text can be identified given an identify option.
 
     Args:
         text: the text to check.
         identify:
             "always" or `True`: always returns true.
             "safe": true if there is no uppercase or lowercase character in `text`, depending on `dialect`.
-        dialect: the dialect to use in order to decide whether a text should be identified.
+        dialect: the dialect to use in order to decide whether a text can be identified.
 
     Returns:
-        Whether or not a string should be identified.
+        Whether or not a string can be identified.
     """
     if identify is True or identify == "always":
         return True
diff --git a/sqlglot/optimizer/normalize_identifiers.py b/sqlglot/optimizer/normalize_identifiers.py
index 1e5c104242..99e605d3c8 100644
--- a/sqlglot/optimizer/normalize_identifiers.py
+++ b/sqlglot/optimizer/normalize_identifiers.py
@@ -1,12 +1,15 @@
-from sqlglot import exp
 from sqlglot._typing import E
-from sqlglot.dialects.dialect import RESOLVES_IDENTIFIERS_AS_UPPERCASE, DialectType
+from sqlglot.dialects.dialect import Dialect, DialectType
 
 
 def normalize_identifiers(expression: E, dialect: DialectType = None) -> E:
     """
-    Normalize all unquoted identifiers to either lower or upper case, depending on
-    the dialect. This essentially makes those identifiers case-insensitive.
+    Normalize all unquoted identifiers to either lower or upper case, depending
+    on the dialect. This essentially makes those identifiers case-insensitive.
+
+    Note:
+        Some dialects (e.g. BigQuery) treat identifiers as case-insensitive even
+        when they're quoted, so in these cases all identifiers are normalized.
 
     Example:
         >>> import sqlglot
@@ -21,16 +24,4 @@ def normalize_identifiers(expression: E, dialect: DialectType = None) -> E:
 
     Returns:
         The transformed expression.
""" - return expression.transform(_normalize, dialect, copy=False) - - -def _normalize(node: exp.Expression, dialect: DialectType = None) -> exp.Expression: - if isinstance(node, exp.Identifier) and not node.quoted: - node.set( - "this", - node.this.upper() - if dialect in RESOLVES_IDENTIFIERS_AS_UPPERCASE - else node.this.lower(), - ) - - return node + return expression.transform(Dialect.get_or_raise(dialect).normalize_identifier, copy=False) diff --git a/sqlglot/schema.py b/sqlglot/schema.py index f73adeeb14..c5d10341fc 100644 --- a/sqlglot/schema.py +++ b/sqlglot/schema.py @@ -6,7 +6,7 @@ import sqlglot from sqlglot import expressions as exp from sqlglot._typing import T -from sqlglot.dialects.dialect import RESOLVES_IDENTIFIERS_AS_UPPERCASE +from sqlglot.dialects.dialect import Dialect from sqlglot.errors import ParseError, SchemaError from sqlglot.helper import dict_depth from sqlglot.trie import in_trie, new_trie @@ -335,11 +335,10 @@ def _normalize_name(self, name: str | exp.Identifier, dialect: DialectType = Non return name if isinstance(name, str) else name.name name = identifier.name - - if not self.normalize or identifier.quoted: + if not self.normalize: return name - return name.upper() if dialect in RESOLVES_IDENTIFIERS_AS_UPPERCASE else name.lower() + return Dialect.get_or_raise(dialect).normalize_identifier(identifier).name def _depth(self) -> int: # The columns themselves are a mapping, but we don't want to include those diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index 0cb1a58de1..e91d355854 100644 --- a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -646,3 +646,38 @@ CROSS JOIN LATERAL ( "l"."log_date" DESC NULLS LAST LIMIT 1 ) AS "l"; + +# title: bigquery identifiers are case-insensitive +# execute: false +# dialect: bigquery +WITH cte AS ( + SELECT + refresh_date AS `reFREsh_date`, + term AS `TeRm`, + `rank` + FROM `bigquery-public-data.google_trends.top_terms` +) +SELECT + refresh_date AS `Day`, + term AS Top_Term, + rank, +FROM cte +WHERE + rank = 1 + AND refresh_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 2 WEEK) +GROUP BY `dAy`, `top_term`, rank +ORDER BY `DaY` DESC; +SELECT + `top_terms`.`refresh_date` AS `day`, + `top_terms`.`term` AS `top_term`, + `top_terms`.`rank` AS `rank` +FROM `bigquery-public-data`.`google_trends`.`top_terms` AS `top_terms` +WHERE + `top_terms`.`rank` = 1 + AND CAST(`top_terms`.`refresh_date` AS DATE) >= DATE_SUB(CURRENT_DATE, INTERVAL 2 WEEK) +GROUP BY + `top_terms`.`refresh_date`, + `top_terms`.`term`, + `top_terms`.`rank` +ORDER BY + `day` DESC; diff --git a/tests/test_schema.py b/tests/test_schema.py index b03e7e7c7c..e43d830856 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -229,3 +229,7 @@ def test_schema_normalization(self): # Check that the correct dialect is used when calling schema methods schema = MappingSchema(schema={"[Fo]": {"x": "int"}}, dialect="tsql") self.assertEqual(schema.column_names("[Fo]"), schema.column_names("`Fo`", dialect="spark")) + + # Check that all identifiers are normalized to lowercase for BigQuery, even quoted ones + schema = MappingSchema(schema={"`Foo`": {"BaR": "int"}}, dialect="bigquery") + self.assertEqual(schema.column_names("foo"), ["bar"]) From ac55e5aa7d156e107ebd21030c1cd232d130e2b3 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Thu, 15 Jun 2023 19:18:00 +0300 Subject: [PATCH 2/8] Move case_sensitive and _quote in Dialect to reduce overhead --- sqlglot/dialects/dialect.py | 19 
+++++++++++++++++++ sqlglot/helper.py | 18 +++++------------- sqlglot/optimizer/qualify_columns.py | 20 +++++--------------- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py index db1a91b7e1..c4f5a847dc 100644 --- a/sqlglot/dialects/dialect.py +++ b/sqlglot/dialects/dialect.py @@ -227,6 +227,25 @@ def normalize_identifier(cls, expression: E) -> E: return expression + @classmethod + def case_sensitive(cls, text: str) -> bool: + if cls.RESOLVES_IDENTIFIERS_AS_UPPERCASE is None: + return False + + unsafe = str.islower if cls.RESOLVES_IDENTIFIERS_AS_UPPERCASE else str.isupper + return any(unsafe(char) for char in text) + + @classmethod + def quote_identifier(cls, expression: E, identify: bool = True) -> E: + if isinstance(expression, exp.Identifier): + name = expression.this + expression.set( + "quoted", + identify or cls.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name), + ) + + return expression + def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]: return self.parser(**opts).parse(self.tokenize(sql), sql) diff --git a/sqlglot/helper.py b/sqlglot/helper.py index 6038eba7b9..8ec9308038 100644 --- a/sqlglot/helper.py +++ b/sqlglot/helper.py @@ -432,18 +432,6 @@ def first(it: t.Iterable[T]) -> T: return next(i for i in it) -def case_sensitive(text: str, dialect: DialectType) -> bool: - """Checks if text contains any case sensitive characters depending on dialect.""" - from sqlglot.dialects.dialect import Dialect - - dialect = Dialect.get_or_raise(dialect) - if dialect.RESOLVES_IDENTIFIERS_AS_UPPERCASE is None: - return False - - unsafe = str.islower if dialect.RESOLVES_IDENTIFIERS_AS_UPPERCASE else str.isupper - return any(unsafe(char) for char in text) - - def can_identify(text: str, identify: str | bool, dialect: DialectType = None) -> bool: """Checks if text can be identified given an identify option. 
@@ -459,6 +447,10 @@ def can_identify(text: str, identify: str | bool, dialect: DialectType = None) -
     """
     if identify is True or identify == "always":
         return True
+
     if identify == "safe":
-        return not case_sensitive(text, dialect)
+        from sqlglot.dialects.dialect import Dialect
+
+        return not Dialect.get_or_raise(dialect).case_sensitive(text)
+
     return False
diff --git a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py
index aba9a7e370..ac8eb0f59d 100644
--- a/sqlglot/optimizer/qualify_columns.py
+++ b/sqlglot/optimizer/qualify_columns.py
@@ -5,9 +5,9 @@
 
 from sqlglot import alias, exp
 from sqlglot._typing import E
-from sqlglot.dialects.dialect import DialectType
+from sqlglot.dialects.dialect import Dialect, DialectType
 from sqlglot.errors import OptimizeError
-from sqlglot.helper import case_sensitive, seq_get
+from sqlglot.helper import seq_get
 from sqlglot.optimizer.scope import Scope, traverse_scope, walk_in_scope
 from sqlglot.schema import Schema, ensure_schema
 
@@ -417,19 +417,9 @@ def _qualify_outputs(scope):
 
 def quote_identifiers(expression: E, dialect: DialectType = None, identify: bool = True) -> E:
     """Makes sure all identifiers that need to be quoted are quoted."""
-
-    def _quote(expression: E) -> E:
-        if isinstance(expression, exp.Identifier):
-            name = expression.this
-            expression.set(
-                "quoted",
-                identify
-                or case_sensitive(name, dialect=dialect)
-                or not exp.SAFE_IDENTIFIER_RE.match(name),
-            )
-            return expression
-        return expression
-
-    return expression.transform(_quote, copy=False)
+    return expression.transform(
+        Dialect.get_or_raise(dialect).quote_identifier, identify=identify, copy=False
+    )
 
 
 class Resolver:

From f3c754dd334cb97f4a73b3adf8646e1e0452e087 Mon Sep 17 00:00:00 2001
From: George Sittas
Date: Thu, 15 Jun 2023 20:12:24 +0300
Subject: [PATCH 3/8] Move can_identify to Dialect, pass it down to Generator
 as a dialect_property

---
 sqlglot/dataframe/sql/readwriter.py |  5 +++--
 sqlglot/dialects/dialect.py         | 28 ++++++++++++++++++++++++++++
 sqlglot/generator.py                |  6 ++++--
 sqlglot/helper.py                   | 25 -------------------------
 4 files changed, 35 insertions(+), 29 deletions(-)

diff --git a/sqlglot/dataframe/sql/readwriter.py b/sqlglot/dataframe/sql/readwriter.py
index feddd15c01..7da2901aa1 100644
--- a/sqlglot/dataframe/sql/readwriter.py
+++ b/sqlglot/dataframe/sql/readwriter.py
@@ -4,7 +4,8 @@
 
 import sqlglot
 from sqlglot import expressions as exp
-from sqlglot.helper import can_identify, object_to_dict
+from sqlglot.dialects import Spark
+from sqlglot.helper import object_to_dict
 
 if t.TYPE_CHECKING:
     from sqlglot.dataframe.sql.dataframe import DataFrame
@@ -26,7 +27,7 @@ def table(self, tableName: str) -> DataFrame:
             .from_(tableName)
             .select(
                 *(
-                    column if can_identify(column, "safe") else f'"{column}"'
+                    column if Spark.can_identify(column, "safe") else f'"{column}"'
                     for column in sqlglot.schema.column_names(tableName)
                 )
             ),
diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py
index c4f5a847dc..d46b12f23a 100644
--- a/sqlglot/dialects/dialect.py
+++ b/sqlglot/dialects/dialect.py
@@ -108,6 +108,7 @@ def get_start_end(token_type: TokenType) -> t.Tuple[t.Optional[str], t.Optional[
             },
             "STRING_ESCAPE": klass.tokenizer_class.STRING_ESCAPES[0],
             "IDENTIFIER_ESCAPE": klass.tokenizer_class.IDENTIFIER_ESCAPES[0],
+            "can_identify": klass.can_identify,
         }
 
         if enum not in ("", "bigquery"):
@@ -215,6 +216,11 @@ def format_time(
 
     @classmethod
     def normalize_identifier(cls, expression: E) -> E:
+        """
+        Normalizes an unquoted identifier to either lower or upper case, thus essentially
+        making it case-insensitive. If a dialect treats all identifiers as case-insensitive,
+        they will be normalized regardless of being quoted or not.
+        """
         if isinstance(expression, exp.Identifier) and (
             not expression.quoted or cls.RESOLVES_IDENTIFIERS_AS_UPPERCASE is None
         ):
@@ -229,12 +235,34 @@ def normalize_identifier(cls, expression: E) -> E:
 
     @classmethod
     def case_sensitive(cls, text: str) -> bool:
+        """Checks if text contains any case sensitive characters, based on the dialect's rules."""
         if cls.RESOLVES_IDENTIFIERS_AS_UPPERCASE is None:
             return False
 
         unsafe = str.islower if cls.RESOLVES_IDENTIFIERS_AS_UPPERCASE else str.isupper
         return any(unsafe(char) for char in text)
 
+    @classmethod
+    def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
+        """Checks if text can be identified given an identify option.
+
+        Args:
+            text: The text to check.
+            identify:
+                "always" or `True`: Always returns true.
+                "safe": True if the identifier is case-insensitive.
+
+        Returns:
+            Whether or not the given text can be identified.
+        """
+        if identify is True or identify == "always":
+            return True
+
+        if identify == "safe":
+            return not cls.case_sensitive(text)
+
+        return False
+
     @classmethod
     def quote_identifier(cls, expression: E, identify: bool = True) -> E:
         if isinstance(expression, exp.Identifier):
diff --git a/sqlglot/generator.py b/sqlglot/generator.py
index 55899042b2..6882aaea12 100644
--- a/sqlglot/generator.py
+++ b/sqlglot/generator.py
@@ -5,7 +5,7 @@
 
 from sqlglot import exp
 from sqlglot.errors import ErrorLevel, UnsupportedError, concat_messages
-from sqlglot.helper import apply_index_offset, can_identify, csv, seq_get
+from sqlglot.helper import apply_index_offset, csv, seq_get
 from sqlglot.time import format_time
 from sqlglot.tokens import TokenType
 
@@ -266,6 +266,8 @@ class Generator:
     NORMALIZE_FUNCTIONS: bool | str = "upper"
     NULL_ORDERING = "nulls_are_small"
 
+    can_identify: t.Callable[[str, str | bool], bool] = lambda *_: False
+
     # Delimiters for quotes, identifiers and the corresponding escape characters
     QUOTE_START = "'"
     QUOTE_END = "'"
@@ -886,7 +888,7 @@ def identifier_sql(self, expression: exp.Identifier) -> str:
             text = text.replace(self.IDENTIFIER_END, self._escaped_identifier_end)
         if (
             expression.quoted
-            or can_identify(text, self.identify)
+            or self.can_identify(text, self.identify)
             or lower in self.RESERVED_KEYWORDS
             or (not self.IDENTIFIERS_CAN_START_WITH_DIGIT and text[:1].isdigit())
         ):
diff --git a/sqlglot/helper.py b/sqlglot/helper.py
index 8ec9308038..327b68fe59 100644
--- a/sqlglot/helper.py
+++ b/sqlglot/helper.py
@@ -14,7 +14,6 @@
 if t.TYPE_CHECKING:
     from sqlglot import exp
     from sqlglot._typing import E, T
-    from sqlglot.dialects.dialect import DialectType
     from sqlglot.expressions import Expression
 
 CAMEL_CASE_PATTERN = re.compile("(?<!^)(?=[A-Z])")
@@ -430,27 +429,3 @@ def first(it: t.Iterable[T]) -> T:
         Useful for sets.
     """
     return next(i for i in it)
-
-
-def can_identify(text: str, identify: str | bool, dialect: DialectType = None) -> bool:
-    """Checks if text can be identified given an identify option.
-
-    Args:
-        text: the text to check.
-        identify:
-            "always" or `True`: always returns true.
-            "safe": true if there is no uppercase or lowercase character in `text`, depending on `dialect`.
-        dialect: the dialect to use in order to decide whether a text can be identified.
-
-    Returns:
-        Whether or not a string can be identified.
-    """
-    if identify is True or identify == "always":
-        return True
-
-    if identify == "safe":
-        from sqlglot.dialects.dialect import Dialect
-
-        return not Dialect.get_or_raise(dialect).case_sensitive(text)
-
-    return False

From d5a1305d120beb7d2e8df5d5e34aa863c9d9915e Mon Sep 17 00:00:00 2001
From: George Sittas
Date: Thu, 15 Jun 2023 22:47:45 +0300
Subject: [PATCH 4/8] Update other dialects that have case-insensitive
 identifiers too

---
 sqlglot/dataframe/README.md                    | 18 +++++++++++-------
 sqlglot/dataframe/sql/column.py                |  5 +++++
 sqlglot/dataframe/sql/dataframe.py             |  1 +
 sqlglot/dataframe/sql/normalize.py             |  2 ++
 sqlglot/dataframe/sql/readwriter.py            | 11 ++++-------
 sqlglot/dialects/bigquery.py                   |  2 ++
 sqlglot/dialects/dialect.py                    |  3 ++-
 sqlglot/dialects/duckdb.py                     |  3 +++
 sqlglot/dialects/hive.py                       |  3 +++
 sqlglot/dialects/presto.py                     |  5 +++++
 sqlglot/dialects/redshift.py                   |  3 +++
 sqlglot/dialects/sqlite.py                     |  3 +++
 sqlglot/generator.py                           |  2 +-
 .../integration/dataframe_validator.py         |  6 +++---
 tests/dataframe/unit/test_dataframe_writer.py  |  8 ++++----
 tests/dataframe/unit/test_session.py           | 14 +++++++-------
 tests/test_schema.py                           |  6 ++++--
 17 files changed, 63 insertions(+), 32 deletions(-)

diff --git a/sqlglot/dataframe/README.md b/sqlglot/dataframe/README.md
index 02179f4b9e..86fdc4b045 100644
--- a/sqlglot/dataframe/README.md
+++ b/sqlglot/dataframe/README.md
@@ -9,7 +9,7 @@ Currently many of the common operations are covered and more functionality will
 ## Instructions
 * [Install SQLGlot](https://github.com/tobymao/sqlglot/blob/main/README.md#install) and that is all that is required to just generate SQL. [The examples](#examples) show generating SQL and then executing that SQL on a specific engine and that will require that engine's client library.
 * Find/replace all `from pyspark.sql` with `from sqlglot.dataframe`.
-* Prior to any `spark.read.table` or `spark.table` run `sqlglot.schema.add_table('<table_name>', <column_structure>)`.
+* Prior to any `spark.read.table` or `spark.table` run `sqlglot.schema.add_table('<table_name>', <column_structure>, dialect="spark")`.
 * The column structure can be defined the following ways:
   * Dictionary where the keys are column names and values are string of the Spark SQL type name.
   * Ex: `{'cola': 'string', 'colb': 'int'}`
@@ -33,12 +33,16 @@ import sqlglot
 from sqlglot.dataframe.sql.session import SparkSession
 from sqlglot.dataframe.sql import functions as F
 
-sqlglot.schema.add_table('employee', {
-    'employee_id': 'INT',
-    'fname': 'STRING',
-    'lname': 'STRING',
-    'age': 'INT',
-}) # Register the table structure prior to reading from the table
+sqlglot.schema.add_table(
+    'employee',
+    {
+        'employee_id': 'INT',
+        'fname': 'STRING',
+        'lname': 'STRING',
+        'age': 'INT',
+    },
+    dialect="spark",
+) # Register the table structure prior to reading from the table
 
 spark = SparkSession()
 
diff --git a/sqlglot/dataframe/sql/column.py b/sqlglot/dataframe/sql/column.py
index a8b89d1a72..f4cfebaf2d 100644
--- a/sqlglot/dataframe/sql/column.py
+++ b/sqlglot/dataframe/sql/column.py
@@ -5,6 +5,7 @@
 import sqlglot
 from sqlglot import expressions as exp
 from sqlglot.dataframe.sql.types import DataType
+from sqlglot.dialects import Spark
 from sqlglot.helper import flatten, is_iterable
 
 if t.TYPE_CHECKING:
@@ -22,6 +23,10 @@ def __init__(self, expression: t.Optional[t.Union[ColumnOrLiteral, exp.Expressio
             expression = sqlglot.maybe_parse(expression, dialect="spark")
         if expression is None:
             raise ValueError(f"Could not parse {expression}")
+
+        if isinstance(expression, exp.Column):
+            expression.transform(Spark.normalize_identifier, copy=False)
+
         self.expression: exp.Expression = expression
 
     def __repr__(self):
diff --git a/sqlglot/dataframe/sql/dataframe.py b/sqlglot/dataframe/sql/dataframe.py
index 3fc923238f..64cceeac02 100644
--- a/sqlglot/dataframe/sql/dataframe.py
+++ b/sqlglot/dataframe/sql/dataframe.py
@@ -316,6 +316,7 @@ def sql(self, dialect="spark", optimize=True, **kwargs) -> t.List[str]:
                     expression.alias_or_name: expression.type.sql("spark")
                     for expression in select_expression.expressions
                 },
+                dialect="spark",
             )
             cache_storage_level = select_expression.args["cache_storage_level"]
             options = [
diff --git a/sqlglot/dataframe/sql/normalize.py b/sqlglot/dataframe/sql/normalize.py
index 75feba7c29..4eec782429 100644
--- a/sqlglot/dataframe/sql/normalize.py
+++ b/sqlglot/dataframe/sql/normalize.py
@@ -5,6 +5,7 @@
 from sqlglot import expressions as exp
 from sqlglot.dataframe.sql.column import Column
 from sqlglot.dataframe.sql.util import get_tables_from_expression_with_join
+from sqlglot.dialects import Spark
 from sqlglot.helper import ensure_list
 
 NORMALIZE_INPUT = t.TypeVar("NORMALIZE_INPUT", bound=t.Union[str, exp.Expression, Column])
@@ -19,6 +20,7 @@ def normalize(spark: SparkSession, expression_context: exp.Select, expr: t.List[
     for expression in expressions:
         identifiers = expression.find_all(exp.Identifier)
         for identifier in identifiers:
+            Spark.normalize_identifier(identifier)
             replace_alias_name_with_cte_name(spark, expression_context, identifier)
             replace_branch_and_sequence_ids_with_cte_name(spark, expression_context, identifier)
 
diff --git a/sqlglot/dataframe/sql/readwriter.py b/sqlglot/dataframe/sql/readwriter.py
index 7da2901aa1..9d87d4a785 100644
--- a/sqlglot/dataframe/sql/readwriter.py
+++ b/sqlglot/dataframe/sql/readwriter.py
@@ -19,17 +19,14 @@ def __init__(self, spark: SparkSession):
     def table(self, tableName: str) -> DataFrame:
         from sqlglot.dataframe.sql.dataframe import DataFrame
 
-        sqlglot.schema.add_table(tableName)
+        sqlglot.schema.add_table(tableName, dialect="spark")
 
         return DataFrame(
             self.spark,
             exp.Select()
-            .from_(tableName)
+            .from_(exp.to_table(tableName, dialect="spark").transform(Spark.normalize_identifier))
             .select(
-                *(
-                    column if Spark.can_identify(column, "safe") else f'"{column}"'
-                    for column in sqlglot.schema.column_names(tableName)
-                )
+                *(column for column in sqlglot.schema.column_names(tableName, dialect="spark"))
             ),
         )
 
@@ -74,7 +71,7 @@ def insertInto(self, tableName: str, overwrite: t.Optional[bool] = None) -> Data
         )
         df = self._df.copy(output_expression_container=output_expression_container)
         if self._by_name:
-            columns = sqlglot.schema.column_names(tableName, only_visible=True)
+            columns = sqlglot.schema.column_names(tableName, only_visible=True, dialect="spark")
             df = df._convert_leaf_to_cte().select(*columns)
         return self.copy(_df=df)
 
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py
index 7629c3f646..a2c81e544d 100644
--- a/sqlglot/dialects/bigquery.py
+++ b/sqlglot/dialects/bigquery.py
@@ -105,6 +105,8 @@ def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
 
 class BigQuery(Dialect):
     UNNEST_COLUMN_ONLY = True
+
+    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
     RESOLVES_IDENTIFIERS_AS_UPPERCASE = None
 
     TIME_MAPPING = {
diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py
index d46b12f23a..0e25b9bcfd 100644
--- a/sqlglot/dialects/dialect.py
+++ b/sqlglot/dialects/dialect.py
@@ -108,7 +108,6 @@ def get_start_end(token_type: TokenType) -> t.Tuple[t.Optional[str], t.Optional[
             },
             "STRING_ESCAPE": klass.tokenizer_class.STRING_ESCAPES[0],
             "IDENTIFIER_ESCAPE": klass.tokenizer_class.IDENTIFIER_ESCAPES[0],
-            "can_identify": klass.can_identify,
         }
 
         if enum not in ("", "bigquery"):
@@ -123,6 +122,8 @@ def get_start_end(token_type: TokenType) -> t.Tuple[t.Optional[str], t.Optional[
         if not klass.STRICT_STRING_CONCAT:
             klass.parser_class.BITWISE[TokenType.DPIPE] = exp.SafeDPipe
 
+        klass.generator_class.can_identify = klass.can_identify
+
         return klass
 
diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py
index 4aa5035fdc..164b212b73 100644
--- a/sqlglot/dialects/duckdb.py
+++ b/sqlglot/dialects/duckdb.py
@@ -88,6 +88,9 @@ def _regexp_extract_sql(self: generator.Generator, expression: exp.RegexpExtract
 class DuckDB(Dialect):
     NULL_ORDERING = "nulls_are_last"
 
+    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
+    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None
+
     class Tokenizer(tokens.Tokenizer):
         KEYWORDS = {
             **tokens.Tokenizer.KEYWORDS,
diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py
index 8847119515..eeba60ed8b 100644
--- a/sqlglot/dialects/hive.py
+++ b/sqlglot/dialects/hive.py
@@ -153,6 +153,9 @@ class Hive(Dialect):
     ALIAS_POST_TABLESAMPLE = True
     IDENTIFIERS_CAN_START_WITH_DIGIT = True
 
+    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
+    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None
+
     TIME_MAPPING = {
         "y": "%Y",
         "Y": "%Y",
diff --git a/sqlglot/dialects/presto.py b/sqlglot/dialects/presto.py
index f71515159d..265780e4e8 100644
--- a/sqlglot/dialects/presto.py
+++ b/sqlglot/dialects/presto.py
@@ -172,6 +172,11 @@ class Presto(Dialect):
     TIME_MAPPING = MySQL.TIME_MAPPING
     STRICT_STRING_CONCAT = True
 
+    # https://github.com/trinodb/trino/issues/17
+    # https://github.com/trinodb/trino/issues/12289
+    # https://github.com/prestodb/presto/issues/2863
+    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None
+
     class Tokenizer(tokens.Tokenizer):
         KEYWORDS = {
             **tokens.Tokenizer.KEYWORDS,
diff --git a/sqlglot/dialects/redshift.py b/sqlglot/dialects/redshift.py
index a7e25fae0d..db6cc3f153 100644
--- a/sqlglot/dialects/redshift.py
+++ b/sqlglot/dialects/redshift.py
@@ -14,6 +14,9 @@ def _json_sql(self: Postgres.Generator, expression: exp.JSONExtract | exp.JSONEx
 
 
 class Redshift(Postgres):
+    # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
+    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None
+
     TIME_FORMAT = "'YYYY-MM-DD HH:MI:SS'"
     TIME_MAPPING = {
         **Postgres.TIME_MAPPING,
diff --git a/sqlglot/dialects/sqlite.py b/sqlglot/dialects/sqlite.py
index 3b837ea3f8..803f361e8b 100644
--- a/sqlglot/dialects/sqlite.py
+++ b/sqlglot/dialects/sqlite.py
@@ -59,6 +59,9 @@ def _transform_create(expression: exp.Expression) -> exp.Expression:
 
 
 class SQLite(Dialect):
+    # https://sqlite.org/forum/forumpost/5e575586ac5c711b?raw
+    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None
+
     class Tokenizer(tokens.Tokenizer):
         IDENTIFIERS = ['"', ("[", "]"), "`"]
         HEX_STRINGS = [("x'", "'"), ("X'", "'"), ("0x", ""), ("0X", "")]
diff --git a/sqlglot/generator.py b/sqlglot/generator.py
index 6882aaea12..7f7d5dec0a 100644
--- a/sqlglot/generator.py
+++ b/sqlglot/generator.py
@@ -266,7 +266,7 @@ class Generator:
     NORMALIZE_FUNCTIONS: bool | str = "upper"
     NULL_ORDERING = "nulls_are_small"
 
-    can_identify: t.Callable[[str, str | bool], bool] = lambda *_: False
+    can_identify: t.Callable[[str, str | bool], bool]
 
     # Delimiters for quotes, identifiers and the corresponding escape characters
     QUOTE_START = "'"
diff --git a/tests/dataframe/integration/dataframe_validator.py b/tests/dataframe/integration/dataframe_validator.py
index c84a34282f..22d4982444 100644
--- a/tests/dataframe/integration/dataframe_validator.py
+++ b/tests/dataframe/integration/dataframe_validator.py
@@ -135,9 +135,9 @@ def setUpClass(cls):
             data=district_data, schema=cls.sqlglot_district_schema
         )
         cls.df_district.createOrReplaceTempView("district")
-        sqlglot.schema.add_table("employee", cls.sqlglot_employee_schema)
-        sqlglot.schema.add_table("store", cls.sqlglot_store_schema)
-        sqlglot.schema.add_table("district", cls.sqlglot_district_schema)
+        sqlglot.schema.add_table("employee", cls.sqlglot_employee_schema, dialect="spark")
+        sqlglot.schema.add_table("store", cls.sqlglot_store_schema, dialect="spark")
+        sqlglot.schema.add_table("district", cls.sqlglot_district_schema, dialect="spark")
 
     def setUp(self) -> None:
         warnings.filterwarnings("ignore", category=ResourceWarning)
diff --git a/tests/dataframe/unit/test_dataframe_writer.py b/tests/dataframe/unit/test_dataframe_writer.py
index 3f45468398..303d2f987e 100644
--- a/tests/dataframe/unit/test_dataframe_writer.py
+++ b/tests/dataframe/unit/test_dataframe_writer.py
@@ -30,7 +30,7 @@ def test_insertInto_overwrite(self):
 
     @mock.patch("sqlglot.schema", MappingSchema())
     def test_insertInto_byName(self):
-        sqlglot.schema.add_table("table_name", {"employee_id": "INT"})
+        sqlglot.schema.add_table("table_name", {"employee_id": "INT"}, dialect="spark")
         df = self.df_employee.write.byName.insertInto("table_name")
         expected = "INSERT INTO table_name SELECT `a1`.`employee_id` AS `employee_id` FROM VALUES (1, 'Jack', 'Shephard', 37, 1), (2, 'John', 'Locke', 65, 1), (3, 'Kate', 'Austen', 37, 2), (4, 'Claire', 'Littleton', 27, 2), (5, 'Hugo', 'Reyes', 29, 100) AS `a1`(`employee_id`, `fname`, `lname`, `age`, `store_id`)"
         self.compare_sql(df, expected)
@@ -88,8 +88,8 @@ def test_saveAsTable_cache(self):
         self.compare_sql(df, expected_statements)
 
     def test_quotes(self):
-        sqlglot.schema.add_table('"Test"', {'"ID"': "STRING"})
-        df = self.spark.table('"Test"')
+        sqlglot.schema.add_table("`Test`", {"`ID`": "STRING"}, dialect="spark")
+        df = self.spark.table("`Test`")
         self.compare_sql(
-            df.select(df['"ID"']), ["SELECT `Test`.`ID` AS `ID` FROM `Test` AS `Test`"]
+            df.select(df["`ID`"]), ["SELECT `test`.`id` AS `id` FROM `test` AS `test`"]
         )
diff --git a/tests/dataframe/unit/test_session.py b/tests/dataframe/unit/test_session.py
index 0970a2e88f..4c275e9c0d 100644
--- a/tests/dataframe/unit/test_session.py
+++ b/tests/dataframe/unit/test_session.py
@@ -71,7 +71,7 @@ def test_typed_schema_nested(self):
     @mock.patch("sqlglot.schema", MappingSchema())
     def test_sql_select_only(self):
         query = "SELECT cola, colb FROM table"
-        sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"})
+        sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark")
         df = self.spark.sql(query)
         self.assertEqual(
             "SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`",
@@ -80,17 +80,17 @@ def test_sql_select_only(self):
 
     @mock.patch("sqlglot.schema", MappingSchema())
     def test_select_quoted(self):
-        sqlglot.schema.add_table('"TEST"', {"name": "string"})
+        sqlglot.schema.add_table("`TEST`", {"name": "string"}, dialect="spark")
 
         self.assertEqual(
-            SparkSession().table('"TEST"').select(F.col("name")).sql(dialect="snowflake")[0],
-            '''SELECT "TEST"."name" AS "name" FROM "TEST" AS "TEST"''',
+            SparkSession().table("`TEST`").select(F.col("name")).sql(dialect="snowflake")[0],
+            '''SELECT "test"."name" AS "name" FROM "test" AS "test"''',
         )
 
     @mock.patch("sqlglot.schema", MappingSchema())
     def test_sql_with_aggs(self):
         query = "SELECT cola, colb FROM table"
-        sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"})
+        sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark")
         df = self.spark.sql(query).groupBy(F.col("cola")).agg(F.sum("colb"))
         self.assertEqual(
             "WITH t38189 AS (SELECT cola, colb FROM table), t42330 AS (SELECT cola, colb FROM t38189) SELECT cola, SUM(colb) FROM t42330 GROUP BY cola",
@@ -100,7 +100,7 @@ def test_sql_with_aggs(self):
     @mock.patch("sqlglot.schema", MappingSchema())
     def test_sql_create(self):
         query = "CREATE TABLE new_table AS WITH t1 AS (SELECT cola, colb FROM table) SELECT cola, colb, FROM t1"
-        sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"})
+        sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark")
         df = self.spark.sql(query)
         expected = "CREATE TABLE new_table AS SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`"
         self.compare_sql(df, expected)
@@ -108,7 +108,7 @@ def test_sql_create(self):
     @mock.patch("sqlglot.schema", MappingSchema())
     def test_sql_insert(self):
         query = "WITH t1 AS (SELECT cola, colb FROM table) INSERT INTO new_table SELECT cola, colb FROM t1"
-        sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"})
+        sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark")
        df = self.spark.sql(query)
         expected = "INSERT INTO new_table SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`"
         self.compare_sql(df, expected)
diff --git a/tests/test_schema.py b/tests/test_schema.py
index e43d830856..bffad376dc 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -201,7 +201,7 @@ def test_schema_get_column_type(self):
     def test_schema_normalization(self):
         schema = MappingSchema(
             schema={"x": {"`y`": {"Z": {"a": "INT", "`B`": "VARCHAR"}, "w": {"C": "INT"}}}},
-            dialect="spark",
+            dialect="clickhouse",
         )
 
         table_z = exp.Table(this="z", db="y", catalog="x")
@@ -228,7 +228,9 @@ def test_schema_normalization(self):
 
         # Check that the correct dialect is used when calling schema methods
         schema = MappingSchema(schema={"[Fo]": {"x": "int"}}, dialect="tsql")
-        self.assertEqual(schema.column_names("[Fo]"), schema.column_names("`Fo`", dialect="spark"))
+        self.assertEqual(
+            schema.column_names("[Fo]"), schema.column_names("`Fo`", dialect="clickhouse")
+        )
 
         # Check that all identifiers are normalized to lowercase for BigQuery, even quoted ones
         schema = MappingSchema(schema={"`Foo`": {"BaR": "int"}}, dialect="bigquery")

From 4df298bacae81a439d1560f028b8c4d3d496995a Mon Sep 17 00:00:00 2001
From: George Sittas
Date: Fri, 16 Jun 2023 14:48:11 +0300
Subject: [PATCH 5/8] Implement heuristic for treating tables as case-sensitive

---
 sqlglot/dialects/bigquery.py           | 12 ++++++++++++
 tests/fixtures/optimizer/optimizer.sql | 22 +++++++++++-----------
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py
index a2c81e544d..d6b718e07f 100644
--- a/sqlglot/dialects/bigquery.py
+++ b/sqlglot/dialects/bigquery.py
@@ -4,6 +4,7 @@
 import typing as t
 
 from sqlglot import exp, generator, parser, tokens, transforms
+from sqlglot._typing import E
 from sqlglot.dialects.dialect import (
     Dialect,
     datestrtodate_sql,
@@ -129,6 +130,17 @@ class BigQuery(Dialect):
         "TZH": "%z",
     }
 
+    @classmethod
+    def normalize_identifier(cls, expression: E) -> E:
+        # In BigQuery CTEs are not case-sensitive, but table names are. The following check is
+        # essentially a heuristic to detect tables based on whether or not they're qualified.
+        if isinstance(expression, exp.Identifier) and not (
+            isinstance(expression.parent, exp.Table) and len(expression.parent.parts) >= 1
+        ):
+            expression.set("this", expression.this.lower())
+
+        return expression
+
     class Tokenizer(tokens.Tokenizer):
         QUOTES = ["'", '"', '"""', "'''"]
         COMMENTS = ["--", "#", ("/*", "*/")]
diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql
index e91d355854..214535ab79 100644
--- a/tests/fixtures/optimizer/optimizer.sql
+++ b/tests/fixtures/optimizer/optimizer.sql
@@ -647,7 +647,7 @@ CROSS JOIN LATERAL (
   LIMIT 1
 ) AS "l";
 
-# title: bigquery identifiers are case-insensitive
+# title: bigquery column identifiers are case-insensitive
 # execute: false
 # dialect: bigquery
 WITH cte AS (
@@ -655,7 +655,7 @@ WITH cte AS (
     refresh_date AS `reFREsh_date`,
     term AS `TeRm`,
     `rank`
-  FROM `bigquery-public-data.google_trends.top_terms`
+  FROM `bigquery-public-data.GooGle_tReNDs.TOp_TeRmS`
 )
 SELECT
   refresh_date AS `Day`,
@@ -668,16 +668,16 @@ WHERE
 GROUP BY `dAy`, `top_term`, rank
 ORDER BY `DaY` DESC;
 SELECT
-  `top_terms`.`refresh_date` AS `day`,
-  `top_terms`.`term` AS `top_term`,
-  `top_terms`.`rank` AS `rank`
-FROM `bigquery-public-data`.`google_trends`.`top_terms` AS `top_terms`
+  `TOp_TeRmS`.`refresh_date` AS `day`,
+  `TOp_TeRmS`.`term` AS `top_term`,
+  `TOp_TeRmS`.`rank` AS `rank`
+FROM `bigquery-public-data`.`GooGle_tReNDs`.`TOp_TeRmS` AS `TOp_TeRmS`
 WHERE
-  `top_terms`.`rank` = 1
-  AND CAST(`top_terms`.`refresh_date` AS DATE) >= DATE_SUB(CURRENT_DATE, INTERVAL 2 WEEK)
+  `TOp_TeRmS`.`rank` = 1
+  AND CAST(`TOp_TeRmS`.`refresh_date` AS DATE) >= DATE_SUB(CURRENT_DATE, INTERVAL 2 WEEK)
 GROUP BY
-  `top_terms`.`refresh_date`,
-  `top_terms`.`term`,
-  `top_terms`.`rank`
+  `TOp_TeRmS`.`refresh_date`,
+  `TOp_TeRmS`.`term`,
+  `TOp_TeRmS`.`rank`
 ORDER BY
   `day` DESC;

From c365bf865d55ca6e2bd8b2105ef067455b0b7827 Mon Sep 17 00:00:00 2001
From: George Sittas
Date: Fri, 16 Jun 2023 15:18:21 +0300
Subject: [PATCH 6/8] Fix the heuristic condition

---
 sqlglot/dialects/bigquery.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py
index d6b718e07f..9611273dde 100644
--- a/sqlglot/dialects/bigquery.py
+++ b/sqlglot/dialects/bigquery.py
@@ -135,7 +135,7 @@ def normalize_identifier(cls, expression: E) -> E:
         # In BigQuery CTEs are not case-sensitive, but table names are. The following check is
         # essentially a heuristic to detect tables based on whether or not they're qualified.
         if isinstance(expression, exp.Identifier) and not (
-            isinstance(expression.parent, exp.Table) and len(expression.parent.parts) >= 1
+            isinstance(expression.parent, exp.Table) and len(expression.parent.parts) > 1
         ):
             expression.set("this", expression.this.lower())
 

From 525fe2e775d4ba55b83789dc3ebfaf60161d3b2c Mon Sep 17 00:00:00 2001
From: George Sittas
Date: Fri, 16 Jun 2023 16:56:28 +0300
Subject: [PATCH 7/8] Improve condition

---
 sqlglot/dialects/bigquery.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py
index 9611273dde..0da1844921 100644
--- a/sqlglot/dialects/bigquery.py
+++ b/sqlglot/dialects/bigquery.py
@@ -135,7 +135,7 @@ def normalize_identifier(cls, expression: E) -> E:
         # In BigQuery CTEs are not case-sensitive, but table names are. The following check is
         # essentially a heuristic to detect tables based on whether or not they're qualified.
         if isinstance(expression, exp.Identifier) and not (
-            isinstance(expression.parent, exp.Table) and len(expression.parent.parts) > 1
+            isinstance(expression.parent, exp.Table) and expression.parent.db
         ):
             expression.set("this", expression.this.lower())
 

From 4f3f4aec6498432234ceb44b00a2a74ebfd3ad5c Mon Sep 17 00:00:00 2001
From: George Sittas
Date: Fri, 16 Jun 2023 19:04:06 +0300
Subject: [PATCH 8/8] Add snowflake identifier docs in a comment

---
 sqlglot/dialects/snowflake.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sqlglot/dialects/snowflake.py b/sqlglot/dialects/snowflake.py
index a8bea8e6df..8d6b496bf0 100644
--- a/sqlglot/dialects/snowflake.py
+++ b/sqlglot/dialects/snowflake.py
@@ -167,6 +167,7 @@ def _parse_convert_timezone(args: t.List) -> exp.Expression:
 
 
 class Snowflake(Dialect):
+    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = True
     NULL_ORDERING = "nulls_are_large"
     TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
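
A minimal sketch of the normalization rules the series converges on, assuming
all eight patches are applied (the identifier values below are illustrative):

    from sqlglot import exp
    from sqlglot.dialects import BigQuery, Snowflake
    from sqlglot.dialects.dialect import Dialect

    # Unquoted identifiers: the default Dialect lowercases them, while Snowflake
    # (RESOLVES_IDENTIFIERS_AS_UPPERCASE = True) uppercases them.
    assert Dialect.normalize_identifier(exp.to_identifier("Foo")).name == "foo"
    assert Snowflake.normalize_identifier(exp.to_identifier("Foo")).name == "FOO"

    # Quoted identifiers are left untouched, unless the dialect treats all
    # identifiers as case-insensitive (RESOLVES_IDENTIFIERS_AS_UPPERCASE = None),
    # as BigQuery does for everything except qualified table names.
    assert Dialect.normalize_identifier(exp.to_identifier("Foo", quoted=True)).name == "Foo"
    assert BigQuery.normalize_identifier(exp.to_identifier("Foo", quoted=True)).name == "foo"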
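The BigQuery table heuristic from patches 5-7 is easiest to see through the
normalize_identifiers rule: identifiers belonging to a qualified table (an
exp.Table whose db part is set) keep their casing, everything else is
lowercased. A sketch, with a made-up project, dataset and table name:

    import sqlglot
    from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

    expression = sqlglot.parse_one(
        "SELECT Col FROM Some_Project.Some_Dataset.Some_Table", read="bigquery"
    )

    # The column is lowercased; the qualified table parts are left as-is.
    print(normalize_identifiers(expression, dialect="bigquery").sql(dialect="bigquery"))
    # SELECT col FROM Some_Project.Some_Dataset.Some_Table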