diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 2502abe9d..6c8130dc4 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -27,7 +27,7 @@ jobs:
       - uses: actions/checkout@v4
       - name: Setup Rust Toolchain
         uses: ./.github/actions/setup-builder
-      - run: cargo fmt -- --check
+      - run: cargo fmt --all -- --check
 
   lint:
     runs-on: ubuntu-latest
diff --git a/Cargo.toml b/Cargo.toml
index c4d0094f4..8ff0ceb55 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,7 +18,7 @@
 [package]
 name = "sqlparser"
 description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011"
-version = "0.52.0"
+version = "0.53.0"
 authors = ["Apache DataFusion <dev@datafusion.apache.org>"]
 homepage = "https://github.com/apache/datafusion-sqlparser-rs"
 documentation = "https://docs.rs/sqlparser/"
@@ -37,8 +37,9 @@ name = "sqlparser"
 path = "src/lib.rs"
 
 [features]
-default = ["std"]
+default = ["std", "recursive-protection"]
 std = []
+recursive-protection = ["std", "recursive"]
 # Enable JSON output in the `cli` example:
 json_example = ["serde_json", "serde"]
 visitor = ["sqlparser_derive"]
@@ -46,6 +47,8 @@ visitor = ["sqlparser_derive"]
 [dependencies]
 bigdecimal = { version = "0.4.1", features = ["serde"], optional = true }
 log = "0.4"
+recursive = { version = "0.1.1", optional = true }
+
 serde = { version = "1.0", features = ["derive"], optional = true }
 # serde_json is only used in examples/cli, but we have to put it outside
 # of dev-dependencies because of
diff --git a/README.md b/README.md
index fd676d115..41a44d3d7 100644
--- a/README.md
+++ b/README.md
@@ -63,7 +63,7 @@ The following optional [crate features](https://doc.rust-lang.org/cargo/reference/features.html) are available:
 * `serde`: Adds [Serde](https://serde.rs/) support by implementing `Serialize` and `Deserialize` for all AST nodes.
 * `visitor`: Adds a `Visitor` capable of recursively walking the AST tree.
-
+* `recursive-protection` (enabled by default): uses [recursive](https://docs.rs/recursive/latest/recursive/) for stack overflow protection.
 
 ## Syntax vs Semantics
 
@@ -240,11 +240,14 @@ You can run them with:
 ```
 git checkout main
 cd sqlparser_bench
-cargo bench
+cargo bench -- --save-baseline main
 git checkout <your branch>
-cargo bench
+cargo bench -- --baseline main
 ```
 
+By adding the `--save-baseline main` and `--baseline main` flags, you can track the
+progress of your improvements as you continue working on the feature branch.
+
 ## Licensing
 
 All code in this repository is licensed under the [Apache Software License 2.0](LICENSE.txt).
diff --git a/changelog/0.53.0.md b/changelog/0.53.0.md
new file mode 100644
index 000000000..5b9de07d3
--- /dev/null
+++ b/changelog/0.53.0.md
@@ -0,0 +1,95 @@
+
+
+# sqlparser-rs 0.53.0 Changelog
+
+This release consists of 47 commits from 16 contributors. See credits at the end of this changelog for more information.
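The `recursive-protection` feature introduced in the `Cargo.toml` and `README.md` hunks above delegates to the [recursive](https://docs.rs/recursive/latest/recursive/) crate. A minimal sketch of what that crate's attribute does, using a toy function that is not part of this diff:

```rust
// Sketch: #[recursive::recursive] checks the remaining stack space on
// entry and, when it runs low, continues on a new heap-allocated stack
// segment instead of overflowing.
#[recursive::recursive]
fn nesting_depth(n: u64) -> u64 {
    if n == 0 {
        0
    } else {
        1 + nesting_depth(n - 1)
    }
}
```

The parser, `Display` impls, and derived visitors apply the same attribute conditionally via `#[cfg_attr(feature = "recursive-protection", recursive::recursive)]` (see the `derive/src/lib.rs` and `src/ast/mod.rs` hunks below), so builds that disable the feature are unaffected.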
+ +**Other:** + +- hive: support for special not expression `!a` and raise error for `a!` factorial operator [#1472](https://github.com/apache/datafusion-sqlparser-rs/pull/1472) (wugeer) +- Add support for MSSQL's `OPENJSON WITH` clause [#1498](https://github.com/apache/datafusion-sqlparser-rs/pull/1498) (gaoqiangz) +- Parse true and false as identifiers in mssql [#1510](https://github.com/apache/datafusion-sqlparser-rs/pull/1510) (lovasoa) +- Fix the parsing error in MSSQL for multiple statements that include `DECLARE` statements [#1497](https://github.com/apache/datafusion-sqlparser-rs/pull/1497) (wugeer) +- Add support for Snowflake SHOW DATABASES/SCHEMAS/TABLES/VIEWS/COLUMNS statements [#1501](https://github.com/apache/datafusion-sqlparser-rs/pull/1501) (yoavcloud) +- Add support of COMMENT ON syntax for Snowflake [#1516](https://github.com/apache/datafusion-sqlparser-rs/pull/1516) (git-hulk) +- Add support for MYSQL's `CREATE TABLE SELECT` expr [#1515](https://github.com/apache/datafusion-sqlparser-rs/pull/1515) (wugeer) +- Add support for MSSQL's `XQuery` methods [#1500](https://github.com/apache/datafusion-sqlparser-rs/pull/1500) (gaoqiangz) +- Add support for Hive's `LOAD DATA` expr [#1520](https://github.com/apache/datafusion-sqlparser-rs/pull/1520) (wugeer) +- Fix ClickHouse document link from `Russian` to `English` [#1527](https://github.com/apache/datafusion-sqlparser-rs/pull/1527) (git-hulk) +- Support ANTI and SEMI joins without LEFT/RIGHT [#1528](https://github.com/apache/datafusion-sqlparser-rs/pull/1528) (delamarch3) +- support sqlite's OR clauses in update statements [#1530](https://github.com/apache/datafusion-sqlparser-rs/pull/1530) (lovasoa) +- support column type definitions in table aliases [#1526](https://github.com/apache/datafusion-sqlparser-rs/pull/1526) (lovasoa) +- Add support for MSSQL's `JSON_ARRAY`/`JSON_OBJECT` expr [#1507](https://github.com/apache/datafusion-sqlparser-rs/pull/1507) (gaoqiangz) +- Add support for PostgreSQL `UNLISTEN` syntax and Add support for Postgres `LOAD extension` expr [#1531](https://github.com/apache/datafusion-sqlparser-rs/pull/1531) (wugeer) +- Parse byte/bit string literals in MySQL and Postgres [#1532](https://github.com/apache/datafusion-sqlparser-rs/pull/1532) (mvzink) +- Allow example CLI to read from stdin [#1536](https://github.com/apache/datafusion-sqlparser-rs/pull/1536) (mvzink) +- recursive select calls are parsed with bad trailing_commas parameter [#1521](https://github.com/apache/datafusion-sqlparser-rs/pull/1521) (tomershaniii) +- PartiQL queries in Redshift [#1534](https://github.com/apache/datafusion-sqlparser-rs/pull/1534) (yoavcloud) +- Include license file in sqlparser_derive crate [#1543](https://github.com/apache/datafusion-sqlparser-rs/pull/1543) (ankane) +- Fallback to identifier parsing if expression parsing fails [#1513](https://github.com/apache/datafusion-sqlparser-rs/pull/1513) (yoavcloud) +- support `json_object('k':'v')` in postgres [#1546](https://github.com/apache/datafusion-sqlparser-rs/pull/1546) (lovasoa) +- Document micro benchmarks [#1555](https://github.com/apache/datafusion-sqlparser-rs/pull/1555) (alamb) +- Implement `Spanned` to retrieve source locations on AST nodes [#1435](https://github.com/apache/datafusion-sqlparser-rs/pull/1435) (Nyrox) +- Fix error in benchmark queries 
[#1560](https://github.com/apache/datafusion-sqlparser-rs/pull/1560) (alamb) +- Fix clippy warnings on rust 1.83 [#1570](https://github.com/apache/datafusion-sqlparser-rs/pull/1570) (iffyio) +- Support relation visitor to visit the `Option` field [#1556](https://github.com/apache/datafusion-sqlparser-rs/pull/1556) (goldmedal) +- Rename `TokenWithLocation` to `TokenWithSpan`, in backwards compatible way [#1562](https://github.com/apache/datafusion-sqlparser-rs/pull/1562) (alamb) +- Support MySQL size variants for BLOB and TEXT columns [#1564](https://github.com/apache/datafusion-sqlparser-rs/pull/1564) (mvzink) +- Increase version of sqlparser_derive from 0.2.2 to 0.3.0 [#1571](https://github.com/apache/datafusion-sqlparser-rs/pull/1571) (alamb) +- `json_object('k' VALUE 'v')` in postgres [#1547](https://github.com/apache/datafusion-sqlparser-rs/pull/1547) (lovasoa) +- Support snowflake double dot notation for object name [#1540](https://github.com/apache/datafusion-sqlparser-rs/pull/1540) (ayman-sigma) +- Update comments / docs for `Spanned` [#1549](https://github.com/apache/datafusion-sqlparser-rs/pull/1549) (alamb) +- Support Databricks struct literal [#1542](https://github.com/apache/datafusion-sqlparser-rs/pull/1542) (ayman-sigma) +- Encapsulate CreateFunction [#1573](https://github.com/apache/datafusion-sqlparser-rs/pull/1573) (philipcristiano) +- Support BIT column types [#1577](https://github.com/apache/datafusion-sqlparser-rs/pull/1577) (mvzink) +- Support parsing optional nulls handling for unique constraint [#1567](https://github.com/apache/datafusion-sqlparser-rs/pull/1567) (mvzink) +- Fix displaying WORK or TRANSACTION after BEGIN [#1565](https://github.com/apache/datafusion-sqlparser-rs/pull/1565) (mvzink) +- Add support of the ENUM8|ENUM16 for ClickHouse dialect [#1574](https://github.com/apache/datafusion-sqlparser-rs/pull/1574) (git-hulk) +- Parse Snowflake USE ROLE and USE SECONDARY ROLES [#1578](https://github.com/apache/datafusion-sqlparser-rs/pull/1578) (yoavcloud) +- Snowflake ALTER TABLE clustering options [#1579](https://github.com/apache/datafusion-sqlparser-rs/pull/1579) (yoavcloud) +- Support INSERT OVERWRITE INTO syntax [#1584](https://github.com/apache/datafusion-sqlparser-rs/pull/1584) (yuval-illumex) +- Parse `INSERT` with subquery when lacking column names [#1586](https://github.com/apache/datafusion-sqlparser-rs/pull/1586) (iffyio) +- Add support for ODBC functions [#1585](https://github.com/apache/datafusion-sqlparser-rs/pull/1585) (iffyio) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 8 Andrew Lamb + 6 Michael Victor Zink + 5 Ophir LOJKINE + 5 Yoav Cohen + 5 wugeer + 3 Ifeanyi Ubah + 3 gaoqiangz + 3 hulk + 2 Ayman Elkfrawy + 1 Andrew Kane + 1 Jax Liu + 1 Mark-Oliver Junge + 1 Philip Cristiano + 1 Yuval Shkolar + 1 delamarch3 + 1 tomershaniii +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
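Many of the entries above are observable directly through the public parsing API. As one example, a hedged sketch of the new ODBC function-call syntax from #1585, assuming the generic dialect accepts it (the query string mirrors the `Function::uses_odbc_syntax` docs later in this diff):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    // ODBC escape syntax for scalar function calls: {fn ...}
    let sql = "SELECT {fn CONCAT('foo', 'bar')}";
    let statements = Parser::parse_sql(&GenericDialect {}, sql).unwrap();

    // The braces are recorded on the AST (`Function::uses_odbc_syntax`),
    // so the statement displays back with the same syntax.
    assert_eq!(statements[0].to_string(), sql);
}
```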
+ diff --git a/derive/src/lib.rs b/derive/src/lib.rs index dd4d37b41..08c5c5db4 100644 --- a/derive/src/lib.rs +++ b/derive/src/lib.rs @@ -18,7 +18,11 @@ use proc_macro2::TokenStream; use quote::{format_ident, quote, quote_spanned, ToTokens}; use syn::spanned::Spanned; -use syn::{parse::{Parse, ParseStream}, parse_macro_input, parse_quote, Attribute, Data, DeriveInput, Fields, GenericParam, Generics, Ident, Index, LitStr, Meta, Token, Type, TypePath}; +use syn::{ + parse::{Parse, ParseStream}, + parse_macro_input, parse_quote, Attribute, Data, DeriveInput, Fields, GenericParam, Generics, + Ident, Index, LitStr, Meta, Token, Type, TypePath, +}; use syn::{Path, PathArguments}; /// Implementation of `[#derive(Visit)]` @@ -74,7 +78,10 @@ fn derive_visit(input: proc_macro::TokenStream, visit_type: &VisitType) -> proc_ let expanded = quote! { // The generated impl. + // Note that it uses [`recursive::recursive`] to protect from stack overflow. + // See tests in https://github.com/apache/datafusion-sqlparser-rs/pull/1522/ for more info. impl #impl_generics sqlparser::ast::#visit_trait for #name #ty_generics #where_clause { + #[cfg_attr(feature = "recursive-protection", recursive::recursive)] fn visit( &#modifier self, visitor: &mut V @@ -267,7 +274,11 @@ fn visit_children( } fn is_option(ty: &Type) -> bool { - if let Type::Path(TypePath { path: Path { segments, .. }, .. }) = ty { + if let Type::Path(TypePath { + path: Path { segments, .. }, + .. + }) = ty + { if let Some(segment) = segments.last() { if segment.ident == "Option" { if let PathArguments::AngleBracketed(args) = &segment.arguments { diff --git a/dev/release/README.md b/dev/release/README.md index c440f7387..c3018dd68 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -146,6 +146,12 @@ Move artifacts to the release location in SVN, using the `release-tarball.sh` sc ```shell ./dev/release/release-tarball.sh 0.52.0 1 ``` + +Promote the rc tag to the release tag +```shell +git tag v0.52.0 v0.52.0-rc3 +git push apache v0.52.0 +``` Congratulations! The release is now official! 
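The benchmark additions below stress both halves of the parse/format cycle on a large generated query. The invariant the new `format_large_statement` benchmark relies on, namely that formatted output re-parses to an equal AST, can be checked in isolation. A sketch with an illustrative query that is not part of the diff:

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = GenericDialect {};
    let sql = "SELECT FN_0(COL_0) FROM TABLE_0 WHERE COL_0 = 0 ORDER BY COL_0 DESC";
    let stmt = Parser::parse_sql(&dialect, sql).unwrap().pop().unwrap();

    // Formatting the AST and re-parsing the result yields an equal AST;
    // for SQL that is already in canonical form, the text matches too.
    let formatted = stmt.to_string();
    let reparsed = Parser::parse_sql(&dialect, &formatted).unwrap().pop().unwrap();
    assert_eq!(stmt, reparsed);
    assert_eq!(formatted, sql);
}
```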
diff --git a/sqlparser_bench/benches/sqlparser_bench.rs b/sqlparser_bench/benches/sqlparser_bench.rs
index 32a6da1bc..74cac5c97 100644
--- a/sqlparser_bench/benches/sqlparser_bench.rs
+++ b/sqlparser_bench/benches/sqlparser_bench.rs
@@ -42,6 +42,46 @@ fn basic_queries(c: &mut Criterion) {
     group.bench_function("sqlparser::with_select", |b| {
         b.iter(|| Parser::parse_sql(&dialect, with_query).unwrap());
     });
+
+    let large_statement = {
+        let expressions = (0..1000)
+            .map(|n| format!("FN_{}(COL_{})", n, n))
+            .collect::<Vec<_>>()
+            .join(", ");
+        let tables = (0..1000)
+            .map(|n| format!("TABLE_{}", n))
+            .collect::<Vec<_>>()
+            .join(" JOIN ");
+        let where_condition = (0..1000)
+            .map(|n| format!("COL_{} = {}", n, n))
+            .collect::<Vec<_>>()
+            .join(" OR ");
+        let order_condition = (0..1000)
+            .map(|n| format!("COL_{} DESC", n))
+            .collect::<Vec<_>>()
+            .join(", ");
+
+        format!(
+            "SELECT {} FROM {} WHERE {} ORDER BY {}",
+            expressions, tables, where_condition, order_condition
+        )
+    };
+
+    group.bench_function("parse_large_statement", |b| {
+        b.iter(|| Parser::parse_sql(&dialect, criterion::black_box(large_statement.as_str())));
+    });
+
+    let large_statement_ast = Parser::parse_sql(&dialect, large_statement.as_str())
+        .unwrap()
+        .pop()
+        .unwrap();
+
+    group.bench_function("format_large_statement", |b| {
+        b.iter(|| {
+            // Formatting the parsed AST must reproduce the original query text.
+            let formatted_query = large_statement_ast.to_string();
+            assert_eq!(formatted_query, large_statement);
+        });
+    });
 }
 
 criterion_group!(benches, basic_queries);
diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs
index fbfdc2dcf..02aa6cc9f 100644
--- a/src/ast/data_type.rs
+++ b/src/ast/data_type.rs
@@ -25,10 +25,21 @@ use serde::{Deserialize, Serialize};
 #[cfg(feature = "visitor")]
 use sqlparser_derive::{Visit, VisitMut};
 
-use crate::ast::{display_comma_separated, ObjectName, StructField, UnionField};
+use crate::ast::{display_comma_separated, Expr, ObjectName, StructField, UnionField};
 
 use super::{value::escape_single_quote_string, ColumnDef};
 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum EnumMember {
+    Name(String),
+    /// ClickHouse allows specifying an integer value for each enum member.
+    ///
+    /// [clickhouse](https://clickhouse.com/docs/en/sql-reference/data-types/enum)
+    NamedValue(String, Expr),
+}
+
 /// SQL data types
 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
@@ -243,7 +254,7 @@ pub enum DataType {
     /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
     Float8,
     /// Double
-    Double,
+    Double(ExactNumberInfo),
     /// Double PRECISION e.g. [standard], [postgresql]
     ///
     /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#approximate-numeric-type
@@ -307,6 +318,16 @@ pub enum DataType {
     FixedString(u64),
     /// Bytea
     Bytea,
+    /// Bit string, e.g. [Postgres], [MySQL], or [MSSQL]
+    ///
+    /// [Postgres]: https://www.postgresql.org/docs/current/datatype-bit.html
+    /// [MySQL]: https://dev.mysql.com/doc/refman/9.1/en/bit-type.html
+    /// [MSSQL]: https://learn.microsoft.com/en-us/sql/t-sql/data-types/bit-transact-sql?view=sql-server-ver16
+    Bit(Option<u64>),
+    /// Variable-length bit string e.g.
[Postgres] + /// + /// [Postgres]: https://www.postgresql.org/docs/current/datatype-bit.html + BitVarying(Option), /// Custom type such as enums Custom(ObjectName, Vec), /// Arrays @@ -324,7 +345,7 @@ pub enum DataType { /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested Nested(Vec), /// Enums - Enum(Vec), + Enum(Vec, Option), /// Set Set(Vec), /// Struct @@ -352,6 +373,10 @@ pub enum DataType { /// /// [postgresql]: https://www.postgresql.org/docs/current/plpgsql-trigger.html Trigger, + /// Any data type, used in BigQuery UDF definitions for templated parameters + /// + /// [bigquery]: https://cloud.google.com/bigquery/docs/user-defined-functions#templated-sql-udf-parameters + AnyType, } impl fmt::Display for DataType { @@ -362,7 +387,6 @@ impl fmt::Display for DataType { DataType::CharacterVarying(size) => { format_character_string_type(f, "CHARACTER VARYING", size) } - DataType::CharVarying(size) => format_character_string_type(f, "CHAR VARYING", size), DataType::Varchar(size) => format_character_string_type(f, "VARCHAR", size), DataType::Nvarchar(size) => format_character_string_type(f, "NVARCHAR", size), @@ -484,7 +508,7 @@ impl fmt::Display for DataType { DataType::Float4 => write!(f, "FLOAT4"), DataType::Float32 => write!(f, "Float32"), DataType::Float64 => write!(f, "FLOAT64"), - DataType::Double => write!(f, "DOUBLE"), + DataType::Double(info) => write!(f, "DOUBLE{info}"), DataType::Float8 => write!(f, "FLOAT8"), DataType::DoublePrecision => write!(f, "DOUBLE PRECISION"), DataType::Bool => write!(f, "BOOL"), @@ -518,6 +542,10 @@ impl fmt::Display for DataType { DataType::LongText => write!(f, "LONGTEXT"), DataType::String(size) => format_type_with_optional_length(f, "STRING", size, false), DataType::Bytea => write!(f, "BYTEA"), + DataType::Bit(size) => format_type_with_optional_length(f, "BIT", size, false), + DataType::BitVarying(size) => { + format_type_with_optional_length(f, "BIT VARYING", size, false) + } DataType::Array(ty) => match ty { ArrayElemTypeDef::None => write!(f, "ARRAY"), ArrayElemTypeDef::SquareBracket(t, None) => write!(f, "{t}[]"), @@ -532,13 +560,24 @@ impl fmt::Display for DataType { write!(f, "{}({})", ty, modifiers.join(", ")) } } - DataType::Enum(vals) => { - write!(f, "ENUM(")?; + DataType::Enum(vals, bits) => { + match bits { + Some(bits) => write!(f, "ENUM{}", bits), + None => write!(f, "ENUM"), + }?; + write!(f, "(")?; for (i, v) in vals.iter().enumerate() { if i != 0 { write!(f, ", ")?; } - write!(f, "'{}'", escape_single_quote_string(v))?; + match v { + EnumMember::Name(name) => { + write!(f, "'{}'", escape_single_quote_string(name))? + } + EnumMember::NamedValue(name, value) => { + write!(f, "'{}' = {}", escape_single_quote_string(name), value)? + } + } } write!(f, ")") } @@ -590,6 +629,7 @@ impl fmt::Display for DataType { } DataType::Unspecified => Ok(()), DataType::Trigger => write!(f, "TRIGGER"), + DataType::AnyType => write!(f, "ANY TYPE"), } } } diff --git a/src/ast/dcl.rs b/src/ast/dcl.rs index d47476ffa..735ab0cce 100644 --- a/src/ast/dcl.rs +++ b/src/ast/dcl.rs @@ -28,7 +28,7 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use super::{Expr, Ident, Password}; +use super::{display_comma_separated, Expr, Ident, Password}; use crate::ast::{display_separated, ObjectName}; /// An option in `ROLE` statement. 
@@ -204,12 +204,14 @@ impl fmt::Display for AlterRoleOperation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum Use { - Catalog(ObjectName), // e.g. `USE CATALOG foo.bar` - Schema(ObjectName), // e.g. `USE SCHEMA foo.bar` - Database(ObjectName), // e.g. `USE DATABASE foo.bar` - Warehouse(ObjectName), // e.g. `USE WAREHOUSE foo.bar` - Object(ObjectName), // e.g. `USE foo.bar` - Default, // e.g. `USE DEFAULT` + Catalog(ObjectName), // e.g. `USE CATALOG foo.bar` + Schema(ObjectName), // e.g. `USE SCHEMA foo.bar` + Database(ObjectName), // e.g. `USE DATABASE foo.bar` + Warehouse(ObjectName), // e.g. `USE WAREHOUSE foo.bar` + Role(ObjectName), // e.g. `USE ROLE PUBLIC` + SecondaryRoles(SecondaryRoles), // e.g. `USE SECONDARY ROLES ALL` + Object(ObjectName), // e.g. `USE foo.bar` + Default, // e.g. `USE DEFAULT` } impl fmt::Display for Use { @@ -220,8 +222,33 @@ impl fmt::Display for Use { Use::Schema(name) => write!(f, "SCHEMA {}", name), Use::Database(name) => write!(f, "DATABASE {}", name), Use::Warehouse(name) => write!(f, "WAREHOUSE {}", name), + Use::Role(name) => write!(f, "ROLE {}", name), + Use::SecondaryRoles(secondary_roles) => { + write!(f, "SECONDARY ROLES {}", secondary_roles) + } Use::Object(name) => write!(f, "{}", name), Use::Default => write!(f, "DEFAULT"), } } } + +/// Snowflake `SECONDARY ROLES` USE variant +/// See: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum SecondaryRoles { + All, + None, + List(Vec), +} + +impl fmt::Display for SecondaryRoles { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + SecondaryRoles::All => write!(f, "ALL"), + SecondaryRoles::None => write!(f, "NONE"), + SecondaryRoles::List(roles) => write!(f, "{}", display_comma_separated(roles)), + } + } +} diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 9a7d297bc..c87960679 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -70,7 +70,10 @@ pub enum AlterTableOperation { /// /// Note: this is a ClickHouse-specific operation. /// Please refer to [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/projection#drop-projection) - DropProjection { if_exists: bool, name: Ident }, + DropProjection { + if_exists: bool, + name: Ident, + }, /// `MATERIALIZE PROJECTION [IF EXISTS] name [IN PARTITION partition_name]` /// @@ -99,11 +102,15 @@ pub enum AlterTableOperation { /// `DISABLE RULE rewrite_rule_name` /// /// Note: this is a PostgreSQL-specific operation. - DisableRule { name: Ident }, + DisableRule { + name: Ident, + }, /// `DISABLE TRIGGER [ trigger_name | ALL | USER ]` /// /// Note: this is a PostgreSQL-specific operation. - DisableTrigger { name: Ident }, + DisableTrigger { + name: Ident, + }, /// `DROP CONSTRAINT [ IF EXISTS ] ` DropConstraint { if_exists: bool, @@ -152,19 +159,27 @@ pub enum AlterTableOperation { /// `ENABLE ALWAYS RULE rewrite_rule_name` /// /// Note: this is a PostgreSQL-specific operation. - EnableAlwaysRule { name: Ident }, + EnableAlwaysRule { + name: Ident, + }, /// `ENABLE ALWAYS TRIGGER trigger_name` /// /// Note: this is a PostgreSQL-specific operation. - EnableAlwaysTrigger { name: Ident }, + EnableAlwaysTrigger { + name: Ident, + }, /// `ENABLE REPLICA RULE rewrite_rule_name` /// /// Note: this is a PostgreSQL-specific operation. 
- EnableReplicaRule { name: Ident }, + EnableReplicaRule { + name: Ident, + }, /// `ENABLE REPLICA TRIGGER trigger_name` /// /// Note: this is a PostgreSQL-specific operation. - EnableReplicaTrigger { name: Ident }, + EnableReplicaTrigger { + name: Ident, + }, /// `ENABLE ROW LEVEL SECURITY` /// /// Note: this is a PostgreSQL-specific operation. @@ -172,11 +187,15 @@ pub enum AlterTableOperation { /// `ENABLE RULE rewrite_rule_name` /// /// Note: this is a PostgreSQL-specific operation. - EnableRule { name: Ident }, + EnableRule { + name: Ident, + }, /// `ENABLE TRIGGER [ trigger_name | ALL | USER ]` /// /// Note: this is a PostgreSQL-specific operation. - EnableTrigger { name: Ident }, + EnableTrigger { + name: Ident, + }, /// `RENAME TO PARTITION (partition=val)` RenamePartitions { old_partitions: Vec, @@ -197,7 +216,9 @@ pub enum AlterTableOperation { new_column_name: Ident, }, /// `RENAME TO ` - RenameTable { table_name: ObjectName }, + RenameTable { + table_name: ObjectName, + }, // CHANGE [ COLUMN ] [ ] ChangeColumn { old_name: Ident, @@ -218,7 +239,10 @@ pub enum AlterTableOperation { /// `RENAME CONSTRAINT TO ` /// /// Note: this is a PostgreSQL-specific operation. - RenameConstraint { old_name: Ident, new_name: Ident }, + RenameConstraint { + old_name: Ident, + new_name: Ident, + }, /// `ALTER [ COLUMN ]` AlterColumn { column_name: Ident, @@ -227,14 +251,27 @@ pub enum AlterTableOperation { /// 'SWAP WITH ' /// /// Note: this is Snowflake specific - SwapWith { table_name: ObjectName }, + SwapWith { + table_name: ObjectName, + }, /// 'SET TBLPROPERTIES ( { property_key [ = ] property_val } [, ...] )' - SetTblProperties { table_properties: Vec }, - + SetTblProperties { + table_properties: Vec, + }, /// `OWNER TO { | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` /// /// Note: this is PostgreSQL-specific - OwnerTo { new_owner: Owner }, + OwnerTo { + new_owner: Owner, + }, + /// Snowflake table clustering options + /// + ClusterBy { + exprs: Vec, + }, + DropClusteringKey, + SuspendRecluster, + ResumeRecluster, } /// An `ALTER Policy` (`Statement::AlterPolicy`) operation @@ -548,6 +585,22 @@ impl fmt::Display for AlterTableOperation { } Ok(()) } + AlterTableOperation::ClusterBy { exprs } => { + write!(f, "CLUSTER BY ({})", display_comma_separated(exprs))?; + Ok(()) + } + AlterTableOperation::DropClusteringKey => { + write!(f, "DROP CLUSTERING KEY")?; + Ok(()) + } + AlterTableOperation::SuspendRecluster => { + write!(f, "SUSPEND RECLUSTER")?; + Ok(()) + } + AlterTableOperation::ResumeRecluster => { + write!(f, "RESUME RECLUSTER")?; + Ok(()) + } } } } @@ -669,6 +722,8 @@ pub enum TableConstraint { columns: Vec, index_options: Vec, characteristics: Option, + /// Optional Postgres nulls handling: `[ NULLS [ NOT ] DISTINCT ]` + nulls_distinct: NullsDistinctOption, }, /// MySQL [definition][1] for `PRIMARY KEY` constraints statements:\ /// * `[CONSTRAINT []] PRIMARY KEY [index_name] [index_type] () ` @@ -777,10 +832,11 @@ impl fmt::Display for TableConstraint { columns, index_options, characteristics, + nulls_distinct, } => { write!( f, - "{}UNIQUE{index_type_display:>}{}{} ({})", + "{}UNIQUE{nulls_distinct}{index_type_display:>}{}{} ({})", display_constraint_name(name), display_option_spaced(index_name), display_option(" USING ", "", index_type), @@ -829,12 +885,14 @@ impl fmt::Display for TableConstraint { } => { write!( f, - "{}FOREIGN KEY ({}) REFERENCES {}({})", + "{}FOREIGN KEY ({}) REFERENCES {}", display_constraint_name(name), display_comma_separated(columns), foreign_table, - 
display_comma_separated(referred_columns), )?; + if !referred_columns.is_empty() { + write!(f, "({})", display_comma_separated(referred_columns))?; + } if let Some(action) = on_delete { write!(f, " ON DELETE {action}")?; } @@ -988,6 +1046,31 @@ impl fmt::Display for IndexOption { } } +/// [Postgres] unique index nulls handling option: `[ NULLS [ NOT ] DISTINCT ]` +/// +/// [Postgres]: https://www.postgresql.org/docs/17/sql-altertable.html +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum NullsDistinctOption { + /// Not specified + None, + /// NULLS DISTINCT + Distinct, + /// NULLS NOT DISTINCT + NotDistinct, +} + +impl fmt::Display for NullsDistinctOption { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::None => Ok(()), + Self::Distinct => write!(f, " NULLS DISTINCT"), + Self::NotDistinct => write!(f, " NULLS NOT DISTINCT"), + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 82ed1edc4..0eaafbc11 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -40,18 +40,21 @@ use sqlparser_derive::{Visit, VisitMut}; use crate::tokenizer::Span; pub use self::data_type::{ - ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, + ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, EnumMember, ExactNumberInfo, StructBracketKind, TimezoneInfo, }; -pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue, Use}; +pub use self::dcl::{ + AlterRoleOperation, ResetConfig, RoleOption, SecondaryRoles, SetConfigValue, Use, +}; pub use self::ddl::{ AlterColumnOperation, AlterIndexOperation, AlterPolicyOperation, AlterTableOperation, ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, ColumnPolicy, ColumnPolicyProperty, ConstraintCharacteristics, CreateFunction, Deduplicate, DeferrableInitial, GeneratedAs, GeneratedExpressionMode, IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, - IdentityPropertyKind, IdentityPropertyOrder, IndexOption, IndexType, KeyOrIndexDisplay, Owner, - Partition, ProcedureParam, ReferentialAction, TableConstraint, TagsColumnOption, - UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, ViewColumnDef, + IdentityPropertyKind, IdentityPropertyOrder, IndexOption, IndexType, KeyOrIndexDisplay, + NullsDistinctOption, Owner, Partition, ProcedureParam, ReferentialAction, TableConstraint, + TagsColumnOption, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, + ViewColumnDef, }; pub use self::dml::{CreateIndex, CreateTable, Delete, Insert}; pub use self::operator::{BinaryOperator, UnaryOperator}; @@ -66,8 +69,11 @@ pub use self::query::{ OrderBy, OrderByExpr, PivotValueSource, ProjectionSelect, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, - TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableVersion, TableWithJoins, - Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, + TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableSample, + TableSampleBucket, TableSampleKind, TableSampleMethod, 
TableSampleModifier, + TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, TableSampleUnit, TableVersion, + TableWithJoins, Top, TopQuantity, UpdateTableFromKind, ValueTableMode, Values, + WildcardAdditionalOptions, With, WithFill, }; pub use self::trigger::{ @@ -453,40 +459,6 @@ pub enum CastFormat { ValueAtTimeZone(Value, Value), } -/// Represents the syntax/style used in a map access. -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum MapAccessSyntax { - /// Access using bracket notation. `mymap[mykey]` - Bracket, - /// Access using period notation. `mymap.mykey` - Period, -} - -/// Expression used to access a value in a nested structure. -/// -/// Example: `SAFE_OFFSET(0)` in -/// ```sql -/// SELECT mymap[SAFE_OFFSET(0)]; -/// ``` -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct MapAccessKey { - pub key: Expr, - pub syntax: MapAccessSyntax, -} - -impl fmt::Display for MapAccessKey { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.syntax { - MapAccessSyntax::Bracket => write!(f, "[{}]", self.key), - MapAccessSyntax::Period => write!(f, ".{}", self.key), - } - } -} - /// An element of a JSON path. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -623,6 +595,28 @@ pub enum Expr { Identifier(Ident), /// Multi-part identifier, e.g. `table_alias.column` or `schema.table.col` CompoundIdentifier(Vec), + /// Multi-part expression access. + /// + /// This structure represents an access chain in structured / nested types + /// such as maps, arrays, and lists: + /// - Array + /// - A 1-dim array `a[1]` will be represented like: + /// `CompoundFieldAccess(Ident('a'), vec![Subscript(1)]` + /// - A 2-dim array `a[1][2]` will be represented like: + /// `CompoundFieldAccess(Ident('a'), vec![Subscript(1), Subscript(2)]` + /// - Map or Struct (Bracket-style) + /// - A map `a['field1']` will be represented like: + /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field')]` + /// - A 2-dim map `a['field1']['field2']` will be represented like: + /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Subscript('field2')]` + /// - Struct (Dot-style) (only effect when the chain contains both subscript and expr) + /// - A struct access `a[field1].field2` will be represented like: + /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Ident('field2')]` + /// - If a struct access likes `a.field1.field2`, it will be represented by CompoundIdentifier([a, field1, field2]) + CompoundFieldAccess { + root: Box, + access_chain: Vec, + }, /// Access data nested in a value containing semi-structured data, such as /// the `VARIANT` type on Snowflake. for example `src:customer[0].name`. /// @@ -634,7 +628,7 @@ pub enum Expr { /// The path to the data to extract. path: JsonPath, }, - /// CompositeAccess (postgres) eg: SELECT (information_schema._pg_expandarray(array['i','i'])).n + /// CompositeAccess eg: SELECT foo(bar).z, (information_schema._pg_expandarray(array['i','i'])).n CompositeAccess { expr: Box, key: Ident, @@ -883,14 +877,6 @@ pub enum Expr { data_type: DataType, value: String, }, - /// Access a map-like object by field (e.g. 
`column['field']` or `column[4]` - /// Note that depending on the dialect, struct like accesses may be - /// parsed as [`Subscript`](Self::Subscript) or [`MapAccess`](Self::MapAccess) - /// - MapAccess { - column: Box, - keys: Vec, - }, /// Scalar function call e.g. `LEFT(foo, 5)` Function(Function), /// Arbitrary expr method call @@ -979,11 +965,6 @@ pub enum Expr { /// ``` /// [1]: https://duckdb.org/docs/sql/data_types/map#creating-maps Map(Map), - /// An access of nested data using subscript syntax, for example `array[2]`. - Subscript { - expr: Box, - subscript: Box, - }, /// An array expression e.g. `ARRAY[1, 2]` Array(Array), /// An interval expression e.g. `INTERVAL '1' YEAR` @@ -1100,6 +1081,27 @@ impl fmt::Display for Subscript { } } +/// An element of a [`Expr::CompoundFieldAccess`]. +/// It can be an expression or a subscript. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AccessExpr { + /// Accesses a field using dot notation, e.g. `foo.bar.baz`. + Dot(Expr), + /// Accesses a field or array element using bracket notation, e.g. `foo['bar']`. + Subscript(Subscript), +} + +impl fmt::Display for AccessExpr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + AccessExpr::Dot(expr) => write!(f, ".{}", expr), + AccessExpr::Subscript(subscript) => write!(f, "[{}]", subscript), + } + } +} + /// A lambda function. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1292,15 +1294,20 @@ impl fmt::Display for CastFormat { } impl fmt::Display for Expr { + #[cfg_attr(feature = "recursive-protection", recursive::recursive)] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Expr::Identifier(s) => write!(f, "{s}"), - Expr::MapAccess { column, keys } => { - write!(f, "{column}{}", display_separated(keys, "")) - } Expr::Wildcard(_) => f.write_str("*"), Expr::QualifiedWildcard(prefix, _) => write!(f, "{}.*", prefix), Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")), + Expr::CompoundFieldAccess { root, access_chain } => { + write!(f, "{}", root)?; + for field in access_chain { + write!(f, "{}", field)?; + } + Ok(()) + } Expr::IsTrue(ast) => write!(f, "{ast} IS TRUE"), Expr::IsNotTrue(ast) => write!(f, "{ast} IS NOT TRUE"), Expr::IsFalse(ast) => write!(f, "{ast} IS FALSE"), @@ -1731,12 +1738,6 @@ impl fmt::Display for Expr { Expr::Map(map) => { write!(f, "{map}") } - Expr::Subscript { - expr, - subscript: key, - } => { - write!(f, "{expr}[{key}]") - } Expr::Array(set) => { write!(f, "{set}") } @@ -2366,6 +2367,7 @@ pub enum Statement { cache_metadata: bool, noscan: bool, compute_statistics: bool, + has_table_keyword: bool, }, /// ```sql /// TRUNCATE @@ -2489,7 +2491,7 @@ pub enum Statement { /// Column assignments assignments: Vec, /// Table which provide value to be set - from: Option, + from: Option, /// WHERE selection: Option, /// RETURNING @@ -2771,6 +2773,18 @@ pub enum Statement { version: Option, }, /// ```sql + /// DROP EXTENSION [ IF EXISTS ] name [, ...] [ CASCADE | RESTRICT ] + /// + /// Note: this is a PostgreSQL-specific statement. 
+ /// https://www.postgresql.org/docs/current/sql-dropextension.html + /// ``` + DropExtension { + names: Vec, + if_exists: bool, + /// `CASCADE` or `RESTRICT` + cascade_or_restrict: Option, + }, + /// ```sql /// FETCH /// ``` /// Retrieve rows from a query using a cursor @@ -2961,6 +2975,7 @@ pub enum Statement { StartTransaction { modes: Vec, begin: bool, + transaction: Option, /// Only for SQLite modifier: Option, }, @@ -3250,6 +3265,9 @@ pub enum Statement { /// /// [SQLite](https://sqlite.org/lang_explain.html) query_plan: bool, + /// `EXPLAIN ESTIMATE` + /// [Clickhouse](https://clickhouse.com/docs/en/sql-reference/statements/explain#explain-estimate) + estimate: bool, /// A SQL query that specifies what to explain statement: Box, /// Optional output format of explain @@ -3482,6 +3500,7 @@ impl fmt::Display for Statement { verbose, analyze, query_plan, + estimate, statement, format, options, @@ -3494,6 +3513,9 @@ impl fmt::Display for Statement { if *analyze { write!(f, "ANALYZE ")?; } + if *estimate { + write!(f, "ESTIMATE ")?; + } if *verbose { write!(f, "VERBOSE ")?; @@ -3655,8 +3677,13 @@ impl fmt::Display for Statement { cache_metadata, noscan, compute_statistics, + has_table_keyword, } => { - write!(f, "ANALYZE TABLE {table_name}")?; + write!( + f, + "ANALYZE{}{table_name}", + if *has_table_keyword { " TABLE " } else { " " } + )?; if let Some(ref parts) = partitions { if !parts.is_empty() { write!(f, " PARTITION ({})", display_comma_separated(parts))?; @@ -3748,10 +3775,13 @@ impl fmt::Display for Statement { write!(f, "{or} ")?; } write!(f, "{table}")?; + if let Some(UpdateTableFromKind::BeforeSet(from)) = from { + write!(f, " FROM {from}")?; + } if !assignments.is_empty() { write!(f, " SET {}", display_comma_separated(assignments))?; } - if let Some(from) = from { + if let Some(UpdateTableFromKind::AfterSet(from)) = from { write!(f, " FROM {from}")?; } if let Some(selection) = selection { @@ -4029,6 +4059,21 @@ impl fmt::Display for Statement { Ok(()) } + Statement::DropExtension { + names, + if_exists, + cascade_or_restrict, + } => { + write!(f, "DROP EXTENSION")?; + if *if_exists { + write!(f, " IF EXISTS")?; + } + write!(f, " {}", display_comma_separated(names))?; + if let Some(cascade_or_restrict) = cascade_or_restrict { + write!(f, " {cascade_or_restrict}")?; + } + Ok(()) + } Statement::CreateRole { names, if_not_exists, @@ -4536,16 +4581,20 @@ impl fmt::Display for Statement { Statement::StartTransaction { modes, begin: syntax_begin, + transaction, modifier, } => { if *syntax_begin { if let Some(modifier) = *modifier { - write!(f, "BEGIN {} TRANSACTION", modifier)?; + write!(f, "BEGIN {}", modifier)?; } else { - write!(f, "BEGIN TRANSACTION")?; + write!(f, "BEGIN")?; } } else { - write!(f, "START TRANSACTION")?; + write!(f, "START")?; + } + if let Some(transaction) = transaction { + write!(f, " {transaction}")?; } if !modes.is_empty() { write!(f, " {}", display_comma_separated(modes))?; @@ -5040,6 +5089,24 @@ pub enum TruncateCascadeOption { Restrict, } +/// Transaction started with [ TRANSACTION | WORK ] +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum BeginTransactionKind { + Transaction, + Work, +} + +impl Display for BeginTransactionKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + BeginTransactionKind::Transaction => write!(f, "TRANSACTION"), + BeginTransactionKind::Work => write!(f, 
"WORK"), + } + } +} + /// Can use to describe options in create sequence or table column type identity /// [ MINVALUE minvalue | NO MINVALUE ] [ MAXVALUE maxvalue | NO MAXVALUE ] #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -5515,6 +5582,15 @@ impl fmt::Display for CloseCursor { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Function { pub name: ObjectName, + /// Flags whether this function call uses the [ODBC syntax]. + /// + /// Example: + /// ```sql + /// SELECT {fn CONCAT('foo', 'bar')} + /// ``` + /// + /// [ODBC syntax]: https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/scalar-function-calls?view=sql-server-2017 + pub uses_odbc_syntax: bool, /// The parameters to the function, including any options specified within the /// delimiting parentheses. /// @@ -5553,6 +5629,10 @@ pub struct Function { impl fmt::Display for Function { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.uses_odbc_syntax { + write!(f, "{{fn ")?; + } + write!(f, "{}{}{}", self.name, self.parameters, self.args)?; if !self.within_group.is_empty() { @@ -5575,6 +5655,10 @@ impl fmt::Display for Function { write!(f, " OVER {o}")?; } + if self.uses_odbc_syntax { + write!(f, "}}")?; + } + Ok(()) } } diff --git a/src/ast/query.rs b/src/ast/query.rs index ad7fd261e..9e4e9e2ef 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -350,7 +350,9 @@ impl fmt::Display for Select { } } - write!(f, " {}", display_comma_separated(&self.projection))?; + if !self.projection.is_empty() { + write!(f, " {}", display_comma_separated(&self.projection))?; + } if let Some(ref into) = self.into { write!(f, " {into}")?; @@ -1002,6 +1004,9 @@ pub enum TableFactor { partitions: Vec, /// Optional PartiQL JsonPath: json_path: Option, + /// Optional table sample modifier + /// See: + sample: Option, }, Derived { lateral: bool, @@ -1146,6 +1151,184 @@ pub enum TableFactor { }, } +/// The table sample modifier options +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] + +pub enum TableSampleKind { + /// Table sample located before the table alias option + BeforeTableAlias(Box), + /// Table sample located after the table alias option + AfterTableAlias(Box), +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableSample { + pub modifier: TableSampleModifier, + pub name: Option, + pub quantity: Option, + pub seed: Option, + pub bucket: Option, + pub offset: Option, +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableSampleModifier { + Sample, + TableSample, +} + +impl fmt::Display for TableSampleModifier { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TableSampleModifier::Sample => write!(f, "SAMPLE")?, + TableSampleModifier::TableSample => write!(f, "TABLESAMPLE")?, + } + Ok(()) + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableSampleQuantity { + pub parenthesized: bool, + pub value: Expr, + pub unit: Option, +} + +impl fmt::Display for 
TableSampleQuantity { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.parenthesized { + write!(f, "(")?; + } + write!(f, "{}", self.value)?; + if let Some(unit) = &self.unit { + write!(f, " {}", unit)?; + } + if self.parenthesized { + write!(f, ")")?; + } + Ok(()) + } +} + +/// The table sample method names +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableSampleMethod { + Row, + Bernoulli, + System, + Block, +} + +impl fmt::Display for TableSampleMethod { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TableSampleMethod::Bernoulli => write!(f, "BERNOULLI"), + TableSampleMethod::Row => write!(f, "ROW"), + TableSampleMethod::System => write!(f, "SYSTEM"), + TableSampleMethod::Block => write!(f, "BLOCK"), + } + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableSampleSeed { + pub modifier: TableSampleSeedModifier, + pub value: Value, +} + +impl fmt::Display for TableSampleSeed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} ({})", self.modifier, self.value)?; + Ok(()) + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableSampleSeedModifier { + Repeatable, + Seed, +} + +impl fmt::Display for TableSampleSeedModifier { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TableSampleSeedModifier::Repeatable => write!(f, "REPEATABLE"), + TableSampleSeedModifier::Seed => write!(f, "SEED"), + } + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableSampleUnit { + Rows, + Percent, +} + +impl fmt::Display for TableSampleUnit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TableSampleUnit::Percent => write!(f, "PERCENT"), + TableSampleUnit::Rows => write!(f, "ROWS"), + } + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableSampleBucket { + pub bucket: Value, + pub total: Value, + pub on: Option, +} + +impl fmt::Display for TableSampleBucket { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "BUCKET {} OUT OF {}", self.bucket, self.total)?; + if let Some(on) = &self.on { + write!(f, " ON {}", on)?; + } + Ok(()) + } +} +impl fmt::Display for TableSample { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, " {}", self.modifier)?; + if let Some(name) = &self.name { + write!(f, " {}", name)?; + } + if let Some(quantity) = &self.quantity { + write!(f, " {}", quantity)?; + } + if let Some(seed) = &self.seed { + write!(f, " {}", seed)?; + } + if let Some(bucket) = &self.bucket { + write!(f, " ({})", bucket)?; + } + if let Some(offset) = &self.offset { + write!(f, " OFFSET {}", offset)?; + } + Ok(()) + } +} + /// The source of values in a `PIVOT` operation. 
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1404,6 +1587,7 @@ impl fmt::Display for TableFactor { partitions, with_ordinality, json_path, + sample, } => { write!(f, "{name}")?; if let Some(json_path) = json_path { @@ -1426,6 +1610,9 @@ impl fmt::Display for TableFactor { if *with_ordinality { write!(f, " WITH ORDINALITY")?; } + if let Some(TableSampleKind::BeforeTableAlias(sample)) = sample { + write!(f, "{sample}")?; + } if let Some(alias) = alias { write!(f, " AS {alias}")?; } @@ -1435,6 +1622,9 @@ impl fmt::Display for TableFactor { if let Some(version) = version { write!(f, "{version}")?; } + if let Some(TableSampleKind::AfterTableAlias(sample)) = sample { + write!(f, "{sample}")?; + } Ok(()) } TableFactor::Derived { @@ -2600,3 +2790,16 @@ impl fmt::Display for ValueTableMode { } } } + +/// The `FROM` clause of an `UPDATE TABLE` statement +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum UpdateTableFromKind { + /// Update Statment where the 'FROM' clause is before the 'SET' keyword (Supported by Snowflake) + /// For Example: `UPDATE FROM t1 SET t1.name='aaa'` + BeforeSet(TableWithJoins), + /// Update Statment where the 'FROM' clause is after the 'SET' keyword (Which is the standard way) + /// For Example: `UPDATE SET t1.name='aaa' FROM t1` + AfterSet(TableWithJoins), +} diff --git a/src/ast/spans.rs b/src/ast/spans.rs index b84ee78c9..0e4af57db 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1,22 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ use core::iter; use crate::tokenizer::Span; use super::{ - AlterColumnOperation, AlterIndexOperation, AlterTableOperation, Array, Assignment, - AssignmentTarget, CloseCursor, ClusteredIndex, ColumnDef, ColumnOption, ColumnOptionDef, - ConflictTarget, ConnectBy, ConstraintCharacteristics, CopySource, CreateIndex, CreateTable, - CreateTableOptions, Cte, Delete, DoUpdate, ExceptSelectItem, ExcludeSelectItem, Expr, - ExprWithAlias, Fetch, FromTable, Function, FunctionArg, FunctionArgExpr, - FunctionArgumentClause, FunctionArgumentList, FunctionArguments, GroupByExpr, HavingBound, - IlikeSelectItem, Insert, Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator, - JsonPath, JsonPathElem, LateralView, MatchRecognizePattern, Measure, NamedWindowDefinition, - ObjectName, Offset, OnConflict, OnConflictAction, OnInsert, OrderBy, OrderByExpr, Partition, - PivotValueSource, ProjectionSelect, Query, ReferentialAction, RenameSelectItem, - ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SqlOption, - Statement, Subscript, SymbolDefinition, TableAlias, TableAliasColumnDef, TableConstraint, - TableFactor, TableOptionsClustered, TableWithJoins, Use, Value, Values, ViewColumnDef, - WildcardAdditionalOptions, With, WithFill, + dcl::SecondaryRoles, AccessExpr, AlterColumnOperation, AlterIndexOperation, + AlterTableOperation, Array, Assignment, AssignmentTarget, CloseCursor, ClusteredIndex, + ColumnDef, ColumnOption, ColumnOptionDef, ConflictTarget, ConnectBy, ConstraintCharacteristics, + CopySource, CreateIndex, CreateTable, CreateTableOptions, Cte, Delete, DoUpdate, + ExceptSelectItem, ExcludeSelectItem, Expr, ExprWithAlias, Fetch, FromTable, Function, + FunctionArg, FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, FunctionArguments, + GroupByExpr, HavingBound, IlikeSelectItem, Insert, Interpolate, InterpolateExpr, Join, + JoinConstraint, JoinOperator, JsonPath, JsonPathElem, LateralView, MatchRecognizePattern, + Measure, NamedWindowDefinition, ObjectName, Offset, OnConflict, OnConflictAction, OnInsert, + OrderBy, OrderByExpr, Partition, PivotValueSource, ProjectionSelect, Query, ReferentialAction, + RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, + SetExpr, SqlOption, Statement, Subscript, SymbolDefinition, TableAlias, TableAliasColumnDef, + TableConstraint, TableFactor, TableOptionsClustered, TableWithJoins, UpdateTableFromKind, Use, + Value, Values, ViewColumnDef, WildcardAdditionalOptions, With, WithFill, }; /// Given an iterator of spans, return the [Span::union] of all spans. @@ -267,6 +284,7 @@ impl Spanned for Statement { cache_metadata: _, noscan: _, compute_statistics: _, + has_table_keyword: _, } => union_spans( core::iter::once(table_name.span()) .chain(partitions.iter().flat_map(|i| i.iter().map(|k| k.span()))) @@ -412,6 +430,7 @@ impl Spanned for Statement { Statement::DropSecret { .. } => Span::empty(), Statement::Declare { .. } => Span::empty(), Statement::CreateExtension { .. } => Span::empty(), + Statement::DropExtension { .. } => Span::empty(), Statement::Fetch { .. } => Span::empty(), Statement::Flush { .. } => Span::empty(), Statement::Discard { .. 
} => Span::empty(), @@ -484,6 +503,13 @@ impl Spanned for Use { Use::Schema(object_name) => object_name.span(), Use::Database(object_name) => object_name.span(), Use::Warehouse(object_name) => object_name.span(), + Use::Role(object_name) => object_name.span(), + Use::SecondaryRoles(secondary_roles) => { + if let SecondaryRoles::List(roles) = secondary_roles { + return union_spans(roles.iter().map(|i| i.span)); + } + Span::empty() + } Use::Object(object_name) => object_name.span(), Use::Default => Span::empty(), } @@ -587,6 +613,7 @@ impl Spanned for TableConstraint { columns, index_options: _, characteristics, + nulls_distinct: _, } => union_spans( name.iter() .map(|i| i.span) @@ -1012,6 +1039,10 @@ impl Spanned for AlterTableOperation { union_spans(table_properties.iter().map(|i| i.span())) } AlterTableOperation::OwnerTo { .. } => Span::empty(), + AlterTableOperation::ClusterBy { exprs } => union_spans(exprs.iter().map(|e| e.span())), + AlterTableOperation::DropClusteringKey => Span::empty(), + AlterTableOperation::SuspendRecluster => Span::empty(), + AlterTableOperation::ResumeRecluster => Span::empty(), } } } @@ -1232,6 +1263,9 @@ impl Spanned for Expr { Expr::Identifier(ident) => ident.span, Expr::CompoundIdentifier(vec) => union_spans(vec.iter().map(|i| i.span)), Expr::CompositeAccess { expr, key } => expr.span().union(&key.span), + Expr::CompoundFieldAccess { root, access_chain } => { + union_spans(iter::once(root.span()).chain(access_chain.iter().map(|i| i.span()))) + } Expr::IsFalse(expr) => expr.span(), Expr::IsNotFalse(expr) => expr.span(), Expr::IsTrue(expr) => expr.span(), @@ -1311,9 +1345,6 @@ impl Spanned for Expr { Expr::Nested(expr) => expr.span(), Expr::Value(value) => value.span(), Expr::TypedString { .. } => Span::empty(), - Expr::MapAccess { column, keys } => column - .span() - .union(&union_spans(keys.iter().map(|i| i.key.span()))), Expr::Function(function) => function.span(), Expr::GroupingSets(vec) => { union_spans(vec.iter().flat_map(|i| i.iter().map(|k| k.span()))) @@ -1409,7 +1440,6 @@ impl Spanned for Expr { Expr::Named { .. 
} => Span::empty(), Expr::Dictionary(_) => Span::empty(), Expr::Map(_) => Span::empty(), - Expr::Subscript { expr, subscript } => expr.span().union(&subscript.span()), Expr::Interval(interval) => interval.value.span(), Expr::Wildcard(token) => token.0.span, Expr::QualifiedWildcard(object_name, token) => union_spans( @@ -1448,6 +1478,15 @@ impl Spanned for Subscript { } } +impl Spanned for AccessExpr { + fn span(&self) -> Span { + match self { + AccessExpr::Dot(ident) => ident.span(), + AccessExpr::Subscript(subscript) => subscript.span(), + } + } +} + impl Spanned for ObjectName { fn span(&self) -> Span { let ObjectName(segments) = self; @@ -1471,6 +1510,7 @@ impl Spanned for Function { fn span(&self) -> Span { let Function { name, + uses_odbc_syntax: _, parameters, args, filter, @@ -1674,6 +1714,7 @@ impl Spanned for TableFactor { with_ordinality: _, partitions: _, json_path: _, + sample: _, } => union_spans( name.0 .iter() @@ -2071,6 +2112,15 @@ impl Spanned for SelectInto { } } +impl Spanned for UpdateTableFromKind { + fn span(&self) -> Span { + match self { + UpdateTableFromKind::BeforeSet(from) => from.span(), + UpdateTableFromKind::AfterSet(from) => from.span(), + } + } +} + #[cfg(test)] pub mod tests { use crate::dialect::{Dialect, GenericDialect, SnowflakeDialect}; diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index eacd268a4..c824ad2f3 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -530,6 +530,7 @@ where /// let old_expr = std::mem::replace(expr, Expr::Value(Value::Null)); /// *expr = Expr::Function(Function { /// name: ObjectName(vec![Ident::new("f")]), +/// uses_odbc_syntax: false, /// args: FunctionArguments::List(FunctionArgumentList { /// duplicate_treatment: None, /// args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(old_expr))], @@ -893,4 +894,29 @@ mod tests { assert_eq!(actual, expected) } } + + struct QuickVisitor; // [`TestVisitor`] is too slow to iterate over thousands of nodes + + impl Visitor for QuickVisitor { + type Break = (); + } + + #[test] + fn overflow() { + let cond = (0..1000) + .map(|n| format!("X = {}", n)) + .collect::>() + .join(" OR "); + let sql = format!("SELECT x where {0}", cond); + + let dialect = GenericDialect {}; + let tokens = Tokenizer::new(&dialect, sql.as_str()).tokenize().unwrap(); + let s = Parser::new(&dialect) + .with_tokens(tokens) + .parse_statement() + .unwrap(); + + let mut visitor = QuickVisitor {}; + s.visit(&mut visitor); + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 61e5070fb..f852152a0 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -127,4 +127,8 @@ impl Dialect for GenericDialect { fn supports_struct_literal(&self) -> bool { true } + + fn supports_empty_projections(&self) -> bool { + true + } } diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 571f9b9ba..80f44cf7c 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -61,4 +61,9 @@ impl Dialect for HiveDialect { fn supports_load_data(&self) -> bool { true } + + /// See Hive + fn supports_table_sample_before_alias(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index f40cba719..1343efca6 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -75,6 +75,15 @@ macro_rules! dialect_of { }; } +// Similar to above, but for applying directly against an instance of dialect +// instead of a struct member named dialect. This avoids lifetime issues when +// mixing match guards and token references. +macro_rules! 
dialect_is { + ($dialect:ident is $($dialect_type:ty)|+) => { + ($($dialect.is::<$dialect_type>())||+) + } +} + /// Encapsulates the differences between SQL implementations. /// /// # SQL Dialects @@ -128,14 +137,39 @@ pub trait Dialect: Debug + Any { ch == '"' || ch == '`' } - /// Return the character used to quote identifiers. - fn identifier_quote_style(&self, _identifier: &str) -> Option { + /// Determine if a character starts a potential nested quoted identifier. + /// Example: RedShift supports the following quote styles to all mean the same thing: + /// ```sql + /// SELECT 1 AS foo; + /// SELECT 1 AS "foo"; + /// SELECT 1 AS [foo]; + /// SELECT 1 AS ["foo"]; + /// ``` + fn is_nested_delimited_identifier_start(&self, _ch: char) -> bool { + false + } + + /// Only applicable whenever [`Self::is_nested_delimited_identifier_start`] returns true + /// If the next sequence of tokens potentially represent a nested identifier, then this method + /// returns a tuple containing the outer quote style, and if present, the inner (nested) quote style. + /// + /// Example (Redshift): + /// ```text + /// `["foo"]` => Some(`[`, Some(`"`)) + /// `[foo]` => Some(`[`, None) + /// `[0]` => None + /// `"foo"` => None + /// ``` + fn peek_nested_delimited_identifier_quotes( + &self, + mut _chars: Peekable>, + ) -> Option<(char, Option)> { None } - /// Determine if quoted characters are proper for identifier - fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable>) -> bool { - true + /// Return the character used to quote identifiers. + fn identifier_quote_style(&self, _identifier: &str) -> Option { + None } /// Determine if a character is a valid start character for an unquoted identifier @@ -385,6 +419,16 @@ pub trait Dialect: Debug + Any { false } + /// Return true if the dialect supports empty projections in SELECT statements + /// + /// Example + /// ```sql + /// SELECT from table_name + /// ``` + fn supports_empty_projections(&self) -> bool { + false + } + /// Dialect-specific infix parser override /// /// This method is called to parse the next infix expression. @@ -592,6 +636,12 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if this dialect allows dollar placeholders + /// e.g. `SELECT $var` (SQLite) + fn supports_dollar_placeholder(&self) -> bool { + false + } + /// Does the dialect support with clause in create index statement? /// e.g. `CREATE INDEX idx ON t WITH (key = value, key2)` fn supports_create_index_with_clause(&self) -> bool { @@ -707,6 +757,17 @@ pub trait Dialect: Debug + Any { fn is_reserved_for_identifier(&self, kw: Keyword) -> bool { keywords::RESERVED_FOR_IDENTIFIER.contains(&kw) } + + /// Returns true if this dialect supports the `TABLESAMPLE` option + /// before the table alias option. 
For example: + /// + /// Table sample before alias: `SELECT * FROM tbl AS t TABLESAMPLE (10)` + /// Table sample after alias: `SELECT * FROM tbl TABLESAMPLE (10) AS t` + /// + /// + fn supports_table_sample_before_alias(&self) -> bool { + false + } } /// This represents the operators for which precedence must be defined @@ -858,6 +919,17 @@ mod tests { self.0.is_delimited_identifier_start(ch) } + fn is_nested_delimited_identifier_start(&self, ch: char) -> bool { + self.0.is_nested_delimited_identifier_start(ch) + } + + fn peek_nested_delimited_identifier_quotes( + &self, + chars: std::iter::Peekable<std::str::Chars<'_>>, + ) -> Option<(char, Option<char>)> { + self.0.peek_nested_delimited_identifier_quotes(chars) + } + fn identifier_quote_style(&self, identifier: &str) -> Option<char> { self.0.identifier_quote_style(identifier) } @@ -866,13 +938,6 @@ ... - fn is_proper_identifier_inside_quotes( - &self, - chars: std::iter::Peekable<std::str::Chars<'_>>, - ) -> bool { - self.0.is_proper_identifier_inside_quotes(chars) - } - fn supports_filter_during_aggregation(&self) -> bool { self.0.supports_filter_during_aggregation() } diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 197ce48d4..1ede59f5a 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -113,7 +113,7 @@ fn parse_lock_tables(parser: &mut Parser) -> Result<Statement, ParserError> { // tbl_name [[AS] alias] lock_type fn parse_lock_table(parser: &mut Parser) -> Result<LockTable, ParserError> { - let table = parser.parse_identifier(false)?; + let table = parser.parse_identifier()?; let alias = parser.parse_optional_alias(&[Keyword::READ, Keyword::WRITE, Keyword::LOW_PRIORITY])?; let lock_type = parse_lock_tables_type(parser)?; diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index dcdcc88c1..6a13a386a 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -231,15 +231,25 @@ impl Dialect for PostgreSqlDialect { fn supports_named_fn_args_with_expr_name(&self) -> bool { true } + + /// Return true if the dialect supports empty projections in SELECT statements + /// + /// Example + /// ```sql + /// SELECT from table_name + /// ``` + fn supports_empty_projections(&self) -> bool { + true + } } pub fn parse_create(parser: &mut Parser) -> Option<Result<Statement, ParserError>> { let name = parser.maybe_parse(|parser| -> Result<ObjectName, ParserError> { - parser.expect_keyword(Keyword::CREATE)?; - parser.expect_keyword(Keyword::TYPE)?; + parser.expect_keyword_is(Keyword::CREATE)?; + parser.expect_keyword_is(Keyword::TYPE)?; let name = parser.parse_object_name(false)?; - parser.expect_keyword(Keyword::AS)?; - parser.expect_keyword(Keyword::ENUM)?; + parser.expect_keyword_is(Keyword::AS)?; + parser.expect_keyword_is(Keyword::ENUM)?; Ok(name) }); @@ -258,7 +268,7 @@ pub fn parse_create_type_as_enum( return parser.expected("'(' after CREATE TYPE AS ENUM", parser.peek_token()); } - let labels = parser.parse_comma_separated0(|p| p.parse_identifier(false), Token::RParen)?; + let labels = parser.parse_comma_separated0(|p| p.parse_identifier(), Token::RParen)?; parser.expect_token(&Token::RParen)?; Ok(Statement::CreateType {
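These two hooks work as a pair: the tokenizer first asks whether a character can open a nested quoted identifier, and only then peeks ahead to classify the outer and inner quote styles. The Redshift implementation in the next hunk is the first user of the hooks; below is a minimal sketch of the behavior it is expected to produce (a hypothetical test, not part of this diff), mirroring the table in the doc comment:

```rust
use sqlparser::dialect::{Dialect, RedshiftSqlDialect};

fn main() {
    let dialect = RedshiftSqlDialect {};

    // `["foo"]`: `[` is the outer quote style, `"` the nested one.
    assert_eq!(
        dialect.peek_nested_delimited_identifier_quotes(r#"["foo"]"#.chars().peekable()),
        Some(('[', Some('"')))
    );
    // `[foo]`: delimited identifier with no nested quoting.
    assert_eq!(
        dialect.peek_nested_delimited_identifier_quotes("[foo]".chars().peekable()),
        Some(('[', None))
    );
    // `[0]` cannot start an identifier, so it is left to JSON path parsing.
    assert_eq!(
        dialect.peek_nested_delimited_identifier_quotes("[0]".chars().peekable()),
        None
    );
}
```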
diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index 48eb00ab1..55405ba53 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -32,21 +32,51 @@ pub struct RedshiftSqlDialect {} // in the Postgres dialect, the query will be parsed as an array, while in the Redshift dialect it will // be a json path impl Dialect for RedshiftSqlDialect { - fn is_delimited_identifier_start(&self, ch: char) -> bool { - ch == '"' || ch == '[' + /// Determine if a character starts a potential nested quoted identifier. + /// Example: RedShift supports the following quote styles to all mean the same thing: + /// ```sql + /// SELECT 1 AS foo; + /// SELECT 1 AS "foo"; + /// SELECT 1 AS [foo]; + /// SELECT 1 AS ["foo"]; + /// ``` + fn is_nested_delimited_identifier_start(&self, ch: char) -> bool { + ch == '[' } - /// Determine if quoted characters are proper for identifier - /// It's needed to distinguish treating square brackets as quotes from - /// treating them as json path. If there is identifier then we assume - /// there is no json path. - fn is_proper_identifier_inside_quotes(&self, mut chars: Peekable<Chars<'_>>) -> bool { + /// Only applicable whenever [`Self::is_nested_delimited_identifier_start`] returns true. + /// If the next sequence of tokens potentially represents a nested identifier, then this method + /// returns a tuple containing the outer quote style, and if present, the inner (nested) quote style. + /// + /// Example (Redshift): + /// ```text + /// `["foo"]` => Some(`[`, Some(`"`)) + /// `[foo]` => Some(`[`, None) + /// `[0]` => None + /// `"foo"` => None + /// ``` + fn peek_nested_delimited_identifier_quotes( + &self, + mut chars: Peekable<Chars<'_>>, + ) -> Option<(char, Option<char>)> { + if chars.peek() != Some(&'[') { + return None; + } + chars.next(); + let mut not_white_chars = chars.skip_while(|ch| ch.is_whitespace()).peekable(); + if let Some(&ch) = not_white_chars.peek() { - return self.is_identifier_start(ch); + if ch == '"' { + return Some(('[', Some('"'))); + } + if self.is_identifier_start(ch) { + return Some(('[', None)); + } } - false + + None } fn is_identifier_start(&self, ch: char) -> bool { diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 77d2ccff1..249241d73 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -234,6 +234,10 @@ impl Dialect for SnowflakeDialect { RESERVED_FOR_IDENTIFIER.contains(&kw) } } + + fn supports_partiql(&self) -> bool { + true + } } /// Parse snowflake create table statement.
@@ -269,7 +273,7 @@ pub fn parse_create_table( match &next_token.token { Token::Word(word) => match word.keyword { Keyword::COPY => { - parser.expect_keyword(Keyword::GRANTS)?; + parser.expect_keyword_is(Keyword::GRANTS)?; builder = builder.copy_grants(true); } Keyword::COMMENT => { @@ -293,10 +297,10 @@ pub fn parse_create_table( break; } Keyword::CLUSTER => { - parser.expect_keyword(Keyword::BY)?; + parser.expect_keyword_is(Keyword::BY)?; parser.expect_token(&Token::LParen)?; let cluster_by = Some(WrappedCollection::Parentheses( - parser.parse_comma_separated(|p| p.parse_identifier(false))?, + parser.parse_comma_separated(|p| p.parse_identifier())?, )); parser.expect_token(&Token::RParen)?; @@ -356,16 +360,16 @@ pub fn parse_create_table( parser.prev_token(); } Keyword::AGGREGATION => { - parser.expect_keyword(Keyword::POLICY)?; + parser.expect_keyword_is(Keyword::POLICY)?; let aggregation_policy = parser.parse_object_name(false)?; builder = builder.with_aggregation_policy(Some(aggregation_policy)); } Keyword::ROW => { parser.expect_keywords(&[Keyword::ACCESS, Keyword::POLICY])?; let policy = parser.parse_object_name(false)?; - parser.expect_keyword(Keyword::ON)?; + parser.expect_keyword_is(Keyword::ON)?; parser.expect_token(&Token::LParen)?; - let columns = parser.parse_comma_separated(|p| p.parse_identifier(false))?; + let columns = parser.parse_comma_separated(|p| p.parse_identifier())?; parser.expect_token(&Token::RParen)?; builder = @@ -377,6 +381,10 @@ pub fn parse_create_table( parser.expect_token(&Token::RParen)?; builder = builder.with_tags(Some(tags)); } + Keyword::ON if parser.parse_keyword(Keyword::COMMIT) => { + let on_commit = Some(parser.parse_create_table_on_commit()?); + builder = builder.on_commit(on_commit); + } _ => { return parser.expected("end of statement", next_token); } @@ -528,15 +536,15 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result { let from_stage: ObjectName; let stage_params: StageParamsObject; - parser.expect_keyword(Keyword::FROM)?; + parser.expect_keyword_is(Keyword::FROM)?; // check if data load transformations are present match parser.next_token().token { Token::LParen => { // data load with transformations - parser.expect_keyword(Keyword::SELECT)?; + parser.expect_keyword_is(Keyword::SELECT)?; from_transformations = parse_select_items_for_data_load(parser)?; - parser.expect_keyword(Keyword::FROM)?; + parser.expect_keyword_is(Keyword::FROM)?; from_stage = parse_snowflake_stage_name(parser)?; stage_params = parse_stage_params(parser)?; @@ -852,7 +860,7 @@ fn parse_identity_property(parser: &mut Parser) -> Result Result { - let policy_name = parser.parse_identifier(false)?; + let policy_name = parser.parse_identifier()?; let using_columns = if parser.parse_keyword(Keyword::USING) { parser.expect_token(&Token::LParen)?; - let columns = parser.parse_comma_separated(|p| p.parse_identifier(false))?; + let columns = parser.parse_comma_separated(|p| p.parse_identifier())?; parser.expect_token(&Token::RParen)?; Some(columns) } else { diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index 95717f9fd..138c4692c 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -81,4 +81,8 @@ impl Dialect for SQLiteDialect { fn supports_asc_desc_in_column_definition(&self) -> bool { true } + + fn supports_dollar_placeholder(&self) -> bool { + true + } } diff --git a/src/keywords.rs b/src/keywords.rs index 4ec088941..43abc2b03 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -120,19 +120,23 @@ define_keywords!( BEGIN, BEGIN_FRAME, 
BEGIN_PARTITION, + BERNOULLI, BETWEEN, BIGDECIMAL, BIGINT, BIGNUMERIC, BINARY, BINDING, + BIT, BLOB, + BLOCK, BLOOMFILTER, BOOL, BOOLEAN, BOTH, BROWSE, BTREE, + BUCKET, BUCKETS, BY, BYPASSRLS, @@ -167,6 +171,7 @@ define_keywords!( CLOSE, CLUSTER, CLUSTERED, + CLUSTERING, COALESCE, COLLATE, COLLATION, @@ -285,12 +290,15 @@ define_keywords!( ENFORCED, ENGINE, ENUM, + ENUM16, + ENUM8, EPHEMERAL, EPOCH, EQUALS, ERROR, ESCAPE, ESCAPED, + ESTIMATE, EVENT, EVERY, EXCEPT, @@ -329,6 +337,7 @@ define_keywords!( FLOAT8, FLOOR, FLUSH, + FN, FOLLOWING, FOR, FORCE, @@ -619,6 +628,7 @@ define_keywords!( READS, READ_ONLY, REAL, + RECLUSTER, RECURSIVE, REF, REFERENCES, @@ -653,6 +663,7 @@ define_keywords!( RESTRICTIVE, RESULT, RESULTSET, + RESUME, RETAIN, RETURN, RETURNING, @@ -661,6 +672,7 @@ define_keywords!( RIGHT, RLIKE, ROLE, + ROLES, ROLLBACK, ROLLUP, ROOT, @@ -672,6 +684,7 @@ define_keywords!( RUN, SAFE, SAFE_CAST, + SAMPLE, SAVEPOINT, SCHEMA, SCHEMAS, @@ -679,8 +692,10 @@ define_keywords!( SCROLL, SEARCH, SECOND, + SECONDARY, SECRET, SECURITY, + SEED, SELECT, SEMI, SENSITIVE, @@ -740,6 +755,7 @@ define_keywords!( SUM, SUPER, SUPERUSER, + SUSPEND, SWAP, SYMMETRIC, SYNC, @@ -922,6 +938,10 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::CONNECT, // Reserved for snowflake MATCH_RECOGNIZE Keyword::MATCH_RECOGNIZE, + // Reserved for Snowflake table sample + Keyword::SAMPLE, + Keyword::TABLESAMPLE, + Keyword::FROM, ]; /// Can't be used as a column alias, so that `SELECT alias` diff --git a/src/parser/alter.rs b/src/parser/alter.rs index 3ac4ab0c7..bb6782c13 100644 --- a/src/parser/alter.rs +++ b/src/parser/alter.rs @@ -51,13 +51,13 @@ impl Parser<'_> { /// /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-alterpolicy.html) pub fn parse_alter_policy(&mut self) -> Result { - let name = self.parse_identifier(false)?; - self.expect_keyword(Keyword::ON)?; + let name = self.parse_identifier()?; + self.expect_keyword_is(Keyword::ON)?; let table_name = self.parse_object_name(false)?; if self.parse_keyword(Keyword::RENAME) { - self.expect_keyword(Keyword::TO)?; - let new_name = self.parse_identifier(false)?; + self.expect_keyword_is(Keyword::TO)?; + let new_name = self.parse_identifier()?; Ok(Statement::AlterPolicy { name, table_name, @@ -100,17 +100,17 @@ impl Parser<'_> { } fn parse_mssql_alter_role(&mut self) -> Result { - let role_name = self.parse_identifier(false)?; + let role_name = self.parse_identifier()?; let operation = if self.parse_keywords(&[Keyword::ADD, Keyword::MEMBER]) { - let member_name = self.parse_identifier(false)?; + let member_name = self.parse_identifier()?; AlterRoleOperation::AddMember { member_name } } else if self.parse_keywords(&[Keyword::DROP, Keyword::MEMBER]) { - let member_name = self.parse_identifier(false)?; + let member_name = self.parse_identifier()?; AlterRoleOperation::DropMember { member_name } } else if self.parse_keywords(&[Keyword::WITH, Keyword::NAME]) { if self.consume_token(&Token::Eq) { - let role_name = self.parse_identifier(false)?; + let role_name = self.parse_identifier()?; AlterRoleOperation::RenameRole { role_name } } else { return self.expected("= after WITH NAME ", self.peek_token()); @@ -126,7 +126,7 @@ impl Parser<'_> { } fn parse_pg_alter_role(&mut self) -> Result { - let role_name = self.parse_identifier(false)?; + let role_name = self.parse_identifier()?; // [ IN DATABASE _`database_name`_ ] let in_database = if self.parse_keywords(&[Keyword::IN, Keyword::DATABASE]) { @@ -137,7 +137,7 @@ impl Parser<'_> { let operation = if 
self.parse_keyword(Keyword::RENAME) { if self.parse_keyword(Keyword::TO) { - let role_name = self.parse_identifier(false)?; + let role_name = self.parse_identifier()?; AlterRoleOperation::RenameRole { role_name } } else { return self.expected("TO after RENAME", self.peek_token()); @@ -232,7 +232,7 @@ impl Parser<'_> { Some(Keyword::BYPASSRLS) => RoleOption::BypassRLS(true), Some(Keyword::NOBYPASSRLS) => RoleOption::BypassRLS(false), Some(Keyword::CONNECTION) => { - self.expect_keyword(Keyword::LIMIT)?; + self.expect_keyword_is(Keyword::LIMIT)?; RoleOption::ConnectionLimit(Expr::Value(self.parse_number_value()?)) } Some(Keyword::CREATEDB) => RoleOption::CreateDB(true), @@ -256,7 +256,7 @@ impl Parser<'_> { Some(Keyword::SUPERUSER) => RoleOption::SuperUser(true), Some(Keyword::NOSUPERUSER) => RoleOption::SuperUser(false), Some(Keyword::VALID) => { - self.expect_keyword(Keyword::UNTIL)?; + self.expect_keyword_is(Keyword::UNTIL)?; RoleOption::ValidUntil(Expr::Value(self.parse_value()?)) } _ => self.expected("option", self.peek_token())?, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 729e5d619..0e0a4a2ac 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -73,6 +73,9 @@ mod recursion { /// Note: Uses an [`std::rc::Rc`] and [`std::cell::Cell`] in order to satisfy the Rust /// borrow checker so the automatic [`DepthGuard`] decrement a /// reference to the counter. + /// + /// Note: when "recursive-protection" feature is enabled, this crate uses additional stack overflow protection + /// for some of its recursive methods. See [`recursive::recursive`] for more information. pub(crate) struct RecursionCounter { remaining_depth: Rc>, } @@ -186,6 +189,15 @@ impl std::error::Error for ParserError {} // By default, allow expressions up to this deep before erroring const DEFAULT_REMAINING_DEPTH: usize = 50; +// A constant EOF token that can be referenced. +const EOF_TOKEN: TokenWithSpan = TokenWithSpan { + token: Token::EOF, + span: Span { + start: Location { line: 0, column: 0 }, + end: Location { line: 0, column: 0 }, + }, +}; + /// Composite types declarations using angle brackets syntax can be arbitrary /// nested such that the following declaration is possible: /// `ARRAY>` @@ -264,19 +276,58 @@ enum ParserState { ConnectBy, } +/// A SQL Parser +/// +/// This struct is the main entry point for parsing SQL queries. +/// +/// # Functionality: +/// * Parsing SQL: see examples on [`Parser::new`] and [`Parser::parse_sql`] +/// * Controlling recursion: See [`Parser::with_recursion_limit`] +/// * Controlling parser options: See [`Parser::with_options`] +/// * Providing your own tokens: See [`Parser::with_tokens`] +/// +/// # Internals +/// +/// The parser uses a [`Tokenizer`] to tokenize the input SQL string into a +/// `Vec` of [`TokenWithSpan`]s and maintains an `index` to the current token +/// being processed. The token vec may contain multiple SQL statements. +/// +/// * The "current" token is the token at `index - 1` +/// * The "next" token is the token at `index` +/// * The "previous" token is the token at `index - 2` +/// +/// If `index` is equal to the length of the token stream, the 'next' token is +/// [`Token::EOF`]. 
+/// +/// For example, the SQL string "SELECT * FROM foo" will be tokenized into +/// following tokens: +/// ```text +/// [ +/// "SELECT", // token index 0 +/// " ", // whitespace +/// "*", +/// " ", +/// "FROM", +/// " ", +/// "foo" +/// ] +/// ``` +/// +/// pub struct Parser<'a> { + /// The tokens tokens: Vec, /// The index of the first unprocessed token in [`Parser::tokens`]. index: usize, /// The current state of the parser. state: ParserState, - /// The current dialect to use. + /// The SQL dialect to use. dialect: &'a dyn Dialect, /// Additional options that allow you to mix & match behavior /// otherwise constrained to certain dialects (e.g. trailing /// commas) and/or format of parse (e.g. unescaping). options: ParserOptions, - /// Ensure the stack does not overflow by limiting recursion depth. + /// Ensures the stack does not overflow by limiting recursion depth. recursion_counter: RecursionCounter, } @@ -326,6 +377,9 @@ impl<'a> Parser<'a> { /// # Ok(()) /// # } /// ``` + /// + /// Note: when "recursive-protection" feature is enabled, this crate uses additional stack overflow protection + // for some of its recursive methods. See [`recursive::recursive`] for more information. pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self { self.recursion_counter = RecursionCounter::new(recursion_limit); self @@ -565,7 +619,7 @@ impl<'a> Parser<'a> { pub fn parse_comment(&mut self) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - self.expect_keyword(Keyword::ON)?; + self.expect_keyword_is(Keyword::ON)?; let token = self.next_token(); let (object_type, object_name) = match token.token { @@ -593,7 +647,7 @@ impl<'a> Parser<'a> { _ => self.expected("comment object_type", token)?, }; - self.expect_keyword(Keyword::IS)?; + self.expect_keyword_is(Keyword::IS)?; let comment = if self.parse_keyword(Keyword::NULL) { None } else { @@ -696,7 +750,7 @@ impl<'a> Parser<'a> { pub fn parse_msck(&mut self) -> Result { let repair = self.parse_keyword(Keyword::REPAIR); - self.expect_keyword(Keyword::TABLE)?; + self.expect_keyword_is(Keyword::TABLE)?; let table_name = self.parse_object_name(false)?; let partition_action = self .maybe_parse(|parser| { @@ -710,7 +764,7 @@ impl<'a> Parser<'a> { Some(Keyword::SYNC) => Some(AddDropSync::SYNC), _ => None, }; - parser.expect_keyword(Keyword::PARTITIONS)?; + parser.expect_keyword_is(Keyword::PARTITIONS)?; Ok(pa) })? .unwrap_or_default(); @@ -791,7 +845,7 @@ impl<'a> Parser<'a> { }; options.push(AttachDuckDBDatabaseOption::ReadOnly(boolean)); } else if self.parse_keyword(Keyword::TYPE) { - let ident = self.parse_identifier(false)?; + let ident = self.parse_identifier()?; options.push(AttachDuckDBDatabaseOption::Type(ident)); } else { return self.expected("expected one of: ), READ_ONLY, TYPE", self.peek_token()); @@ -810,9 +864,9 @@ impl<'a> Parser<'a> { pub fn parse_attach_duckdb_database(&mut self) -> Result { let database = self.parse_keyword(Keyword::DATABASE); let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let database_path = self.parse_identifier(false)?; + let database_path = self.parse_identifier()?; let database_alias = if self.parse_keyword(Keyword::AS) { - Some(self.parse_identifier(false)?) + Some(self.parse_identifier()?) 
} else { None }; @@ -830,7 +884,7 @@ impl<'a> Parser<'a> { pub fn parse_detach_duckdb_database(&mut self) -> Result { let database = self.parse_keyword(Keyword::DATABASE); let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let database_alias = self.parse_identifier(false)?; + let database_alias = self.parse_identifier()?; Ok(Statement::DetachDuckDBDatabase { if_exists, database, @@ -841,8 +895,8 @@ impl<'a> Parser<'a> { pub fn parse_attach_database(&mut self) -> Result { let database = self.parse_keyword(Keyword::DATABASE); let database_file_name = self.parse_expr()?; - self.expect_keyword(Keyword::AS)?; - let schema_name = self.parse_identifier(false)?; + self.expect_keyword_is(Keyword::AS)?; + let schema_name = self.parse_identifier()?; Ok(Statement::AttachDatabase { database, schema_name, @@ -851,7 +905,7 @@ impl<'a> Parser<'a> { } pub fn parse_analyze(&mut self) -> Result { - self.expect_keyword(Keyword::TABLE)?; + let has_table_keyword = self.parse_keyword(Keyword::TABLE); let table_name = self.parse_object_name(false)?; let mut for_columns = false; let mut cache_metadata = false; @@ -874,21 +928,21 @@ impl<'a> Parser<'a> { } Some(Keyword::NOSCAN) => noscan = true, Some(Keyword::FOR) => { - self.expect_keyword(Keyword::COLUMNS)?; + self.expect_keyword_is(Keyword::COLUMNS)?; columns = self .maybe_parse(|parser| { - parser.parse_comma_separated(|p| p.parse_identifier(false)) + parser.parse_comma_separated(|p| p.parse_identifier()) })? .unwrap_or_default(); for_columns = true } Some(Keyword::CACHE) => { - self.expect_keyword(Keyword::METADATA)?; + self.expect_keyword_is(Keyword::METADATA)?; cache_metadata = true } Some(Keyword::COMPUTE) => { - self.expect_keyword(Keyword::STATISTICS)?; + self.expect_keyword_is(Keyword::STATISTICS)?; compute_statistics = true } _ => break, @@ -896,6 +950,7 @@ impl<'a> Parser<'a> { } Ok(Statement::Analyze { + has_table_keyword, table_name, for_columns, columns, @@ -962,6 +1017,7 @@ impl<'a> Parser<'a> { let _guard = self.recursion_counter.try_decrease()?; debug!("parsing expr"); let mut expr = self.parse_prefix()?; + debug!("prefix: {:?}", expr); loop { let next_precedence = self.get_next_precedence()?; @@ -988,19 +1044,19 @@ impl<'a> Parser<'a> { } pub fn parse_savepoint(&mut self) -> Result { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; Ok(Statement::Savepoint { name }) } pub fn parse_release(&mut self) -> Result { let _ = self.parse_keyword(Keyword::SAVEPOINT); - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; Ok(Statement::ReleaseSavepoint { name }) } pub fn parse_listen(&mut self) -> Result { - let channel = self.parse_identifier(false)?; + let channel = self.parse_identifier()?; Ok(Statement::LISTEN { channel }) } @@ -1008,7 +1064,7 @@ impl<'a> Parser<'a> { let channel = if self.consume_token(&Token::Mul) { Ident::new(Expr::Wildcard(AttachedToken::empty()).to_string()) } else { - match self.parse_identifier(false) { + match self.parse_identifier() { Ok(expr) => expr, _ => { self.prev_token(); @@ -1020,7 +1076,7 @@ impl<'a> Parser<'a> { } pub fn parse_notify(&mut self) -> Result { - let channel = self.parse_identifier(false)?; + let channel = self.parse_identifier()?; let payload = if self.consume_token(&Token::Comma) { Some(self.parse_literal_string()?) 
} else { @@ -1049,18 +1105,19 @@ impl<'a> Parser<'a> { | Keyword::CURRENT_USER | Keyword::SESSION_USER | Keyword::USER - if dialect_of!(self is PostgreSqlDialect | GenericDialect) => - { - Ok(Some(Expr::Function(Function { - name: ObjectName(vec![w.to_ident(w_span)]), - parameters: FunctionArguments::None, - args: FunctionArguments::None, - null_treatment: None, - filter: None, - over: None, - within_group: vec![], - }))) - } + if dialect_of!(self is PostgreSqlDialect | GenericDialect) => + { + Ok(Some(Expr::Function(Function { + name: ObjectName(vec![w.to_ident(w_span)]), + uses_odbc_syntax: false, + parameters: FunctionArguments::None, + args: FunctionArguments::None, + null_treatment: None, + filter: None, + over: None, + within_group: vec![], + }))) + } Keyword::CURRENT_TIMESTAMP | Keyword::CURRENT_TIME | Keyword::CURRENT_DATE @@ -1075,22 +1132,22 @@ impl<'a> Parser<'a> { Keyword::TRY_CAST => Ok(Some(self.parse_cast_expr(CastKind::TryCast)?)), Keyword::SAFE_CAST => Ok(Some(self.parse_cast_expr(CastKind::SafeCast)?)), Keyword::EXISTS - // Support parsing Databricks has a function named `exists`. - if !dialect_of!(self is DatabricksDialect) - || matches!( - self.peek_nth_token(1).token, + // Support parsing Databricks has a function named `exists`. + if !dialect_of!(self is DatabricksDialect) + || matches!( + self.peek_nth_token_ref(1).token, Token::Word(Word { keyword: Keyword::SELECT | Keyword::WITH, .. }) ) => - { - Ok(Some(self.parse_exists_expr(false)?)) - } + { + Ok(Some(self.parse_exists_expr(false)?)) + } Keyword::EXTRACT => Ok(Some(self.parse_extract_expr()?)), Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)), Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)), - Keyword::POSITION if self.peek_token().token == Token::LParen => { + Keyword::POSITION if self.peek_token_ref().token == Token::LParen => { Ok(Some(self.parse_position_expr(w.to_ident(w_span))?)) } Keyword::SUBSTRING => Ok(Some(self.parse_substring_expr()?)), @@ -1098,39 +1155,41 @@ impl<'a> Parser<'a> { Keyword::TRIM => Ok(Some(self.parse_trim_expr()?)), Keyword::INTERVAL => Ok(Some(self.parse_interval()?)), // Treat ARRAY[1,2,3] as an array [1,2,3], otherwise try as subquery or a function call - Keyword::ARRAY if self.peek_token() == Token::LBracket => { + Keyword::ARRAY if *self.peek_token_ref() == Token::LBracket => { self.expect_token(&Token::LBracket)?; Ok(Some(self.parse_array_expr(true)?)) } Keyword::ARRAY - if self.peek_token() == Token::LParen - && !dialect_of!(self is ClickHouseDialect | DatabricksDialect) => - { - self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; - self.expect_token(&Token::RParen)?; - Ok(Some(Expr::Function(Function { - name: ObjectName(vec![w.to_ident(w_span)]), - parameters: FunctionArguments::None, - args: FunctionArguments::Subquery(query), - filter: None, - null_treatment: None, - over: None, - within_group: vec![], - }))) - } + if self.peek_token() == Token::LParen + && !dialect_of!(self is ClickHouseDialect | DatabricksDialect) => + { + self.expect_token(&Token::LParen)?; + let query = self.parse_query()?; + self.expect_token(&Token::RParen)?; + Ok(Some(Expr::Function(Function { + name: ObjectName(vec![w.to_ident(w_span)]), + uses_odbc_syntax: false, + parameters: FunctionArguments::None, + args: FunctionArguments::Subquery(query), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + }))) + } Keyword::NOT => Ok(Some(self.parse_not()?)), Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => { 
Ok(Some(self.parse_match_against()?)) } Keyword::STRUCT if self.dialect.supports_struct_literal() => { - Ok(Some(self.parse_struct_literal()?)) + let struct_expr = self.parse_struct_literal()?; + Ok(Some(self.parse_compound_field_access(struct_expr, vec![])?)) } Keyword::PRIOR if matches!(self.state, ParserState::ConnectBy) => { let expr = self.parse_subexpr(self.dialect.prec_value(Precedence::PlusMinus))?; Ok(Some(Expr::Prior(Box::new(expr)))) } - Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.support_map_literal_syntax() => { + Keyword::MAP if *self.peek_token_ref() == Token::LBrace && self.dialect.support_map_literal_syntax() => { Ok(Some(self.parse_duckdb_map_literal()?)) } _ => Ok(None) @@ -1144,53 +1203,39 @@ impl<'a> Parser<'a> { w_span: Span, ) -> Result { match self.peek_token().token { - Token::LParen | Token::Period => { - let mut id_parts: Vec = vec![w.to_ident(w_span)]; - let mut ending_wildcard: Option = None; - while self.consume_token(&Token::Period) { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => id_parts.push(w.to_ident(next_token.span)), - Token::Mul => { - // Postgres explicitly allows funcnm(tablenm.*) and the - // function array_agg traverses this control flow - if dialect_of!(self is PostgreSqlDialect) { - ending_wildcard = Some(next_token); - break; - } else { - return self.expected("an identifier after '.'", next_token); - } - } - Token::SingleQuotedString(s) => id_parts.push(Ident::with_quote('\'', s)), - _ => { - return self.expected("an identifier or a '*' after '.'", next_token); - } - } - } - - if let Some(wildcard_token) = ending_wildcard { - Ok(Expr::QualifiedWildcard( - ObjectName(id_parts), - AttachedToken(wildcard_token), - )) - } else if self.consume_token(&Token::LParen) { - if dialect_of!(self is SnowflakeDialect | MsSqlDialect) - && self.consume_tokens(&[Token::Plus, Token::RParen]) - { - Ok(Expr::OuterJoin(Box::new( - match <[Ident; 1]>::try_from(id_parts) { - Ok([ident]) => Expr::Identifier(ident), - Err(parts) => Expr::CompoundIdentifier(parts), - }, - ))) - } else { - self.prev_token(); - self.parse_function(ObjectName(id_parts)) - } + Token::Period => { + self.parse_compound_field_access(Expr::Identifier(w.to_ident(w_span)), vec![]) + } + Token::LParen => { + let id_parts = vec![w.to_ident(w_span)]; + if let Some(expr) = self.parse_outer_join_expr(&id_parts) { + Ok(expr) } else { - Ok(Expr::CompoundIdentifier(id_parts)) + let mut expr = self.parse_function(ObjectName(id_parts))?; + // consume all period if it's a method chain + expr = self.try_parse_method(expr)?; + let fields = vec![]; + self.parse_compound_field_access(expr, fields) } } + Token::LBracket if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) => + { + let ident = Expr::Identifier(w.to_ident(w_span)); + let mut fields = vec![]; + self.parse_multi_dim_subscript(&mut fields)?; + self.parse_compound_field_access(ident, fields) + } + // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html + Token::SingleQuotedString(_) + | Token::DoubleQuotedString(_) + | Token::HexStringLiteral(_) + if w.value.starts_with('_') => + { + Ok(Expr::IntroducedString { + introducer: w.value.clone(), + value: self.parse_introduced_string_value()?, + }) + } // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) @@ -1236,7 +1281,7 @@ impl<'a> Parser<'a> { // Note also that 
naively `SELECT date` looks like a syntax error because the `date` type // name is not followed by a string literal, but in fact in PostgreSQL it is a valid // expression that should parse as the column name "date". - let loc = self.peek_token().span.start; + let loc = self.peek_token_ref().span.start; let opt_expr = self.maybe_parse(|parser| { match parser.parse_data_type()? { DataType::Interval => parser.parse_interval(), @@ -1259,8 +1304,16 @@ impl<'a> Parser<'a> { return Ok(expr); } - let next_token = self.next_token(); - let expr = match next_token.token { + // Cache some dialect properties to avoid lifetime issues with the + // next_token reference. + + let dialect = self.dialect; + + self.advance_token(); + let next_token_index = self.get_current_index(); + let next_token = self.get_current_token(); + let span = next_token.span; + let expr = match &next_token.token { Token::Word(w) => { // The word we consumed may fall into one of two cases: it has a special meaning, or not. // For example, in Snowflake, the word `interval` may have two meanings depending on the context: @@ -1270,14 +1323,13 @@ impl<'a> Parser<'a> { // // We first try to parse the word and following tokens as a special expression, and if that fails, // we rollback and try to parse it as an identifier. - match self.try_parse(|parser| { - parser.parse_expr_prefix_by_reserved_word(&w, next_token.span) - }) { + let w = w.clone(); + match self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span)) { // This word indicated an expression prefix and parsing was successful Ok(Some(expr)) => Ok(expr), // No expression prefix associated with this word - Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w, next_token.span)?), + Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w, span)?), // If parsing of the word as a special expression failed, we are facing two options: // 1. The statement is malformed, e.g. 
`SELECT INTERVAL '1 DAI` (`DAI` instead of `DAY`) @@ -1288,7 +1340,7 @@ Err(e) => { if !self.dialect.is_reserved_for_identifier(w.keyword) { if let Ok(Some(expr)) = self.maybe_parse(|parser| { - parser.parse_expr_prefix_by_unreserved_word(&w, next_token.span) + parser.parse_expr_prefix_by_unreserved_word(&w, span) }) { return Ok(expr); } @@ -1300,7 +1352,7 @@ // array `[1, 2, 3]` Token::LBracket => self.parse_array_expr(false), tok @ Token::Minus | tok @ Token::Plus => { - let op = if tok == Token::Plus { + let op = if *tok == Token::Plus { UnaryOperator::Plus } else { UnaryOperator::Minus }; Ok(Expr::UnaryOp { op, expr: Box::new( self.parse_subexpr(self.dialect.prec_value(Precedence::MulDivModOp))?, ), }) } - Token::ExclamationMark if self.dialect.supports_bang_not_operator() => { Ok(Expr::UnaryOp { op: UnaryOperator::BangNot, expr: Box::new( self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?, ), }) } + Token::ExclamationMark if dialect.supports_bang_not_operator() => Ok(Expr::UnaryOp { op: UnaryOperator::BangNot, expr: Box::new(self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?), }), tok @ Token::DoubleExclamationMark | tok @ Token::PGSquareRoot | tok @ Token::PGCubeRoot | tok @ Token::AtSign | tok @ Token::Tilde - if dialect_of!(self is PostgreSqlDialect) => + if dialect_is!(dialect is PostgreSqlDialect) => { let op = match tok { Token::DoubleExclamationMark => UnaryOperator::PGPrefixFactorial, @@ -1342,7 +1390,7 @@ ), }) } - Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => + Token::EscapedStringLiteral(_) if dialect_is!(dialect is PostgreSqlDialect | GenericDialect) => { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) } @@ -1408,11 +1456,11 @@ self.prev_token(); Ok(Expr::Value(self.parse_value()?)) } - Token::LBrace if self.dialect.supports_dictionary_syntax() => { + Token::LBrace => { self.prev_token(); - self.parse_duckdb_struct_literal() + self.parse_lbrace_expr() } - _ => self.expected("an expression", next_token), + _ => self.expected_at("an expression", next_token_index), }?; let expr = self.try_parse_method(expr)?; @@ -1427,6 +1475,144 @@ } } + /// Try to parse an [Expr::CompoundFieldAccess] like `a.b.c` or `a.b[1].c`. + /// If all the fields are `Expr::Identifier`s, return an [Expr::CompoundIdentifier] instead. + /// If only the root exists, return the root. + /// If the dialect supports [Dialect::supports_partiql], this method falls back to JsonAccess parsing when a [Token::LBracket] is encountered. + pub fn parse_compound_field_access( + &mut self, + root: Expr, + mut chain: Vec<AccessExpr>, + ) -> Result<Expr, ParserError> { + let mut ending_wildcard: Option<TokenWithSpan> = None; + let mut ending_lbracket = false; + while self.consume_token(&Token::Period) { + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => { + let expr = Expr::Identifier(w.to_ident(next_token.span)); + chain.push(AccessExpr::Dot(expr)); + if self.peek_token().token == Token::LBracket { + if self.dialect.supports_partiql() { + self.next_token(); + ending_lbracket = true; + break; + } else { + self.parse_multi_dim_subscript(&mut chain)?
+ } + } + } + Token::Mul => { + // Postgres explicitly allows funcnm(tablenm.*) and the + // function array_agg traverses this control flow + if dialect_of!(self is PostgreSqlDialect) { + ending_wildcard = Some(next_token); + break; + } else { + return self.expected("an identifier after '.'", next_token); + } + } + Token::SingleQuotedString(s) => { + let expr = Expr::Identifier(Ident::with_quote('\'', s)); + chain.push(AccessExpr::Dot(expr)); + } + _ => { + return self.expected("an identifier or a '*' after '.'", next_token); + } + } + } + + // if dialect supports partiql, we need to go back one Token::LBracket for the JsonAccess parsing + if self.dialect.supports_partiql() && ending_lbracket { + self.prev_token(); + } + + if let Some(wildcard_token) = ending_wildcard { + if !Self::is_all_ident(&root, &chain) { + return self.expected("an identifier or a '*' after '.'", self.peek_token()); + }; + Ok(Expr::QualifiedWildcard( + ObjectName(Self::exprs_to_idents(root, chain)?), + AttachedToken(wildcard_token), + )) + } else if self.peek_token().token == Token::LParen { + if !Self::is_all_ident(&root, &chain) { + // consume LParen + self.next_token(); + return self.expected("an identifier or a '*' after '.'", self.peek_token()); + }; + let id_parts = Self::exprs_to_idents(root, chain)?; + if let Some(expr) = self.parse_outer_join_expr(&id_parts) { + Ok(expr) + } else { + self.parse_function(ObjectName(id_parts)) + } + } else { + if Self::is_all_ident(&root, &chain) { + return Ok(Expr::CompoundIdentifier(Self::exprs_to_idents( + root, chain, + )?)); + } + if chain.is_empty() { + return Ok(root); + } + Ok(Expr::CompoundFieldAccess { + root: Box::new(root), + access_chain: chain.clone(), + }) + } + } + + /// Check if the root is an identifier and all fields are identifiers. + fn is_all_ident(root: &Expr, fields: &[AccessExpr]) -> bool { + if !matches!(root, Expr::Identifier(_)) { + return false; + } + fields + .iter() + .all(|x| matches!(x, AccessExpr::Dot(Expr::Identifier(_)))) + } + + /// Convert a root and a list of fields to a list of identifiers. 
+ fn exprs_to_idents(root: Expr, fields: Vec<AccessExpr>) -> Result<Vec<Ident>, ParserError> { + let mut idents = vec![]; + if let Expr::Identifier(root) = root { + idents.push(root); + for x in fields { + if let AccessExpr::Dot(Expr::Identifier(ident)) = x { + idents.push(ident); + } else { + return parser_err!( + format!("Expected identifier, found: {}", x), + x.span().start + ); + } + } + Ok(idents) + } else { + parser_err!( + format!("Expected identifier, found: {}", root), + root.span().start + ) + } + } + + /// Try to parse OuterJoin expression `(+)` + fn parse_outer_join_expr(&mut self, id_parts: &[Ident]) -> Option<Expr> { + if dialect_of!(self is SnowflakeDialect | MsSqlDialect) + && self.consume_tokens(&[Token::LParen, Token::Plus, Token::RParen]) + { + Some(Expr::OuterJoin(Box::new( + match <[Ident; 1]>::try_from(id_parts.to_vec()) { + Ok([ident]) => Expr::Identifier(ident), + Err(parts) => Expr::CompoundIdentifier(parts), + }, + ))) + } else { + None + } + } + pub fn parse_utility_options(&mut self) -> Result<Vec<UtilityOption>, ParserError> { self.expect_token(&Token::LParen)?; let options = self.parse_comma_separated(Self::parse_utility_option)?; @@ -1436,7 +1622,7 @@ } fn parse_utility_option(&mut self) -> Result<UtilityOption, ParserError> { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; let next_token = self.peek_token(); if next_token == Token::Comma || next_token == Token::RParen { @@ -1463,7 +1649,7 @@ return Ok(None); } self.maybe_parse(|p| { - let params = p.parse_comma_separated(|p| p.parse_identifier(false))?; + let params = p.parse_comma_separated(|p| p.parse_identifier())?; p.expect_token(&Token::RParen)?; p.expect_token(&Token::Arrow)?; let expr = p.parse_expr()?; @@ -1509,7 +1695,29 @@ } } + /// Tries to parse the body of an [ODBC function] call. + /// i.e. without the enclosing braces + /// + /// ```sql + /// fn myfunc(1,2,3) + /// ``` + /// + /// [ODBC function]: https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/scalar-function-calls?view=sql-server-2017 + fn maybe_parse_odbc_fn_body(&mut self) -> Result<Option<Expr>, ParserError> { + self.maybe_parse(|p| { + p.expect_keyword(Keyword::FN)?; + let fn_name = p.parse_object_name(false)?; + let mut fn_call = p.parse_function_call(fn_name)?; + fn_call.uses_odbc_syntax = true; + Ok(Expr::Function(fn_call)) + }) + }
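Because `maybe_parse_odbc_fn_body` marks the resulting call with `uses_odbc_syntax = true`, the braced escape form survives a parse/display round trip as an ordinary `Function` node. A hedged sketch of what this looks like from the public API (assuming the dialect routes `{` through the new `parse_lbrace_expr` path, as `GenericDialect` does in this diff):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() -> Result<(), sqlparser::parser::ParserError> {
    // The ODBC escape syntax parses as an ordinary function call whose
    // `uses_odbc_syntax` flag is set, so Display re-emits the braces.
    let sql = "SELECT { fn CONCAT('foo', 'bar') }";
    let stmt = Parser::parse_sql(&GenericDialect {}, sql)?.remove(0);
    println!("{stmt}"); // e.g. SELECT {fn CONCAT('foo', 'bar')}
    Ok(())
}
```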
pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> { + self.parse_function_call(name).map(Expr::Function) + } + + fn parse_function_call(&mut self, name: ObjectName) -> Result<Function, ParserError> { self.expect_token(&Token::LParen)?; // Snowflake permits a subquery to be passed as an argument without @@ -1517,15 +1725,16 @@ if dialect_of!(self is SnowflakeDialect) && self.peek_sub_query() { let subquery = self.parse_query()?; self.expect_token(&Token::RParen)?; - return Ok(Expr::Function(Function { + return Ok(Function { name, + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::Subquery(subquery), filter: None, null_treatment: None, over: None, within_group: vec![], - })); + }); } let mut args = self.parse_function_argument_list()?; @@ -1578,28 +1787,29 @@ let window_spec = self.parse_window_spec()?; Some(WindowType::WindowSpec(window_spec)) } else { - Some(WindowType::NamedWindow(self.parse_identifier(false)?)) + Some(WindowType::NamedWindow(self.parse_identifier()?)) } } else { None }; - Ok(Expr::Function(Function { + Ok(Function { name, + uses_odbc_syntax: false, parameters, args: FunctionArguments::List(args), null_treatment, filter, over, within_group, - })) + }) } /// Optionally parses a null treatment clause.
fn parse_null_treatment(&mut self) -> Result, ParserError> { match self.parse_one_of_keywords(&[Keyword::RESPECT, Keyword::IGNORE]) { Some(keyword) => { - self.expect_keyword(Keyword::NULLS)?; + self.expect_keyword_is(Keyword::NULLS)?; Ok(match keyword { Keyword::RESPECT => Some(NullTreatment::RespectNulls), @@ -1619,6 +1829,7 @@ impl<'a> Parser<'a> { }; Ok(Expr::Function(Function { name, + uses_odbc_syntax: false, parameters: FunctionArguments::None, args, filter: None, @@ -1645,7 +1856,7 @@ impl<'a> Parser<'a> { let units = self.parse_window_frame_units()?; let (start_bound, end_bound) = if self.parse_keyword(Keyword::BETWEEN) { let start_bound = self.parse_window_frame_bound()?; - self.expect_keyword(Keyword::AND)?; + self.expect_keyword_is(Keyword::AND)?; let end_bound = Some(self.parse_window_frame_bound()?); (start_bound, end_bound) } else { @@ -1751,13 +1962,13 @@ impl<'a> Parser<'a> { let mut operand = None; if !self.parse_keyword(Keyword::WHEN) { operand = Some(Box::new(self.parse_expr()?)); - self.expect_keyword(Keyword::WHEN)?; + self.expect_keyword_is(Keyword::WHEN)?; } let mut conditions = vec![]; let mut results = vec![]; loop { conditions.push(self.parse_expr()?); - self.expect_keyword(Keyword::THEN)?; + self.expect_keyword_is(Keyword::THEN)?; results.push(self.parse_expr()?); if !self.parse_keyword(Keyword::WHEN) { break; @@ -1768,7 +1979,7 @@ impl<'a> Parser<'a> { } else { None }; - self.expect_keyword(Keyword::END)?; + self.expect_keyword_is(Keyword::END)?; Ok(Expr::Case { operand, conditions, @@ -1863,7 +2074,7 @@ impl<'a> Parser<'a> { pub fn parse_cast_expr(&mut self, kind: CastKind) -> Result { self.expect_token(&Token::LParen)?; let expr = self.parse_expr()?; - self.expect_keyword(Keyword::AS)?; + self.expect_keyword_is(Keyword::AS)?; let data_type = self.parse_data_type()?; let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; @@ -1953,7 +2164,7 @@ impl<'a> Parser<'a> { // Parse the subexpr till the IN keyword let expr = p.parse_subexpr(between_prec)?; - p.expect_keyword(Keyword::IN)?; + p.expect_keyword_is(Keyword::IN)?; let from = p.parse_expr()?; p.expect_token(&Token::RParen)?; Ok(Expr::Position { @@ -1997,9 +2208,9 @@ impl<'a> Parser<'a> { // PARSE OVERLAY (EXPR PLACING EXPR FROM 1 [FOR 3]) self.expect_token(&Token::LParen)?; let expr = self.parse_expr()?; - self.expect_keyword(Keyword::PLACING)?; + self.expect_keyword_is(Keyword::PLACING)?; let what_expr = self.parse_expr()?; - self.expect_keyword(Keyword::FROM)?; + self.expect_keyword_is(Keyword::FROM)?; let from_expr = self.parse_expr()?; let mut for_expr = None; if self.parse_keyword(Keyword::FOR) { @@ -2090,7 +2301,7 @@ impl<'a> Parser<'a> { if self.parse_keyword(Keyword::ERROR) { Ok(Some(ListAggOnOverflow::Error)) } else { - self.expect_keyword(Keyword::TRUNCATE)?; + self.expect_keyword_is(Keyword::TRUNCATE)?; let filler = match self.peek_token().token { Token::Word(w) if w.keyword == Keyword::WITH || w.keyword == Keyword::WITHOUT => @@ -2111,7 +2322,7 @@ impl<'a> Parser<'a> { if !with_count && !self.parse_keyword(Keyword::WITHOUT) { self.expected("either WITH or WITHOUT in LISTAGG", self.peek_token())?; } - self.expect_keyword(Keyword::COUNT)?; + self.expect_keyword_is(Keyword::COUNT)?; Ok(Some(ListAggOnOverflow::Truncate { filler, with_count })) } } else { @@ -2133,7 +2344,7 @@ impl<'a> Parser<'a> { let week_day = if dialect_of!(self is BigQueryDialect | GenericDialect) && self.consume_token(&Token::LParen) { - let week_day = self.parse_identifier(false)?; + let week_day = 
self.parse_identifier()?; self.expect_token(&Token::RParen)?; Some(week_day) } else { @@ -2175,14 +2386,14 @@ impl<'a> Parser<'a> { Keyword::TIMEZONE_REGION => Ok(DateTimeField::TimezoneRegion), _ if self.dialect.allow_extract_custom() => { self.prev_token(); - let custom = self.parse_identifier(false)?; + let custom = self.parse_identifier()?; Ok(DateTimeField::Custom(custom)) } _ => self.expected("date/time field", next_token), }, Token::SingleQuotedString(_) if self.dialect.allow_extract_single_quotes() => { self.prev_token(); - let custom = self.parse_identifier(false)?; + let custom = self.parse_identifier()?; Ok(DateTimeField::Custom(custom)) } _ => self.expected("date/time field", next_token), @@ -2211,6 +2422,31 @@ impl<'a> Parser<'a> { } } + /// Parse expression types that start with a left brace '{'. + /// Examples: + /// ```sql + /// -- Dictionary expr. + /// {'key1': 'value1', 'key2': 'value2'} + /// + /// -- Function call using the ODBC syntax. + /// { fn CONCAT('foo', 'bar') } + /// ``` + fn parse_lbrace_expr(&mut self) -> Result { + let token = self.expect_token(&Token::LBrace)?; + + if let Some(fn_expr) = self.maybe_parse_odbc_fn_body()? { + self.expect_token(&Token::RBrace)?; + return Ok(fn_expr); + } + + if self.dialect.supports_dictionary_syntax() { + self.prev_token(); // Put back the '{' + return self.parse_duckdb_struct_literal(); + } + + self.expected("an expression", token) + } + /// Parses fulltext expressions [`sqlparser::ast::Expr::MatchAgainst`] /// /// # Errors @@ -2219,7 +2455,7 @@ impl<'a> Parser<'a> { pub fn parse_match_against(&mut self) -> Result { let columns = self.parse_parenthesized_column_list(Mandatory, false)?; - self.expect_keyword(Keyword::AGAINST)?; + self.expect_keyword_is(Keyword::AGAINST)?; self.expect_token(&Token::LParen)?; @@ -2432,7 +2668,7 @@ impl<'a> Parser<'a> { self.peek_token().span.start }); } - let field_name = self.parse_identifier(false)?; + let field_name = self.parse_identifier()?; Ok(Expr::Named { expr: expr.into(), name: field_name, @@ -2462,7 +2698,7 @@ impl<'a> Parser<'a> { F: FnMut(&mut Parser<'a>) -> Result<(StructField, MatchedTrailingBracket), ParserError>, { let start_token = self.peek_token(); - self.expect_keyword(Keyword::STRUCT)?; + self.expect_keyword_is(Keyword::STRUCT)?; // Nothing to do if we have no type information. if Token::Lt != self.peek_token() { @@ -2494,10 +2730,10 @@ impl<'a> Parser<'a> { /// Duckdb Struct Data Type fn parse_duckdb_struct_type_def(&mut self) -> Result, ParserError> { - self.expect_keyword(Keyword::STRUCT)?; + self.expect_keyword_is(Keyword::STRUCT)?; self.expect_token(&Token::LParen)?; let struct_body = self.parse_comma_separated(|parser| { - let field_name = parser.parse_identifier(false)?; + let field_name = parser.parse_identifier()?; let field_type = parser.parse_data_type()?; Ok(StructField { @@ -2531,7 +2767,7 @@ impl<'a> Parser<'a> { let field_name = if is_anonymous_field { None } else { - Some(self.parse_identifier(false)?) + Some(self.parse_identifier()?) 
}; let (field_type, trailing_bracket) = self.parse_data_type_helper()?; @@ -2555,13 +2791,13 @@ impl<'a> Parser<'a> { /// /// [1]: https://duckdb.org/docs/sql/data_types/union.html fn parse_union_type_def(&mut self) -> Result, ParserError> { - self.expect_keyword(Keyword::UNION)?; + self.expect_keyword_is(Keyword::UNION)?; self.expect_token(&Token::LParen)?; let fields = self.parse_comma_separated(|p| { Ok(UnionField { - field_name: p.parse_identifier(false)?, + field_name: p.parse_identifier()?, field_type: p.parse_data_type()?, }) })?; @@ -2600,7 +2836,7 @@ impl<'a> Parser<'a> { /// /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs fn parse_duckdb_dictionary_field(&mut self) -> Result { - let key = self.parse_identifier(false)?; + let key = self.parse_identifier()?; self.expect_token(&Token::Colon)?; @@ -2660,7 +2896,7 @@ impl<'a> Parser<'a> { /// /// [map]: https://clickhouse.com/docs/en/sql-reference/data-types/map fn parse_click_house_map_def(&mut self) -> Result<(DataType, DataType), ParserError> { - self.expect_keyword(Keyword::MAP)?; + self.expect_keyword_is(Keyword::MAP)?; self.expect_token(&Token::LParen)?; let key_data_type = self.parse_data_type()?; self.expect_token(&Token::Comma)?; @@ -2680,7 +2916,7 @@ impl<'a> Parser<'a> { /// /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple fn parse_click_house_tuple_def(&mut self) -> Result, ParserError> { - self.expect_keyword(Keyword::TUPLE)?; + self.expect_keyword_is(Keyword::TUPLE)?; self.expect_token(&Token::LParen)?; let mut field_defs = vec![]; loop { @@ -2729,8 +2965,13 @@ impl<'a> Parser<'a> { return infix; } - let mut tok = self.next_token(); - let regular_binary_operator = match &mut tok.token { + let dialect = self.dialect; + + self.advance_token(); + let tok = self.get_current_token(); + let tok_index = self.get_current_index(); + let span = tok.span; + let regular_binary_operator = match &tok.token { Token::Spaceship => Some(BinaryOperator::Spaceship), Token::DoubleEq => Some(BinaryOperator::Eq), Token::Eq => Some(BinaryOperator::Eq), @@ -2748,7 +2989,7 @@ impl<'a> Parser<'a> { Token::Caret => { // In PostgreSQL, ^ stands for the exponentiation operation, // and # stands for XOR. 
See https://www.postgresql.org/docs/current/functions-math.html - if dialect_of!(self is PostgreSqlDialect) { + if dialect_is!(dialect is PostgreSqlDialect) { Some(BinaryOperator::PGExp) } else { Some(BinaryOperator::BitwiseXor) @@ -2756,22 +2997,22 @@ impl<'a> Parser<'a> { } Token::Ampersand => Some(BinaryOperator::BitwiseAnd), Token::Div => Some(BinaryOperator::Divide), - Token::DuckIntDiv if dialect_of!(self is DuckDbDialect | GenericDialect) => { + Token::DuckIntDiv if dialect_is!(dialect is DuckDbDialect | GenericDialect) => { Some(BinaryOperator::DuckIntegerDivide) } - Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { + Token::ShiftLeft if dialect_is!(dialect is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { Some(BinaryOperator::PGBitwiseShiftLeft) } - Token::ShiftRight if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { + Token::ShiftRight if dialect_is!(dialect is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { Some(BinaryOperator::PGBitwiseShiftRight) } - Token::Sharp if dialect_of!(self is PostgreSqlDialect) => { + Token::Sharp if dialect_is!(dialect is PostgreSqlDialect) => { Some(BinaryOperator::PGBitwiseXor) } - Token::Overlap if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + Token::Overlap if dialect_is!(dialect is PostgreSqlDialect | GenericDialect) => { Some(BinaryOperator::PGOverlap) } - Token::CaretAt if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + Token::CaretAt if dialect_is!(dialect is PostgreSqlDialect | GenericDialect) => { Some(BinaryOperator::PGStartsWith) } Token::Tilde => Some(BinaryOperator::PGRegexMatch), @@ -2794,13 +3035,13 @@ impl<'a> Parser<'a> { Token::Question => Some(BinaryOperator::Question), Token::QuestionAnd => Some(BinaryOperator::QuestionAnd), Token::QuestionPipe => Some(BinaryOperator::QuestionPipe), - Token::CustomBinaryOperator(s) => Some(BinaryOperator::Custom(core::mem::take(s))), + Token::CustomBinaryOperator(s) => Some(BinaryOperator::Custom(s.clone())), Token::Word(w) => match w.keyword { Keyword::AND => Some(BinaryOperator::And), Keyword::OR => Some(BinaryOperator::Or), Keyword::XOR => Some(BinaryOperator::Xor), - Keyword::OPERATOR if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + Keyword::OPERATOR if dialect_is!(dialect is PostgreSqlDialect | GenericDialect) => { self.expect_token(&Token::LParen)?; // there are special rules for operator names in // postgres so we can not use 'parse_object' @@ -2808,7 +3049,8 @@ impl<'a> Parser<'a> { // See https://www.postgresql.org/docs/current/sql-createoperator.html let mut idents = vec![]; loop { - idents.push(self.next_token().to_string()); + self.advance_token(); + idents.push(self.get_current_token().to_string()); if !self.consume_token(&Token::Period) { break; } @@ -2821,6 +3063,7 @@ impl<'a> Parser<'a> { _ => None, }; + let tok = self.token_at(tok_index); if let Some(op) = regular_binary_operator { if let Some(keyword) = self.parse_one_of_keywords(&[Keyword::ANY, Keyword::ALL, Keyword::SOME]) @@ -2851,7 +3094,7 @@ impl<'a> Parser<'a> { format!( "Expected one of [=, >, <, =>, =<, !=] as comparison operator, found: {op}" ), - tok.span.start + span.start ); }; @@ -2980,28 +3223,33 @@ impl<'a> Parser<'a> { tok.span.start ), } - } else if Token::DoubleColon == tok { + } else if Token::DoubleColon == *tok { Ok(Expr::Cast { kind: CastKind::DoubleColon, expr: Box::new(expr), data_type: self.parse_data_type()?, format: None, }) - } else if Token::ExclamationMark 
== tok && self.dialect.supports_factorial_operator() { + } else if Token::ExclamationMark == *tok && self.dialect.supports_factorial_operator() { Ok(Expr::UnaryOp { op: UnaryOperator::PGPostfixFactorial, expr: Box::new(expr), }) - } else if Token::LBracket == tok { - if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { - self.parse_subscript(expr) - } else if dialect_of!(self is SnowflakeDialect) || self.dialect.supports_partiql() { + } else if Token::LBracket == *tok { + if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) + { + let mut chain = vec![]; + // back to LBracket + self.prev_token(); + self.parse_multi_dim_subscript(&mut chain)?; + self.parse_compound_field_access(expr, chain) + } else if self.dialect.supports_partiql() { self.prev_token(); self.parse_json_access(expr) } else { - self.parse_map_access(expr) + parser_err!("Array subscripting is not supported", tok.span.start) } - } else if dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == tok { + } else if dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == *tok { self.prev_token(); self.parse_json_access(expr) } else { @@ -3095,15 +3343,24 @@ }) } + /// Parses multi-dimensional array access like `[1:3][1][1]` + pub fn parse_multi_dim_subscript( + &mut self, + chain: &mut Vec<AccessExpr>, + ) -> Result<(), ParserError> { + while self.consume_token(&Token::LBracket) { + self.parse_subscript(chain)?; + } + Ok(()) + } + /// Parses an array subscript like `[1:3]` /// /// Parser is right after `[` - pub fn parse_subscript(&mut self, expr: Expr) -> Result<Expr, ParserError> { + fn parse_subscript(&mut self, chain: &mut Vec<AccessExpr>) -> Result<(), ParserError> { let subscript = self.parse_subscript_inner()?; - Ok(Expr::Subscript { - expr: Box::new(expr), - subscript: Box::new(subscript), - }) + chain.push(AccessExpr::Subscript(subscript)); + Ok(()) } fn parse_json_path_object_key(&mut self) -> Result<JsonPathElem, ParserError> { @@ -3165,46 +3422,6 @@ Ok(JsonPath { path }) } - pub fn parse_map_access(&mut self, expr: Expr) -> Result<Expr, ParserError> { - let key = self.parse_expr()?; - self.expect_token(&Token::RBracket)?; - - let mut keys = vec![MapAccessKey { - key, - syntax: MapAccessSyntax::Bracket, - }]; - loop { - let key = match self.peek_token().token { - Token::LBracket => { - self.next_token(); // consume `[` - let key = self.parse_expr()?; - self.expect_token(&Token::RBracket)?; - MapAccessKey { - key, - syntax: MapAccessSyntax::Bracket, - } - } - // Access on BigQuery nested and repeated expressions can - // mix notations in the same expression. - // https://cloud.google.com/bigquery/docs/nested-repeated#query_nested_and_repeated_columns - Token::Period if dialect_of!(self is BigQueryDialect) => { - self.next_token(); // consume `.` - MapAccessKey { - key: self.parse_expr()?, - syntax: MapAccessSyntax::Period, - } - } - _ => break, - }; - keys.push(key); - } - - Ok(Expr::MapAccess { - column: Box::new(expr), - keys, - }) - } -
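With `MapAccess` and `Subscript` removed, every mix of dots and brackets now funnels into `parse_compound_field_access`, producing either a plain `CompoundIdentifier` or a `CompoundFieldAccess` whose access chain preserves evaluation order. A small sketch of the resulting AST shapes (hypothetical usage, not part of this diff):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() -> Result<(), sqlparser::parser::ParserError> {
    let dialect = GenericDialect {};

    // All-identifier paths still come back as Expr::CompoundIdentifier.
    let stmt = Parser::parse_sql(&dialect, "SELECT a.b.c FROM t")?.remove(0);
    println!("{stmt:?}");

    // Mixed dot/subscript chains, including multi-dimensional subscripts
    // like `[1:3][2]`, become a single Expr::CompoundFieldAccess.
    let stmt = Parser::parse_sql(&dialect, "SELECT a.b[1:3][2].c FROM t")?.remove(0);
    println!("{stmt:?}");
    Ok(())
}
```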
let low = self.parse_subexpr(self.dialect.prec_value(Precedence::Between))?; - self.expect_keyword(Keyword::AND)?; + self.expect_keyword_is(Keyword::AND)?; let high = self.parse_subexpr(self.dialect.prec_value(Precedence::Between))?; Ok(Expr::Between { expr: Box::new(expr), @@ -3282,12 +3499,26 @@ impl<'a> Parser<'a> { self.dialect.get_next_precedence_default(self) } + /// Return the token at the given location, or EOF if the index is beyond + /// the length of the current set of tokens. + pub fn token_at(&self, index: usize) -> &TokenWithSpan { + self.tokens.get(index).unwrap_or(&EOF_TOKEN) + } + /// Return the first non-whitespace token that has not yet been processed - /// (or None if reached end-of-file) + /// or Token::EOF + /// + /// See [`Self::peek_token_ref`] to avoid the copy. pub fn peek_token(&self) -> TokenWithSpan { self.peek_nth_token(0) } + /// Return a reference to the first non-whitespace token that has not yet + /// been processed or Token::EOF + pub fn peek_token_ref(&self) -> &TokenWithSpan { + self.peek_nth_token_ref(0) + } + /// Returns the `N` next non-whitespace tokens that have not yet been /// processed. /// @@ -3339,7 +3570,12 @@ impl<'a> Parser<'a> { } /// Return nth non-whitespace token that has not yet been processed - pub fn peek_nth_token(&self, mut n: usize) -> TokenWithSpan { + pub fn peek_nth_token(&self, n: usize) -> TokenWithSpan { + self.peek_nth_token_ref(n).clone() + } + + /// Return nth non-whitespace token that has not yet been processed + pub fn peek_nth_token_ref(&self, mut n: usize) -> &TokenWithSpan { let mut index = self.index; loop { index += 1; match self.tokens.get(index - 1) { Some(TokenWithSpan { token: Token::Whitespace(_), span: _, }) => continue, non_whitespace => { if n == 0 { - return non_whitespace.cloned().unwrap_or(TokenWithSpan { - token: Token::EOF, - span: Span::empty(), - }); + return non_whitespace.unwrap_or(&EOF_TOKEN); } n -= 1; } @@ -3378,7 +3611,9 @@ impl<'a> Parser<'a> { }) } - /// Look for all of the expected keywords in sequence, without consuming them + /// Return true if the next tokens exactly match `expected` + /// + /// Does not advance the current token. fn peek_keywords(&mut self, expected: &[Keyword]) -> bool { let index = self.index; let matched = self.parse_keywords(expected); @@ -3386,10 +3621,33 @@ impl<'a> Parser<'a> { matched } - /// Return the first non-whitespace token that has not yet been processed - /// (or None if reached end-of-file) and mark it as processed. OK to call - /// repeatedly after reaching EOF. + /// Advances to the next non-whitespace token and returns a copy. + /// + /// Please use [`Self::advance_token`] and [`Self::get_current_token`] to + /// avoid the copy. pub fn next_token(&mut self) -> TokenWithSpan { + self.advance_token(); + self.get_current_token().clone() + } + + /// Returns the index of the current token + /// + /// This can be used with APIs that expect an index, such as + /// [`Self::token_at`] + pub fn get_current_index(&self) -> usize { + self.index.saturating_sub(1) + } + + /// Return the next unprocessed token, possibly whitespace. 
+ pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> { + self.index += 1; + self.tokens.get(self.index - 1) + } + + /// Advances the current token to the next non-whitespace token + /// + /// See [`Self::get_current_token`] to get the current token after advancing + pub fn advance_token(&mut self) { loop { self.index += 1; match self.tokens.get(self.index - 1) { Some(TokenWithSpan { token: Token::Whitespace(_), span: _, }) => continue, - token => { - return token - .cloned() - .unwrap_or_else(|| TokenWithSpan::wrap(Token::EOF)) - } + _ => break, } } } - /// Return the first unprocessed token, possibly whitespace. - pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> { - self.index += 1; - self.tokens.get(self.index - 1) + /// Returns a reference to the current token + /// + /// Does not advance the current token. + pub fn get_current_token(&self) -> &TokenWithSpan { + self.token_at(self.index.saturating_sub(1)) + } + + /// Returns a reference to the previous token + /// + /// Does not advance the current token. + pub fn get_previous_token(&self) -> &TokenWithSpan { + self.token_at(self.index.saturating_sub(2)) } - /// Push back the last one non-whitespace token. Must be called after - /// `next_token()`, otherwise might panic. OK to call after - /// `next_token()` indicates an EOF. + /// Returns a reference to the next token + /// + /// Does not advance the current token. + pub fn get_next_token(&self) -> &TokenWithSpan { + self.token_at(self.index) + } + + /// Seek back to the previous non-whitespace token. + /// + /// Must be called after `next_token()`, otherwise might panic. OK to call + /// after `next_token()` indicates an EOF. + /// + // TODO rename to backup_token and deprecate prev_token? pub fn prev_token(&mut self) { loop { assert!(self.index > 0); @@ -3438,24 +3710,59 @@ impl<'a> Parser<'a> { ) } + /// Report that `found` was encountered instead of `expected` + pub fn expected_ref<T>(&self, expected: &str, found: &TokenWithSpan) -> Result<T, ParserError> { + parser_err!( + format!("Expected: {expected}, found: {found}"), + found.span.start + ) + } + + /// Report that the current token was found instead of `expected`. + pub fn expected_at<T>(&self, expected: &str, index: usize) -> Result<T, ParserError> { + let found = self.tokens.get(index).unwrap_or(&EOF_TOKEN); + parser_err!( + format!("Expected: {expected}, found: {found}"), + found.span.start + ) + } + /// If the current token is the `expected` keyword, consume it and returns /// true. Otherwise, no tokens are consumed and returns false. #[must_use] pub fn parse_keyword(&mut self, expected: Keyword) -> bool { - self.parse_keyword_token(expected).is_some() + if self.peek_keyword(expected) { + self.advance_token(); + true + } else { + false + } } + /// If the current token is the `expected` keyword, consume it and return + /// a copy of the matched token. + /// + /// See [`Self::parse_keyword_token_ref`] to avoid the copy. #[must_use] pub fn parse_keyword_token(&mut self, expected: Keyword) -> Option<TokenWithSpan> { - match self.peek_token().token { - Token::Word(w) if expected == w.keyword => Some(self.next_token()), + self.parse_keyword_token_ref(expected).cloned() + } + + /// If the current token is the `expected` keyword, consume it and return a reference to the consumed token. 
+ /// + #[must_use] + pub fn parse_keyword_token_ref(&mut self, expected: Keyword) -> Option<&TokenWithSpan> { + match &self.peek_token_ref().token { + Token::Word(w) if expected == w.keyword => { + self.advance_token(); + Some(self.get_current_token()) + } _ => None, } } #[must_use] - pub fn peek_keyword(&mut self, expected: Keyword) -> bool { - matches!(self.peek_token().token, Token::Word(w) if expected == w.keyword) + pub fn peek_keyword(&self, expected: Keyword) -> bool { + matches!(&self.peek_token_ref().token, Token::Word(w) if expected == w.keyword) } /// If the current token is the `expected` keyword followed by /// specified tokens, consume them and returns true. /// Otherwise, no tokens are consumed and returns false. /// /// Note that if the length of `tokens` is too long, this function will /// not be efficient as it does a loop on the tokens with `peek_nth_token` /// each time. pub fn parse_keyword_with_tokens(&mut self, expected: Keyword, tokens: &[Token]) -> bool { - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(w) if expected == w.keyword => { for (idx, token) in tokens.iter().enumerate() { - if self.peek_nth_token(idx + 1).token != *token { + if self.peek_nth_token_ref(idx + 1).token != *token { return false; } } // consume all tokens for _ in 0..(tokens.len() + 1) { - self.next_token(); + self.advance_token(); } true } @@ -3505,13 +3812,13 @@ impl<'a> Parser<'a> { /// and returns [`None`]. #[must_use] pub fn parse_one_of_keywords(&mut self, keywords: &[Keyword]) -> Option<Keyword> { - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(w) => { keywords .iter() .find(|keyword| **keyword == w.keyword) .map(|keyword| { - self.next_token(); + self.advance_token(); *keyword }) } _ => None, } } @@ -3526,9 +3833,9 @@ impl<'a> Parser<'a> { Ok(keyword) } else { let keywords: Vec<String> = keywords.iter().map(|x| format!("{x:?}")).collect(); - self.expected( + self.expected_ref( &format!("one of {}", keywords.join(" or ")), - self.peek_token(), + self.peek_token_ref(), ) } } @@ -3536,10 +3843,23 @@ impl<'a> Parser<'a> { /// If the current token is the `expected` keyword, consume the token. /// Otherwise, return an error. pub fn expect_keyword(&mut self, expected: Keyword) -> Result<TokenWithSpan, ParserError> { - if let Some(token) = self.parse_keyword_token(expected) { - Ok(token) + if let Some(token) = self.parse_keyword_token_ref(expected) { + Ok(token.clone()) + } else { + self.expected_ref(format!("{:?}", &expected).as_str(), self.peek_token_ref()) + } + } + + /// If the current token is the `expected` keyword, consume the token. + /// Otherwise, return an error. + /// + /// This differs from `expect_keyword` only in that the matched keyword + /// token is not returned. + pub fn expect_keyword_is(&mut self, expected: Keyword) -> Result<(), ParserError> { + if self.parse_keyword_token_ref(expected).is_some() { + Ok(()) } else { - self.expected(format!("{:?}", &expected).as_str(), self.peek_token()) + self.expected_ref(format!("{:?}", &expected).as_str(), self.peek_token_ref()) } } @@ -3547,16 +3867,18 @@ impl<'a> Parser<'a> { /// sequence, consume them and returns Ok. Otherwise, return an Error. 
pub fn expect_keywords(&mut self, expected: &[Keyword]) -> Result<(), ParserError> { for &kw in expected { - self.expect_keyword(kw)?; + self.expect_keyword_is(kw)?; } Ok(()) } /// Consume the next token if it matches the expected token, otherwise return false + /// + /// See [Self::advance_token] to consume the token unconditionally #[must_use] pub fn consume_token(&mut self, expected: &Token) -> bool { - if self.peek_token() == *expected { - self.next_token(); + if self.peek_token_ref() == expected { + self.advance_token(); true } else { false @@ -3580,10 +3902,10 @@ impl<'a> Parser<'a> { /// Bail out if the current token is not an expected keyword, or consume it if it is pub fn expect_token(&mut self, expected: &Token) -> Result { - if self.peek_token() == *expected { + if self.peek_token_ref() == expected { Ok(self.next_token()) } else { - self.expected(&expected.to_string(), self.peek_token()) + self.expected_ref(&expected.to_string(), self.peek_token_ref()) } } @@ -3882,9 +4204,9 @@ impl<'a> Parser<'a> { let mut name = None; if self.peek_token() != Token::LParen { if self.parse_keyword(Keyword::IN) { - storage_specifier = self.parse_identifier(false).ok() + storage_specifier = self.parse_identifier().ok() } else { - name = self.parse_identifier(false).ok(); + name = self.parse_identifier().ok(); } // Storage specifier may follow the name @@ -3892,19 +4214,19 @@ impl<'a> Parser<'a> { && self.peek_token() != Token::LParen && self.parse_keyword(Keyword::IN) { - storage_specifier = self.parse_identifier(false).ok(); + storage_specifier = self.parse_identifier().ok(); } } self.expect_token(&Token::LParen)?; - self.expect_keyword(Keyword::TYPE)?; - let secret_type = self.parse_identifier(false)?; + self.expect_keyword_is(Keyword::TYPE)?; + let secret_type = self.parse_identifier()?; let mut options = Vec::new(); if self.consume_token(&Token::Comma) { options.append(&mut self.parse_comma_separated(|p| { - let key = p.parse_identifier(false)?; - let value = p.parse_identifier(false)?; + let key = p.parse_identifier()?; + let value = p.parse_identifier()?; Ok(SecretOption { key, value }) })?); } @@ -4020,7 +4342,7 @@ impl<'a> Parser<'a> { /// Parse a UNCACHE TABLE statement pub fn parse_uncache_table(&mut self) -> Result { - self.expect_keyword(Keyword::TABLE)?; + self.expect_keyword_is(Keyword::TABLE)?; let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let table_name = self.parse_object_name(false)?; Ok(Statement::UNCache { @@ -4031,11 +4353,11 @@ impl<'a> Parser<'a> { /// SQLite-specific `CREATE VIRTUAL TABLE` pub fn parse_create_virtual_table(&mut self) -> Result { - self.expect_keyword(Keyword::TABLE)?; + self.expect_keyword_is(Keyword::TABLE)?; let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name(false)?; - self.expect_keyword(Keyword::USING)?; - let module_name = self.parse_identifier(false)?; + self.expect_keyword_is(Keyword::USING)?; + let module_name = self.parse_identifier()?; // SQLite docs note that module "arguments syntax is sufficiently // general that the arguments can be made to appear as column // definitions in a traditional CREATE TABLE statement", but @@ -4062,16 +4384,14 @@ impl<'a> Parser<'a> { fn parse_schema_name(&mut self) -> Result { if self.parse_keyword(Keyword::AUTHORIZATION) { - Ok(SchemaName::UnnamedAuthorization( - self.parse_identifier(false)?, - )) + Ok(SchemaName::UnnamedAuthorization(self.parse_identifier()?)) } else { let name = self.parse_object_name(false)?; 
if self.parse_keyword(Keyword::AUTHORIZATION) { Ok(SchemaName::NamedAuthorization( name, - self.parse_identifier(false)?, + self.parse_identifier()?, )) } else { Ok(SchemaName::Simple(name)) @@ -4192,7 +4512,7 @@ impl<'a> Parser<'a> { )); } else if self.parse_keyword(Keyword::LANGUAGE) { ensure_not_set(&body.language, "LANGUAGE")?; - body.language = Some(self.parse_identifier(false)?); + body.language = Some(self.parse_identifier()?); } else if self.parse_keyword(Keyword::IMMUTABLE) { ensure_not_set(&body.behavior, "IMMUTABLE | STABLE | VOLATILE")?; body.behavior = Some(FunctionBehavior::Immutable); @@ -4278,7 +4598,7 @@ impl<'a> Parser<'a> { temporary: bool, ) -> Result { let name = self.parse_object_name(false)?; - self.expect_keyword(Keyword::AS)?; + self.expect_keyword_is(Keyword::AS)?; let as_ = self.parse_create_function_body_string()?; let using = self.parse_optional_create_function_using()?; @@ -4315,7 +4635,7 @@ impl<'a> Parser<'a> { let parse_function_param = |parser: &mut Parser| -> Result { - let name = parser.parse_identifier(false)?; + let name = parser.parse_identifier()?; let data_type = parser.parse_data_type()?; Ok(OperateFunctionArg { mode: None, @@ -4343,7 +4663,7 @@ impl<'a> Parser<'a> { }; let language = if self.parse_keyword(Keyword::LANGUAGE) { - Some(self.parse_identifier(false)?) + Some(self.parse_identifier()?) } else { None }; @@ -4360,7 +4680,7 @@ impl<'a> Parser<'a> { let mut options = self.maybe_parse_options(Keyword::OPTIONS)?; let function_body = if remote_connection.is_none() { - self.expect_keyword(Keyword::AS)?; + self.expect_keyword_is(Keyword::AS)?; let expr = self.parse_expr()?; if options.is_none() { options = self.maybe_parse_options(Keyword::OPTIONS)?; @@ -4437,7 +4757,7 @@ impl<'a> Parser<'a> { } let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let trigger_name = self.parse_object_name(false)?; - self.expect_keyword(Keyword::ON)?; + self.expect_keyword_is(Keyword::ON)?; let table_name = self.parse_object_name(false)?; let option = self .parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) @@ -4468,7 +4788,7 @@ impl<'a> Parser<'a> { let period = self.parse_trigger_period()?; let events = self.parse_keyword_separated(Keyword::OR, Parser::parse_trigger_event)?; - self.expect_keyword(Keyword::ON)?; + self.expect_keyword_is(Keyword::ON)?; let table_name = self.parse_object_name(false)?; let referenced_table_name = if self.parse_keyword(Keyword::FROM) { @@ -4486,7 +4806,7 @@ impl<'a> Parser<'a> { } } - self.expect_keyword(Keyword::FOR)?; + self.expect_keyword_is(Keyword::FOR)?; let include_each = self.parse_keyword(Keyword::EACH); let trigger_object = match self.expect_one_of_keywords(&[Keyword::ROW, Keyword::STATEMENT])? 
{ @@ -4500,7 +4820,7 @@ impl<'a> Parser<'a> { .then(|| self.parse_expr()) .transpose()?; - self.expect_keyword(Keyword::EXECUTE)?; + self.expect_keyword_is(Keyword::EXECUTE)?; let exec_body = self.parse_trigger_exec_body()?; @@ -4531,7 +4851,7 @@ impl<'a> Parser<'a> { Keyword::BEFORE => TriggerPeriod::Before, Keyword::AFTER => TriggerPeriod::After, Keyword::INSTEAD => self - .expect_keyword(Keyword::OF) + .expect_keyword_is(Keyword::OF) .map(|_| TriggerPeriod::InsteadOf)?, _ => unreachable!(), }, @@ -4549,9 +4869,7 @@ impl<'a> Parser<'a> { Keyword::INSERT => TriggerEvent::Insert, Keyword::UPDATE => { if self.parse_keyword(Keyword::OF) { - let cols = self.parse_comma_separated(|ident| { - Parser::parse_identifier(ident, false) - })?; + let cols = self.parse_comma_separated(Parser::parse_identifier)?; TriggerEvent::Update(cols) } else { TriggerEvent::Update(vec![]) @@ -4615,7 +4933,7 @@ impl<'a> Parser<'a> { }; self.expect_token(&Token::RParen)?; - self.expect_keyword(Keyword::AS)?; + self.expect_keyword_is(Keyword::AS)?; Ok(Statement::CreateMacro { or_replace, @@ -4635,7 +4953,7 @@ impl<'a> Parser<'a> { } fn parse_macro_arg(&mut self) -> Result { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; let default_expr = if self.consume_token(&Token::Assignment) || self.consume_token(&Token::RArrow) { @@ -4650,7 +4968,7 @@ impl<'a> Parser<'a> { &mut self, or_replace: bool, ) -> Result { - self.expect_keyword(Keyword::TABLE)?; + self.expect_keyword_is(Keyword::TABLE)?; let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name(false)?; let (columns, constraints) = self.parse_columns()?; @@ -4718,7 +5036,7 @@ impl<'a> Parser<'a> { temporary: bool, ) -> Result { let materialized = self.parse_keyword(Keyword::MATERIALIZED); - self.expect_keyword(Keyword::VIEW)?; + self.expect_keyword_is(Keyword::VIEW)?; let if_not_exists = dialect_of!(self is BigQueryDialect|SQLiteDialect|GenericDialect) && self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); // Many dialects support `OR ALTER` right after `CREATE`, but we don't (yet). @@ -4733,7 +5051,7 @@ impl<'a> Parser<'a> { } let cluster_by = if self.parse_keyword(Keyword::CLUSTER) { - self.expect_keyword(Keyword::BY)?; + self.expect_keyword_is(Keyword::BY)?; self.parse_parenthesized_column_list(Optional, false)? } else { vec![] @@ -4768,7 +5086,7 @@ impl<'a> Parser<'a> { None }; - self.expect_keyword(Keyword::AS)?; + self.expect_keyword_is(Keyword::AS)?; let query = self.parse_query()?; // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. 
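[Note: the hunks above migrate many call sites from `expect_keyword` to the new `expect_keyword_is`, which returns `Result<(), ParserError>` instead of a cloned `TokenWithSpan`. The sketch below is not part of the diff; it illustrates the difference from a caller's point of view, assuming the crate's public `Parser::new`/`try_with_sql` constructors and `GenericDialect`:]

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::keywords::Keyword;
use sqlparser::parser::{Parser, ParserError};

fn main() -> Result<(), ParserError> {
    let dialect = GenericDialect {};

    // `expect_keyword` clones and returns the matched TokenWithSpan,
    // which is useful when the caller needs the keyword's span.
    let mut p = Parser::new(&dialect).try_with_sql("AS SELECT 1")?;
    let tok = p.expect_keyword(Keyword::AS)?;
    println!("consumed {} at {:?}", tok.token, tok.span.start);

    // `expect_keyword_is` only checks and consumes, returning `()`.
    // Most call sites in this diff ignore the token, so they skip the clone.
    let mut p = Parser::new(&dialect).try_with_sql("AS SELECT 1")?;
    p.expect_keyword_is(Keyword::AS)?;
    Ok(())
}
```

[Returning `()` lets the many call sites that discard the matched token avoid cloning a `TokenWithSpan`, which appears to be the motivation for this migration.]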
@@ -4934,7 +5252,7 @@ impl<'a> Parser<'a> { } } Keyword::CONNECTION => { - self.expect_keyword(Keyword::LIMIT)?; + self.expect_keyword_is(Keyword::LIMIT)?; if connection_limit.is_some() { parser_err!("Found multiple CONNECTION LIMIT", loc) } else { @@ -4943,7 +5261,7 @@ impl<'a> Parser<'a> { } } Keyword::VALID => { - self.expect_keyword(Keyword::UNTIL)?; + self.expect_keyword_is(Keyword::UNTIL)?; if valid_until.is_some() { parser_err!("Found multiple VALID UNTIL", loc) } else { @@ -4956,14 +5274,14 @@ impl<'a> Parser<'a> { if !in_role.is_empty() { parser_err!("Found multiple IN ROLE", loc) } else { - in_role = self.parse_comma_separated(|p| p.parse_identifier(false))?; + in_role = self.parse_comma_separated(|p| p.parse_identifier())?; Ok(()) } } else if self.parse_keyword(Keyword::GROUP) { if !in_group.is_empty() { parser_err!("Found multiple IN GROUP", loc) } else { - in_group = self.parse_comma_separated(|p| p.parse_identifier(false))?; + in_group = self.parse_comma_separated(|p| p.parse_identifier())?; Ok(()) } } else { @@ -4974,7 +5292,7 @@ impl<'a> Parser<'a> { if !role.is_empty() { parser_err!("Found multiple ROLE", loc) } else { - role = self.parse_comma_separated(|p| p.parse_identifier(false))?; + role = self.parse_comma_separated(|p| p.parse_identifier())?; Ok(()) } } @@ -4982,7 +5300,7 @@ impl<'a> Parser<'a> { if !user.is_empty() { parser_err!("Found multiple USER", loc) } else { - user = self.parse_comma_separated(|p| p.parse_identifier(false))?; + user = self.parse_comma_separated(|p| p.parse_identifier())?; Ok(()) } } @@ -4990,7 +5308,7 @@ impl<'a> Parser<'a> { if !admin.is_empty() { parser_err!("Found multiple ADMIN", loc) } else { - admin = self.parse_comma_separated(|p| p.parse_identifier(false))?; + admin = self.parse_comma_separated(|p| p.parse_identifier())?; Ok(()) } } @@ -5027,13 +5345,13 @@ impl<'a> Parser<'a> { Some(Keyword::SESSION_USER) => Owner::SessionUser, Some(_) => unreachable!(), None => { - match self.parse_identifier(false) { + match self.parse_identifier() { Ok(ident) => Owner::Ident(ident), Err(e) => { return Err(ParserError::ParserError(format!("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. 
{e}"))) } } - }, + } }; Ok(owner) } @@ -5048,8 +5366,8 @@ impl<'a> Parser<'a> { /// /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createpolicy.html) pub fn parse_create_policy(&mut self) -> Result { - let name = self.parse_identifier(false)?; - self.expect_keyword(Keyword::ON)?; + let name = self.parse_identifier()?; + self.expect_keyword_is(Keyword::ON)?; let table_name = self.parse_object_name(false)?; let policy_type = if self.parse_keyword(Keyword::AS) { @@ -5154,9 +5472,11 @@ impl<'a> Parser<'a> { return self.parse_drop_secret(temporary, persistent); } else if self.parse_keyword(Keyword::TRIGGER) { return self.parse_drop_trigger(); + } else if self.parse_keyword(Keyword::EXTENSION) { + return self.parse_drop_extension(); } else { return self.expected( - "TABLE, VIEW, INDEX, ROLE, SCHEMA, DATABASE, FUNCTION, PROCEDURE, STAGE, TRIGGER, SECRET, SEQUENCE, or TYPE after DROP", + "DATABASE, EXTENSION, FUNCTION, INDEX, POLICY, PROCEDURE, ROLE, SCHEMA, SECRET, SEQUENCE, STAGE, TABLE, TRIGGER, TYPE, or VIEW after DROP", self.peek_token(), ); }; @@ -5219,8 +5539,8 @@ impl<'a> Parser<'a> { /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-droppolicy.html) fn parse_drop_policy(&mut self) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - self.expect_keyword(Keyword::ON)?; + let name = self.parse_identifier()?; + self.expect_keyword_is(Keyword::ON)?; let table_name = self.parse_object_name(false)?; let option = self.parse_optional_referential_action(); Ok(Statement::DropPolicy { @@ -5271,9 +5591,9 @@ impl<'a> Parser<'a> { persistent: bool, ) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; let storage_specifier = if self.parse_keyword(Keyword::FROM) { - self.parse_identifier(false).ok() + self.parse_identifier().ok() } else { None }; @@ -5312,7 +5632,7 @@ impl<'a> Parser<'a> { return self.parse_mssql_declare(); } - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; let binary = Some(self.parse_keyword(Keyword::BINARY)); let sensitive = if self.parse_keyword(Keyword::INSENSITIVE) { @@ -5330,12 +5650,12 @@ impl<'a> Parser<'a> { None }; - self.expect_keyword(Keyword::CURSOR)?; + self.expect_keyword_is(Keyword::CURSOR)?; let declare_type = Some(DeclareType::Cursor); let hold = match self.parse_one_of_keywords(&[Keyword::WITH, Keyword::WITHOUT]) { Some(keyword) => { - self.expect_keyword(Keyword::HOLD)?; + self.expect_keyword_is(Keyword::HOLD)?; match keyword { Keyword::WITH => Some(true), @@ -5346,7 +5666,7 @@ impl<'a> Parser<'a> { None => None, }; - self.expect_keyword(Keyword::FOR)?; + self.expect_keyword_is(Keyword::FOR)?; let query = Some(self.parse_query()?); @@ -5373,7 +5693,7 @@ impl<'a> Parser<'a> { /// ``` /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#declare pub fn parse_big_query_declare(&mut self) -> Result { - let names = self.parse_comma_separated(|parser| Parser::parse_identifier(parser, false))?; + let names = self.parse_comma_separated(Parser::parse_identifier)?; let data_type = match self.peek_token().token { Token::Word(w) if w.keyword == Keyword::DEFAULT => None, @@ -5389,7 +5709,7 @@ impl<'a> Parser<'a> { } else { // If no variable type - default expression must be specified, per BQ docs. // i.e `DECLARE foo;` is invalid. 
- self.expect_keyword(Keyword::DEFAULT)?; + self.expect_keyword_is(Keyword::DEFAULT)?; Some(self.parse_expr()?) }; @@ -5435,10 +5755,10 @@ impl<'a> Parser<'a> { pub fn parse_snowflake_declare(&mut self) -> Result { let mut stmts = vec![]; loop { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; let (declare_type, for_query, assigned_expr, data_type) = if self.parse_keyword(Keyword::CURSOR) { - self.expect_keyword(Keyword::FOR)?; + self.expect_keyword_is(Keyword::FOR)?; match self.peek_token().token { Token::Word(w) if w.keyword == Keyword::SELECT => ( Some(DeclareType::Cursor), @@ -5553,7 +5873,7 @@ impl<'a> Parser<'a> { /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/language-elements/declare-local-variable-transact-sql?view=sql-server-ver16 pub fn parse_mssql_declare_stmt(&mut self) -> Result { let name = { - let ident = self.parse_identifier(false)?; + let ident = self.parse_identifier()?; if !ident.value.starts_with('@') { Err(ParserError::TokenizerError( "Invalid MsSql variable declaration.".to_string(), @@ -5684,7 +6004,7 @@ impl<'a> Parser<'a> { self.expect_one_of_keywords(&[Keyword::FROM, Keyword::IN])?; - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; let into = if self.parse_keyword(Keyword::INTO) { Some(self.parse_object_name(false)?) @@ -5722,14 +6042,14 @@ impl<'a> Parser<'a> { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let index_name = if if_not_exists || !self.parse_keyword(Keyword::ON) { let index_name = self.parse_object_name(false)?; - self.expect_keyword(Keyword::ON)?; + self.expect_keyword_is(Keyword::ON)?; Some(index_name) } else { None }; let table_name = self.parse_object_name(false)?; let using = if self.parse_keyword(Keyword::USING) { - Some(self.parse_identifier(false)?) + Some(self.parse_identifier()?) } else { None }; @@ -5739,7 +6059,7 @@ impl<'a> Parser<'a> { let include = if self.parse_keyword(Keyword::INCLUDE) { self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(|p| p.parse_identifier(false))?; + let columns = self.parse_comma_separated(|p| p.parse_identifier())?; self.expect_token(&Token::RParen)?; columns } else { @@ -5748,7 +6068,7 @@ impl<'a> Parser<'a> { let nulls_distinct = if self.parse_keyword(Keyword::NULLS) { let not = self.parse_keyword(Keyword::NOT); - self.expect_keyword(Keyword::DISTINCT)?; + self.expect_keyword_is(Keyword::DISTINCT)?; Some(!not) } else { None @@ -5788,17 +6108,17 @@ impl<'a> Parser<'a> { pub fn parse_create_extension(&mut self) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; let (schema, version, cascade) = if self.parse_keyword(Keyword::WITH) { let schema = if self.parse_keyword(Keyword::SCHEMA) { - Some(self.parse_identifier(false)?) + Some(self.parse_identifier()?) } else { None }; let version = if self.parse_keyword(Keyword::VERSION) { - Some(self.parse_identifier(false)?) + Some(self.parse_identifier()?) } else { None }; @@ -5819,6 +6139,25 @@ impl<'a> Parser<'a> { }) } + /// Parse a PostgreSQL-specific [Statement::DropExtension] statement. 
+ pub fn parse_drop_extension(&mut self) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let names = self.parse_comma_separated(|p| p.parse_identifier())?; + let cascade_or_restrict = + self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]); + Ok(Statement::DropExtension { + names, + if_exists, + cascade_or_restrict: cascade_or_restrict + .map(|k| match k { + Keyword::CASCADE => Ok(ReferentialAction::Cascade), + Keyword::RESTRICT => Ok(ReferentialAction::Restrict), + _ => self.expected("CASCADE or RESTRICT", self.peek_token()), + }) + .transpose()?, + }) + } + //TODO: Implement parsing for Skewed pub fn parse_hive_distribution(&mut self) -> Result { if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { @@ -5844,10 +6183,10 @@ impl<'a> Parser<'a> { hive_format.row_format = Some(self.parse_row_format()?); } Some(Keyword::STORED) => { - self.expect_keyword(Keyword::AS)?; + self.expect_keyword_is(Keyword::AS)?; if self.parse_keyword(Keyword::INPUTFORMAT) { let input_format = self.parse_expr()?; - self.expect_keyword(Keyword::OUTPUTFORMAT)?; + self.expect_keyword_is(Keyword::OUTPUTFORMAT)?; let output_format = self.parse_expr()?; hive_format.storage = Some(HiveIOFormat::IOF { input_format, @@ -5880,7 +6219,7 @@ impl<'a> Parser<'a> { } pub fn parse_row_format(&mut self) -> Result { - self.expect_keyword(Keyword::FORMAT)?; + self.expect_keyword_is(Keyword::FORMAT)?; match self.parse_one_of_keywords(&[Keyword::SERDE, Keyword::DELIMITED]) { Some(Keyword::SERDE) => { let class = self.parse_literal_string()?; @@ -5901,13 +6240,13 @@ impl<'a> Parser<'a> { if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) { row_delimiters.push(HiveRowDelimiter { delimiter: HiveDelimiter::FieldsTerminatedBy, - char: self.parse_identifier(false)?, + char: self.parse_identifier()?, }); if self.parse_keywords(&[Keyword::ESCAPED, Keyword::BY]) { row_delimiters.push(HiveRowDelimiter { delimiter: HiveDelimiter::FieldsEscapedBy, - char: self.parse_identifier(false)?, + char: self.parse_identifier()?, }); } } else { @@ -5922,7 +6261,7 @@ impl<'a> Parser<'a> { ]) { row_delimiters.push(HiveRowDelimiter { delimiter: HiveDelimiter::CollectionItemsTerminatedBy, - char: self.parse_identifier(false)?, + char: self.parse_identifier()?, }); } else { break; @@ -5936,7 +6275,7 @@ impl<'a> Parser<'a> { ]) { row_delimiters.push(HiveRowDelimiter { delimiter: HiveDelimiter::MapKeysTerminatedBy, - char: self.parse_identifier(false)?, + char: self.parse_identifier()?, }); } else { break; @@ -5946,7 +6285,7 @@ impl<'a> Parser<'a> { if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) { row_delimiters.push(HiveRowDelimiter { delimiter: HiveDelimiter::LinesTerminatedBy, - char: self.parse_identifier(false)?, + char: self.parse_identifier()?, }); } else { break; @@ -5956,7 +6295,7 @@ impl<'a> Parser<'a> { if self.parse_keywords(&[Keyword::DEFINED, Keyword::AS]) { row_delimiters.push(HiveRowDelimiter { delimiter: HiveDelimiter::NullDefinedAs, - char: self.parse_identifier(false)?, + char: self.parse_identifier()?, }); } else { break; @@ -5977,7 +6316,7 @@ impl<'a> Parser<'a> { fn parse_optional_on_cluster(&mut self) -> Result, ParserError> { if self.parse_keywords(&[Keyword::ON, Keyword::CLUSTER]) { - Ok(Some(self.parse_identifier(false)?)) + Ok(Some(self.parse_identifier()?)) } else { Ok(None) } @@ -6113,22 +6452,11 @@ impl<'a> Parser<'a> { None }; - let on_commit: Option = - if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT, Keyword::DELETE, Keyword::ROWS]) - { - 
Some(OnCommit::DeleteRows) - } else if self.parse_keywords(&[ - Keyword::ON, - Keyword::COMMIT, - Keyword::PRESERVE, - Keyword::ROWS, - ]) { - Some(OnCommit::PreserveRows) - } else if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT, Keyword::DROP]) { - Some(OnCommit::Drop) - } else { - None - }; + let on_commit = if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT]) { + Some(self.parse_create_table_on_commit()?) + } else { + None + }; let strict = self.parse_keyword(Keyword::STRICT); @@ -6184,6 +6512,21 @@ impl<'a> Parser<'a> { .build()) } + pub(crate) fn parse_create_table_on_commit(&mut self) -> Result { + if self.parse_keywords(&[Keyword::DELETE, Keyword::ROWS]) { + Ok(OnCommit::DeleteRows) + } else if self.parse_keywords(&[Keyword::PRESERVE, Keyword::ROWS]) { + Ok(OnCommit::PreserveRows) + } else if self.parse_keywords(&[Keyword::DROP]) { + Ok(OnCommit::Drop) + } else { + parser_err!( + "Expecting DELETE ROWS, PRESERVE ROWS or DROP", + self.peek_token() + ) + } + } + /// Parse configuration like partitioning, clustering information during the table creation. /// /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_2) @@ -6204,7 +6547,7 @@ impl<'a> Parser<'a> { if dialect_of!(self is BigQueryDialect | GenericDialect) { if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { cluster_by = Some(WrappedCollection::NoWrapping( - self.parse_comma_separated(|p| p.parse_identifier(false))?, + self.parse_comma_separated(|p| p.parse_identifier())?, )); }; @@ -6295,13 +6638,13 @@ impl<'a> Parser<'a> { } pub fn parse_procedure_param(&mut self) -> Result { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; let data_type = self.parse_data_type()?; Ok(ProcedureParam { name, data_type }) } pub fn parse_column_def(&mut self) -> Result { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; let data_type = if self.is_column_type_sqlite_unspecified() { DataType::Unspecified } else { @@ -6315,7 +6658,7 @@ impl<'a> Parser<'a> { let mut options = vec![]; loop { if self.parse_keyword(Keyword::CONSTRAINT) { - let name = Some(self.parse_identifier(false)?); + let name = Some(self.parse_identifier()?); if let Some(option) = self.parse_optional_column_option()? { options.push(ColumnOptionDef { name, option }); } else { @@ -6534,7 +6877,7 @@ impl<'a> Parser<'a> { } pub(crate) fn parse_tag(&mut self) -> Result { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; self.expect_token(&Token::Eq)?; let value = self.parse_literal_string()?; @@ -6649,9 +6992,9 @@ impl<'a> Parser<'a> { None }; - self.expect_keyword(Keyword::INTO)?; + self.expect_keyword_is(Keyword::INTO)?; let num_buckets = self.parse_number_value()?; - self.expect_keyword(Keyword::BUCKETS)?; + self.expect_keyword_is(Keyword::BUCKETS)?; Some(ClusteredBy { columns, sorted_by, @@ -6723,7 +7066,7 @@ impl<'a> Parser<'a> { &mut self, ) -> Result, ParserError> { let name = if self.parse_keyword(Keyword::CONSTRAINT) { - Some(self.parse_identifier(false)?) + Some(self.parse_identifier()?) 
} else { None }; @@ -6739,6 +7082,8 @@ impl<'a> Parser<'a> { .expected("`index_name` or `(column_name [, ...])`", self.peek_token()); } + let nulls_distinct = self.parse_optional_nulls_distinct()?; + // optional index name let index_name = self.parse_optional_indent()?; let index_type = self.parse_optional_using_then_index_type()?; @@ -6754,11 +7099,12 @@ impl<'a> Parser<'a> { columns, index_options, characteristics, + nulls_distinct, })) } Token::Word(w) if w.keyword == Keyword::PRIMARY => { // after `PRIMARY` always stay `KEY` - self.expect_keyword(Keyword::KEY)?; + self.expect_keyword_is(Keyword::KEY)?; // optional index name let index_name = self.parse_optional_indent()?; @@ -6777,11 +7123,11 @@ impl<'a> Parser<'a> { })) } Token::Word(w) if w.keyword == Keyword::FOREIGN => { - self.expect_keyword(Keyword::KEY)?; + self.expect_keyword_is(Keyword::KEY)?; let columns = self.parse_parenthesized_column_list(Mandatory, false)?; - self.expect_keyword(Keyword::REFERENCES)?; + self.expect_keyword_is(Keyword::REFERENCES)?; let foreign_table = self.parse_object_name(false)?; - let referred_columns = self.parse_parenthesized_column_list(Mandatory, false)?; + let referred_columns = self.parse_parenthesized_column_list(Optional, false)?; let mut on_delete = None; let mut on_update = None; loop { @@ -6876,6 +7222,20 @@ impl<'a> Parser<'a> { } } + fn parse_optional_nulls_distinct(&mut self) -> Result { + Ok(if self.parse_keyword(Keyword::NULLS) { + let not = self.parse_keyword(Keyword::NOT); + self.expect_keyword_is(Keyword::DISTINCT)?; + if not { + NullsDistinctOption::NotDistinct + } else { + NullsDistinctOption::Distinct + } + } else { + NullsDistinctOption::None + }) + } + pub fn maybe_parse_options( &mut self, keyword: Keyword, @@ -6937,7 +7297,7 @@ impl<'a> Parser<'a> { /// Parse `[ident]`, mostly `ident` is name, like: /// `window_name`, `index_name`, ... 
pub fn parse_optional_indent(&mut self) -> Result, ParserError> { - self.maybe_parse(|parser| parser.parse_identifier(false)) + self.maybe_parse(|parser| parser.parse_identifier()) } #[must_use] @@ -6978,7 +7338,7 @@ impl<'a> Parser<'a> { match self.peek_token().token { Token::Word(w) if w.keyword == Keyword::HEAP && is_mssql => { - Ok(SqlOption::Ident(self.parse_identifier(false)?)) + Ok(SqlOption::Ident(self.parse_identifier()?)) } Token::Word(w) if w.keyword == Keyword::PARTITION && is_mssql => { self.parse_option_partition() @@ -6987,7 +7347,7 @@ impl<'a> Parser<'a> { self.parse_option_clustered() } _ => { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; self.expect_token(&Token::Eq)?; let value = self.parse_expr()?; @@ -7016,7 +7376,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let columns = self.parse_comma_separated(|p| { - let name = p.parse_identifier(false)?; + let name = p.parse_identifier()?; let asc = p.parse_asc_desc(); Ok(ClusteredIndex { name, asc }) @@ -7033,11 +7393,11 @@ impl<'a> Parser<'a> { } pub fn parse_option_partition(&mut self) -> Result { - self.expect_keyword(Keyword::PARTITION)?; + self.expect_keyword_is(Keyword::PARTITION)?; self.expect_token(&Token::LParen)?; - let column_name = self.parse_identifier(false)?; + let column_name = self.parse_identifier()?; - self.expect_keyword(Keyword::RANGE)?; + self.expect_keyword_is(Keyword::RANGE)?; let range_direction = if self.parse_keyword(Keyword::LEFT) { Some(PartitionRangeDirection::Left) } else if self.parse_keyword(Keyword::RIGHT) { @@ -7070,7 +7430,7 @@ impl<'a> Parser<'a> { pub fn parse_projection_select(&mut self) -> Result { self.expect_token(&Token::LParen)?; - self.expect_keyword(Keyword::SELECT)?; + self.expect_keyword_is(Keyword::SELECT)?; let projection = self.parse_projection()?; let group_by = self.parse_optional_group_by()?; let order_by = self.parse_optional_order_by()?; @@ -7083,7 +7443,7 @@ impl<'a> Parser<'a> { } pub fn parse_alter_table_add_projection(&mut self) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; let query = self.parse_projection_select()?; Ok(AlterTableOperation::AddProjection { if_not_exists, @@ -7141,18 +7501,18 @@ impl<'a> Parser<'a> { } } else if self.parse_keyword(Keyword::RENAME) { if dialect_of!(self is PostgreSqlDialect) && self.parse_keyword(Keyword::CONSTRAINT) { - let old_name = self.parse_identifier(false)?; - self.expect_keyword(Keyword::TO)?; - let new_name = self.parse_identifier(false)?; + let old_name = self.parse_identifier()?; + self.expect_keyword_is(Keyword::TO)?; + let new_name = self.parse_identifier()?; AlterTableOperation::RenameConstraint { old_name, new_name } } else if self.parse_keyword(Keyword::TO) { let table_name = self.parse_object_name(false)?; AlterTableOperation::RenameTable { table_name } } else { let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let old_column_name = self.parse_identifier(false)?; - self.expect_keyword(Keyword::TO)?; - let new_column_name = self.parse_identifier(false)?; + let old_column_name = self.parse_identifier()?; + self.expect_keyword_is(Keyword::TO)?; + let new_column_name = self.parse_identifier()?; AlterTableOperation::RenameColumn { old_column_name, new_column_name, @@ -7162,10 +7522,10 @@ impl<'a> Parser<'a> { if self.parse_keywords(&[Keyword::ROW, Keyword::LEVEL, Keyword::SECURITY]) { 
AlterTableOperation::DisableRowLevelSecurity {} } else if self.parse_keyword(Keyword::RULE) { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; AlterTableOperation::DisableRule { name } } else if self.parse_keyword(Keyword::TRIGGER) { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; AlterTableOperation::DisableTrigger { name } } else { return self.expected( @@ -7175,24 +7535,24 @@ impl<'a> Parser<'a> { } } else if self.parse_keyword(Keyword::ENABLE) { if self.parse_keywords(&[Keyword::ALWAYS, Keyword::RULE]) { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; AlterTableOperation::EnableAlwaysRule { name } } else if self.parse_keywords(&[Keyword::ALWAYS, Keyword::TRIGGER]) { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; AlterTableOperation::EnableAlwaysTrigger { name } } else if self.parse_keywords(&[Keyword::ROW, Keyword::LEVEL, Keyword::SECURITY]) { AlterTableOperation::EnableRowLevelSecurity {} } else if self.parse_keywords(&[Keyword::REPLICA, Keyword::RULE]) { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; AlterTableOperation::EnableReplicaRule { name } } else if self.parse_keywords(&[Keyword::REPLICA, Keyword::TRIGGER]) { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; AlterTableOperation::EnableReplicaTrigger { name } } else if self.parse_keyword(Keyword::RULE) { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; AlterTableOperation::EnableRule { name } } else if self.parse_keyword(Keyword::TRIGGER) { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; AlterTableOperation::EnableTrigger { name } } else { return self.expected( @@ -7204,9 +7564,9 @@ impl<'a> Parser<'a> { && dialect_of!(self is ClickHouseDialect|GenericDialect) { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; let partition = if self.parse_keywords(&[Keyword::IN, Keyword::PARTITION]) { - Some(self.parse_identifier(false)?) + Some(self.parse_identifier()?) } else { None }; @@ -7219,9 +7579,9 @@ impl<'a> Parser<'a> { && dialect_of!(self is ClickHouseDialect|GenericDialect) { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; let partition = if self.parse_keywords(&[Keyword::IN, Keyword::PARTITION]) { - Some(self.parse_identifier(false)?) + Some(self.parse_identifier()?) 
} else { None }; @@ -7249,7 +7609,7 @@ impl<'a> Parser<'a> { } } else if self.parse_keyword(Keyword::CONSTRAINT) { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; let cascade = self.parse_keyword(Keyword::CASCADE); AlterTableOperation::DropConstraint { if_exists, @@ -7264,12 +7624,14 @@ impl<'a> Parser<'a> { && dialect_of!(self is ClickHouseDialect|GenericDialect) { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; AlterTableOperation::DropProjection { if_exists, name } + } else if self.parse_keywords(&[Keyword::CLUSTERING, Keyword::KEY]) { + AlterTableOperation::DropClusteringKey } else { let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let column_name = self.parse_identifier(false)?; + let column_name = self.parse_identifier()?; let cascade = self.parse_keyword(Keyword::CASCADE); AlterTableOperation::DropColumn { column_name, @@ -7281,7 +7643,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let before = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; - self.expect_keyword(Keyword::RENAME)?; + self.expect_keyword_is(Keyword::RENAME)?; self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?; self.expect_token(&Token::LParen)?; let renames = self.parse_comma_separated(Parser::parse_expr)?; @@ -7292,8 +7654,8 @@ impl<'a> Parser<'a> { } } else if self.parse_keyword(Keyword::CHANGE) { let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let old_name = self.parse_identifier(false)?; - let new_name = self.parse_identifier(false)?; + let old_name = self.parse_identifier()?; + let new_name = self.parse_identifier()?; let data_type = self.parse_data_type()?; let mut options = vec![]; while let Some(option) = self.parse_optional_column_option()? { @@ -7311,7 +7673,7 @@ impl<'a> Parser<'a> { } } else if self.parse_keyword(Keyword::MODIFY) { let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let col_name = self.parse_identifier(false)?; + let col_name = self.parse_identifier()?; let data_type = self.parse_data_type()?; let mut options = vec![]; while let Some(option) = self.parse_optional_column_option()? { @@ -7328,7 +7690,7 @@ impl<'a> Parser<'a> { } } else if self.parse_keyword(Keyword::ALTER) { let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let column_name = self.parse_identifier(false)?; + let column_name = self.parse_identifier()?; let is_postgresql = dialect_of!(self is PostgreSqlDialect); let op: AlterColumnOperation = if self.parse_keywords(&[ @@ -7389,7 +7751,7 @@ impl<'a> Parser<'a> { }; AlterTableOperation::AlterColumn { column_name, op } } else if self.parse_keyword(Keyword::SWAP) { - self.expect_keyword(Keyword::WITH)?; + self.expect_keyword_is(Keyword::WITH)?; let table_name = self.parse_object_name(false)?; AlterTableOperation::SwapWith { table_name } } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) @@ -7414,8 +7776,8 @@ impl<'a> Parser<'a> { { let partition = self.parse_part_or_partition()?; let with_name = if self.parse_keyword(Keyword::WITH) { - self.expect_keyword(Keyword::NAME)?; - Some(self.parse_identifier(false)?) + self.expect_keyword_is(Keyword::NAME)?; + Some(self.parse_identifier()?) 
} else { None }; @@ -7428,8 +7790,8 @@ impl<'a> Parser<'a> { { let partition = self.parse_part_or_partition()?; let with_name = if self.parse_keyword(Keyword::WITH) { - self.expect_keyword(Keyword::NAME)?; - Some(self.parse_identifier(false)?) + self.expect_keyword_is(Keyword::NAME)?; + Some(self.parse_identifier()?) } else { None }; @@ -7437,6 +7799,15 @@ impl<'a> Parser<'a> { partition, with_name, } + } else if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + self.expect_token(&Token::LParen)?; + let exprs = self.parse_comma_separated(|parser| parser.parse_expr())?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::ClusterBy { exprs } + } else if self.parse_keywords(&[Keyword::SUSPEND, Keyword::RECLUSTER]) { + AlterTableOperation::SuspendRecluster + } else if self.parse_keywords(&[Keyword::RESUME, Keyword::RECLUSTER]) { + AlterTableOperation::ResumeRecluster } else { let options: Vec = self.parse_options_with_keywords(&[Keyword::SET, Keyword::TBLPROPERTIES])?; @@ -7485,12 +7856,12 @@ impl<'a> Parser<'a> { if self.parse_keyword(Keyword::LOCATION) { location = Some(HiveSetLocation { has_set: false, - location: self.parse_identifier(false)?, + location: self.parse_identifier()?, }); } else if self.parse_keywords(&[Keyword::SET, Keyword::LOCATION]) { location = Some(HiveSetLocation { has_set: true, - location: self.parse_identifier(false)?, + location: self.parse_identifier()?, }); } @@ -7534,7 +7905,7 @@ impl<'a> Parser<'a> { let with_options = self.parse_options(Keyword::WITH)?; - self.expect_keyword(Keyword::AS)?; + self.expect_keyword_is(Keyword::AS)?; let query = self.parse_query()?; Ok(Statement::AlterView { @@ -7560,6 +7931,7 @@ impl<'a> Parser<'a> { } else { Ok(Statement::Call(Function { name: object_name, + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::None, over: None, @@ -7641,7 +8013,7 @@ impl<'a> Parser<'a> { let cursor = if self.parse_keyword(Keyword::ALL) { CloseCursor::All } else { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; CloseCursor::Specific { name } }; @@ -7663,7 +8035,7 @@ impl<'a> Parser<'a> { Keyword::FORCE_NULL, Keyword::ENCODING, ]) { - Some(Keyword::FORMAT) => CopyOption::Format(self.parse_identifier(false)?), + Some(Keyword::FORMAT) => CopyOption::Format(self.parse_identifier()?), Some(Keyword::FREEZE) => CopyOption::Freeze(!matches!( self.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]), Some(Keyword::FALSE) @@ -7739,12 +8111,12 @@ impl<'a> Parser<'a> { } Some(Keyword::FORCE) if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) => { CopyLegacyCsvOption::ForceNotNull( - self.parse_comma_separated(|p| p.parse_identifier(false))?, + self.parse_comma_separated(|p| p.parse_identifier())?, ) } Some(Keyword::FORCE) if self.parse_keywords(&[Keyword::QUOTE]) => { CopyLegacyCsvOption::ForceQuote( - self.parse_comma_separated(|p| p.parse_identifier(false))?, + self.parse_comma_separated(|p| p.parse_identifier())?, ) } _ => self.expected("csv option", self.peek_token())?, @@ -7990,6 +8362,23 @@ impl<'a> Parser<'a> { } } + pub fn parse_enum_values(&mut self) -> Result, ParserError> { + self.expect_token(&Token::LParen)?; + let values = self.parse_comma_separated(|parser| { + let name = parser.parse_literal_string()?; + let e = if parser.consume_token(&Token::Eq) { + let value = parser.parse_number()?; + EnumMember::NamedValue(name, value) + } else { + EnumMember::Name(name) + }; + Ok(e) + })?; + self.expect_token(&Token::RParen)?; + + Ok(values) + } + /// Parse a SQL 
datatype (in the context of a CREATE TABLE statement for example) pub fn parse_data_type(&mut self) -> Result { let (ty, trailing_bracket) = self.parse_data_type_helper()?; @@ -8006,9 +8395,13 @@ impl<'a> Parser<'a> { fn parse_data_type_helper( &mut self, ) -> Result<(DataType, MatchedTrailingBracket), ParserError> { - let next_token = self.next_token(); + let dialect = self.dialect; + self.advance_token(); + let next_token = self.get_current_token(); + let next_token_index = self.get_current_index(); + let mut trailing_bracket: MatchedTrailingBracket = false.into(); - let mut data = match next_token.token { + let mut data = match &next_token.token { Token::Word(w) => match w.keyword { Keyword::BOOLEAN => Ok(DataType::Boolean), Keyword::BOOL => Ok(DataType::Bool), @@ -8022,7 +8415,9 @@ impl<'a> Parser<'a> { if self.parse_keyword(Keyword::PRECISION) { Ok(DataType::DoublePrecision) } else { - Ok(DataType::Double) + Ok(DataType::Double( + self.parse_exact_number_optional_precision_scale()?, + )) } } Keyword::TINYINT => { @@ -8144,6 +8539,13 @@ impl<'a> Parser<'a> { Keyword::MEDIUMBLOB => Ok(DataType::MediumBlob), Keyword::LONGBLOB => Ok(DataType::LongBlob), Keyword::BYTES => Ok(DataType::Bytes(self.parse_optional_precision()?)), + Keyword::BIT => { + if self.parse_keyword(Keyword::VARYING) { + Ok(DataType::BitVarying(self.parse_optional_precision()?)) + } else { + Ok(DataType::Bit(self.parse_optional_precision()?)) + } + } Keyword::UUID => Ok(DataType::Uuid), Keyword::DATE => Ok(DataType::Date), Keyword::DATE32 => Ok(DataType::Date32), @@ -8221,7 +8623,9 @@ impl<'a> Parser<'a> { Keyword::BIGDECIMAL => Ok(DataType::BigDecimal( self.parse_exact_number_optional_precision_scale()?, )), - Keyword::ENUM => Ok(DataType::Enum(self.parse_string_values()?)), + Keyword::ENUM => Ok(DataType::Enum(self.parse_enum_values()?, None)), + Keyword::ENUM8 => Ok(DataType::Enum(self.parse_enum_values()?, Some(8))), + Keyword::ENUM16 => Ok(DataType::Enum(self.parse_enum_values()?, Some(16))), Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)), Keyword::ARRAY => { if dialect_of!(self is SnowflakeDialect) { @@ -8239,12 +8643,12 @@ impl<'a> Parser<'a> { )))) } } - Keyword::STRUCT if dialect_of!(self is DuckDbDialect) => { + Keyword::STRUCT if dialect_is!(dialect is DuckDbDialect) => { self.prev_token(); let field_defs = self.parse_duckdb_struct_type_def()?; Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses)) } - Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => { + Keyword::STRUCT if dialect_is!(dialect is BigQueryDialect | GenericDialect) => { self.prev_token(); let (field_defs, _trailing_bracket) = self.parse_struct_type_def(Self::parse_struct_field_def)?; @@ -8254,18 +8658,18 @@ impl<'a> Parser<'a> { StructBracketKind::AngleBrackets, )) } - Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => { + Keyword::UNION if dialect_is!(dialect is DuckDbDialect | GenericDialect) => { self.prev_token(); let fields = self.parse_union_type_def()?; Ok(DataType::Union(fields)) } - Keyword::NULLABLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + Keyword::NULLABLE if dialect_is!(dialect is ClickHouseDialect | GenericDialect) => { Ok(self.parse_sub_type(DataType::Nullable)?) } - Keyword::LOWCARDINALITY if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + Keyword::LOWCARDINALITY if dialect_is!(dialect is ClickHouseDialect | GenericDialect) => { Ok(self.parse_sub_type(DataType::LowCardinality)?) 
} - Keyword::MAP if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + Keyword::MAP if dialect_is!(dialect is ClickHouseDialect | GenericDialect) => { self.prev_token(); let (key_data_type, value_data_type) = self.parse_click_house_map_def()?; Ok(DataType::Map( @@ -8273,18 +8677,22 @@ impl<'a> Parser<'a> { Box::new(value_data_type), )) } - Keyword::NESTED if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + Keyword::NESTED if dialect_is!(dialect is ClickHouseDialect | GenericDialect) => { self.expect_token(&Token::LParen)?; let field_defs = self.parse_comma_separated(Parser::parse_column_def)?; self.expect_token(&Token::RParen)?; Ok(DataType::Nested(field_defs)) } - Keyword::TUPLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + Keyword::TUPLE if dialect_is!(dialect is ClickHouseDialect | GenericDialect) => { self.prev_token(); let field_defs = self.parse_click_house_tuple_def()?; Ok(DataType::Tuple(field_defs)) } Keyword::TRIGGER => Ok(DataType::Trigger), + Keyword::ANY if self.peek_keyword(Keyword::TYPE) => { + let _ = self.parse_keyword(Keyword::TYPE); + Ok(DataType::AnyType) + } _ => { self.prev_token(); let type_name = self.parse_object_name(false)?; @@ -8295,7 +8703,7 @@ impl<'a> Parser<'a> { } } }, - _ => self.expected("a data type name", next_token), + _ => self.expected_at("a data type name", next_token_index), }?; // Parse array data types. Note: this is postgresql-specific and different from @@ -8333,9 +8741,9 @@ impl<'a> Parser<'a> { /// Strictly parse `identifier AS identifier` pub fn parse_identifier_with_alias(&mut self) -> Result { - let ident = self.parse_identifier(false)?; - self.expect_keyword(Keyword::AS)?; - let alias = self.parse_identifier(false)?; + let ident = self.parse_identifier()?; + self.expect_keyword_is(Keyword::AS)?; + let alias = self.parse_identifier()?; Ok(IdentWithAlias { ident, alias }) } @@ -8467,17 +8875,28 @@ impl<'a> Parser<'a> { /// in this context on BigQuery. 
pub fn parse_object_name(&mut self, in_table_clause: bool) -> Result<ObjectName, ParserError> { let mut idents = vec![]; - loop { - if self.dialect.supports_object_name_double_dot_notation() - && idents.len() == 1 - && self.consume_token(&Token::Period) - { - // Empty string here means default schema - idents.push(Ident::new("")); + + if dialect_of!(self is BigQueryDialect) && in_table_clause { + loop { + let (ident, end_with_period) = self.parse_unquoted_hyphenated_identifier()?; + idents.push(ident); + if !self.consume_token(&Token::Period) && !end_with_period { + break; + } } - idents.push(self.parse_identifier(in_table_clause)?); - if !self.consume_token(&Token::Period) { - break; + } else { + loop { + if self.dialect.supports_object_name_double_dot_notation() + && idents.len() == 1 + && self.consume_token(&Token::Period) + { + // Empty string here means default schema + idents.push(Ident::new("")); + } + idents.push(self.parse_identifier()?); + if !self.consume_token(&Token::Period) { + break; + } } } @@ -8509,14 +8928,14 @@ impl<'a> Parser<'a> { pub fn parse_identifiers(&mut self) -> Result<Vec<Ident>, ParserError> { let mut idents = vec![]; loop { - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(w) => { - idents.push(w.to_ident(self.peek_token().span)); + idents.push(w.to_ident(self.peek_token_ref().span)); } Token::EOF | Token::Eq => break, _ => {} } - self.next_token(); + self.advance_token(); } Ok(idents) } @@ -8612,29 +9031,32 @@ impl<'a> Parser<'a> { } /// Parse a simple one-word identifier (possibly quoted, possibly a keyword) - /// - /// The `in_table_clause` parameter indicates whether the identifier is a table in a FROM, JOIN, or - /// similar table clause. Currently, this is used only to support unquoted hyphenated identifiers in - // this context on BigQuery. - pub fn parse_identifier(&mut self, in_table_clause: bool) -> Result<Ident, ParserError> { + pub fn parse_identifier(&mut self) -> Result<Ident, ParserError> { let next_token = self.next_token(); match next_token.token { - Token::Word(w) => { - let mut ident = w.to_ident(next_token.span); + Token::Word(w) => Ok(w.to_ident(next_token.span)), + Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)), + Token::DoubleQuotedString(s) => Ok(Ident::with_quote('\"', s)), + _ => self.expected("identifier", next_token), + } + } - // On BigQuery, hyphens are permitted in unquoted identifiers inside of a FROM or - // TABLE clause [0]. - // - // The first segment must be an ordinary unquoted identifier, e.g. it must not start - // with a digit. Subsequent segments are either must either be valid identifiers or - // integers, e.g. foo-123 is allowed, but foo-123a is not. - // - // [0] https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical - if dialect_of!(self is BigQueryDialect) - && w.quote_style.is_none() - && in_table_clause - { - let mut requires_whitespace = false; + /// On BigQuery, hyphens are permitted in unquoted identifiers inside of a FROM or + /// TABLE clause. + /// + /// The first segment must be an ordinary unquoted identifier, e.g. it must not start + /// with a digit. Subsequent segments must either be valid identifiers or + /// integers, e.g. foo-123 is allowed, but foo-123a is not. + /// + /// [BigQuery-lexical](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical) + /// + /// Return a tuple of the identifier and a boolean indicating whether it ends with a period. 
+ fn parse_unquoted_hyphenated_identifier(&mut self) -> Result<(Ident, bool), ParserError> { + match self.peek_token().token { + Token::Word(w) => { + let mut requires_whitespace = false; + let mut ident = w.to_ident(self.next_token().span); + if w.quote_style.is_none() { while matches!(self.peek_token_no_skip().token, Token::Minus) { self.next_token(); ident.value.push('-'); @@ -8648,9 +9070,30 @@ impl<'a> Parser<'a> { ident.value.push_str(&next_word.value); false } - Token::Number(s, false) if s.chars().all(|c| c.is_ascii_digit()) => { - ident.value.push_str(&s); - true + Token::Number(s, false) => { + // A number token can represent a decimal value ending with a period, e.g., `Number('123.')`. + // However, for an [ObjectName], it is part of a hyphenated identifier, e.g., `foo-123.bar`. + // + // If a number token is followed by a period, it is part of an [ObjectName]. + // Return the identifier with `true` if the number token is followed by a period, indicating that + // parsing should continue for the next part of the hyphenated identifier. + if s.ends_with('.') { + let Some(s) = s.split('.').next().filter(|s| { + !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()) + }) else { + return self.expected( + "continuation of hyphenated identifier", + TokenWithSpan::new(Token::Number(s, false), token.span), + ); + }; + ident.value.push_str(s); + return Ok((ident, true)); + } else { + ident.value.push_str(&s); + } + // If next token is period, then it is part of an ObjectName and we don't expect whitespace + // after the number. + !matches!(self.peek_token().token, Token::Period) } _ => { return self @@ -8669,11 +9112,9 @@ impl<'a> Parser<'a> { } } } - Ok(ident) + Ok((ident, false)) } - Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)), - Token::DoubleQuotedString(s) => Ok(Ident::with_quote('\"', s)), - _ => self.expected("identifier", next_token), + _ => Ok((self.parse_identifier()?, false)), } } @@ -8695,7 +9136,7 @@ impl<'a> Parser<'a> { /// Parses a column definition within a view. 
    fn parse_view_column(&mut self) -> Result<ViewColumnDef, ParserError> {
-        let name = self.parse_identifier(false)?;
+        let name = self.parse_identifier()?;
         let options = if (dialect_of!(self is BigQueryDialect | GenericDialect)
             && self.parse_keyword(Keyword::OPTIONS))
             || (dialect_of!(self is SnowflakeDialect | GenericDialect)
@@ -8730,7 +9171,7 @@ impl<'a> Parser<'a> {
             self.next_token();
             Ok(vec![])
         } else {
-            let cols = self.parse_comma_separated(|p| p.parse_identifier(false))?;
+            let cols = self.parse_comma_separated(|p| p.parse_identifier())?;
             self.expect_token(&Token::RParen)?;
             Ok(cols)
         }
@@ -8745,7 +9186,7 @@ impl<'a> Parser<'a> {
     fn parse_table_alias_column_defs(&mut self) -> Result<Vec<TableAliasColumnDef>, ParserError> {
         if self.consume_token(&Token::LParen) {
             let cols = self.parse_comma_separated(|p| {
-                let name = p.parse_identifier(false)?;
+                let name = p.parse_identifier()?;
                 let data_type = p.maybe_parse(|p| p.parse_data_type())?;
                 Ok(TableAliasColumnDef { name, data_type })
             })?;
@@ -8781,7 +9222,7 @@ impl<'a> Parser<'a> {
     ///
     /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64
     pub fn parse_datetime_64(&mut self) -> Result<(u64, Option<String>), ParserError> {
-        self.expect_keyword(Keyword::DATETIME64)?;
+        self.expect_keyword_is(Keyword::DATETIME64)?;
         self.expect_token(&Token::LParen)?;
         let precision = self.parse_literal_uint()?;
         let time_zone = if self.consume_token(&Token::Comma) {
@@ -8904,7 +9345,7 @@ impl<'a> Parser<'a> {
             (vec![], false)
         } else {
             let tables = self.parse_comma_separated(|p| p.parse_object_name(false))?;
-            self.expect_keyword(Keyword::FROM)?;
+            self.expect_keyword_is(Keyword::FROM)?;
             (tables, true)
         }
     } else {
@@ -8986,6 +9427,7 @@ impl<'a> Parser<'a> {
         let mut analyze = false;
         let mut verbose = false;
         let mut query_plan = false;
+        let mut estimate = false;
         let mut format = None;
         let mut options = None;

@@ -8998,6 +9440,8 @@ impl<'a> Parser<'a> {
             options = Some(self.parse_utility_options()?)
} else if self.parse_keywords(&[Keyword::QUERY, Keyword::PLAN]) { query_plan = true; + } else if self.parse_keyword(Keyword::ESTIMATE) { + estimate = true; } else { analyze = self.parse_keyword(Keyword::ANALYZE); verbose = self.parse_keyword(Keyword::VERBOSE); @@ -9015,6 +9459,7 @@ impl<'a> Parser<'a> { analyze, verbose, query_plan, + estimate, statement: Box::new(statement), format, options, @@ -9051,9 +9496,9 @@ impl<'a> Parser<'a> { /// expect the initial keyword to be already consumed pub fn parse_query(&mut self) -> Result, ParserError> { let _guard = self.recursion_counter.try_decrease()?; - let with = if let Some(with_token) = self.parse_keyword_token(Keyword::WITH) { + let with = if let Some(with_token) = self.parse_keyword_token_ref(Keyword::WITH) { Some(With { - with_token: with_token.into(), + with_token: with_token.clone().into(), recursive: self.parse_keyword(Keyword::RECURSIVE), cte_tables: self.parse_comma_separated(Parser::parse_cte)?, }) @@ -9154,7 +9599,7 @@ impl<'a> Parser<'a> { if self.parse_keyword(Keyword::NULL) { Some(FormatClause::Null) } else { - let ident = self.parse_identifier(false)?; + let ident = self.parse_identifier()?; Some(FormatClause::Identifier(ident)) } } else { @@ -9183,7 +9628,7 @@ impl<'a> Parser<'a> { && self.parse_keyword(Keyword::SETTINGS) { let key_values = self.parse_comma_separated(|p| { - let key = p.parse_identifier(false)?; + let key = p.parse_identifier()?; p.expect_token(&Token::Eq)?; let value = p.parse_value()?; Ok(Setting { key, value }) @@ -9244,7 +9689,7 @@ impl<'a> Parser<'a> { if self.parse_keyword(Keyword::ELEMENTS) { elements = true; } else if self.parse_keyword(Keyword::BINARY) { - self.expect_keyword(Keyword::BASE64)?; + self.expect_keyword_is(Keyword::BASE64)?; binary_base64 = true; } else if self.parse_keyword(Keyword::ROOT) { self.expect_token(&Token::LParen)?; @@ -9299,7 +9744,7 @@ impl<'a> Parser<'a> { /// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`) pub fn parse_cte(&mut self) -> Result { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; let mut cte = if self.parse_keyword(Keyword::AS) { let mut is_materialized = None; @@ -9328,7 +9773,7 @@ impl<'a> Parser<'a> { } } else { let columns = self.parse_table_alias_column_defs()?; - self.expect_keyword(Keyword::AS)?; + self.expect_keyword_is(Keyword::AS)?; let mut is_materialized = None; if dialect_of!(self is PostgreSqlDialect) { if self.parse_keyword(Keyword::MATERIALIZED) { @@ -9352,7 +9797,7 @@ impl<'a> Parser<'a> { } }; if self.parse_keyword(Keyword::FROM) { - cte.from = Some(self.parse_identifier(false)?); + cte.from = Some(self.parse_identifier()?); } Ok(cte) } @@ -9484,7 +9929,12 @@ impl<'a> Parser<'a> { top = Some(self.parse_top()?); } - let projection = self.parse_projection()?; + let projection = + if self.dialect.supports_empty_projections() && self.peek_keyword(Keyword::FROM) { + vec![] + } else { + self.parse_projection()? + }; let into = if self.parse_keyword(Keyword::INTO) { let temporary = self @@ -9730,7 +10180,7 @@ impl<'a> Parser<'a> { /// Parse a `SET ROLE` statement. Expects SET to be consumed already. 
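On the `estimate` flag above: it models ClickHouse's `EXPLAIN ESTIMATE`, which reports estimated rows, marks, and parts instead of a plan. The hunk parses the keyword without a dialect gate; a minimal sketch, assuming the enclosing variant is `Statement::Explain` as in the released AST:

```rust
use sqlparser::ast::Statement;
use sqlparser::dialect::ClickHouseDialect;
use sqlparser::parser::Parser;

fn main() {
    let sql = "EXPLAIN ESTIMATE SELECT * FROM test";
    let stmts = Parser::parse_sql(&ClickHouseDialect {}, sql).expect("parses");
    // The new flag rides on Statement::Explain next to analyze/verbose/query_plan.
    match &stmts[0] {
        Statement::Explain { estimate, .. } => assert!(*estimate),
        other => panic!("expected an EXPLAIN statement, got {other}"),
    }
}
```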
fn parse_set_role(&mut self, modifier: Option) -> Result { - self.expect_keyword(Keyword::ROLE)?; + self.expect_keyword_is(Keyword::ROLE)?; let context_modifier = match modifier { Some(Keyword::LOCAL) => ContextModifier::Local, Some(Keyword::SESSION) => ContextModifier::Session, @@ -9740,7 +10190,7 @@ impl<'a> Parser<'a> { let role_name = if self.parse_keyword(Keyword::NONE) { None } else { - Some(self.parse_identifier(false)?) + Some(self.parse_identifier()?) }; Ok(Statement::SetRole { context_modifier, @@ -9765,12 +10215,10 @@ impl<'a> Parser<'a> { && self.consume_token(&Token::LParen) { let variables = OneOrManyWithParens::Many( - self.parse_comma_separated(|parser: &mut Parser<'a>| { - parser.parse_identifier(false) - })? - .into_iter() - .map(|ident| ObjectName(vec![ident])) - .collect(), + self.parse_comma_separated(|parser: &mut Parser<'a>| parser.parse_identifier())? + .into_iter() + .map(|ident| ObjectName(vec![ident])) + .collect(), ); self.expect_token(&Token::RParen)?; variables @@ -10060,23 +10508,46 @@ impl<'a> Parser<'a> { } else if dialect_of!(self is DatabricksDialect) { self.parse_one_of_keywords(&[Keyword::CATALOG, Keyword::DATABASE, Keyword::SCHEMA]) } else if dialect_of!(self is SnowflakeDialect) { - self.parse_one_of_keywords(&[Keyword::DATABASE, Keyword::SCHEMA, Keyword::WAREHOUSE]) + self.parse_one_of_keywords(&[ + Keyword::DATABASE, + Keyword::SCHEMA, + Keyword::WAREHOUSE, + Keyword::ROLE, + Keyword::SECONDARY, + ]) } else { None // No specific keywords for other dialects, including GenericDialect }; - let obj_name = self.parse_object_name(false)?; - let result = match parsed_keyword { - Some(Keyword::CATALOG) => Use::Catalog(obj_name), - Some(Keyword::DATABASE) => Use::Database(obj_name), - Some(Keyword::SCHEMA) => Use::Schema(obj_name), - Some(Keyword::WAREHOUSE) => Use::Warehouse(obj_name), - _ => Use::Object(obj_name), + let result = if matches!(parsed_keyword, Some(Keyword::SECONDARY)) { + self.parse_secondary_roles()? + } else { + let obj_name = self.parse_object_name(false)?; + match parsed_keyword { + Some(Keyword::CATALOG) => Use::Catalog(obj_name), + Some(Keyword::DATABASE) => Use::Database(obj_name), + Some(Keyword::SCHEMA) => Use::Schema(obj_name), + Some(Keyword::WAREHOUSE) => Use::Warehouse(obj_name), + Some(Keyword::ROLE) => Use::Role(obj_name), + _ => Use::Object(obj_name), + } }; Ok(Statement::Use(result)) } + fn parse_secondary_roles(&mut self) -> Result { + self.expect_keyword_is(Keyword::ROLES)?; + if self.parse_keyword(Keyword::NONE) { + Ok(Use::SecondaryRoles(SecondaryRoles::None)) + } else if self.parse_keyword(Keyword::ALL) { + Ok(Use::SecondaryRoles(SecondaryRoles::All)) + } else { + let roles = self.parse_comma_separated(|parser| parser.parse_identifier())?; + Ok(Use::SecondaryRoles(SecondaryRoles::List(roles))) + } + } + pub fn parse_table_and_joins(&mut self) -> Result { let relation = self.parse_table_factor()?; // Note that for keywords to be properly handled here, they need to be @@ -10101,16 +10572,16 @@ impl<'a> Parser<'a> { } } else if self.parse_keyword(Keyword::OUTER) { // MSSQL extension, similar to LEFT JOIN LATERAL .. 
ON 1=1 - self.expect_keyword(Keyword::APPLY)?; + self.expect_keyword_is(Keyword::APPLY)?; Join { relation: self.parse_table_factor()?, global, join_operator: JoinOperator::OuterApply, } } else if self.parse_keyword(Keyword::ASOF) { - self.expect_keyword(Keyword::JOIN)?; + self.expect_keyword_is(Keyword::JOIN)?; let relation = self.parse_table_factor()?; - self.expect_keyword(Keyword::MATCH_CONDITION)?; + self.expect_keyword_is(Keyword::MATCH_CONDITION)?; let match_condition = self.parse_parenthesized(Self::parse_expr)?; Join { relation, @@ -10131,7 +10602,7 @@ impl<'a> Parser<'a> { let join_operator_type = match peek_keyword { Keyword::INNER | Keyword::JOIN => { let _ = self.parse_keyword(Keyword::INNER); // [ INNER ] - self.expect_keyword(Keyword::JOIN)?; + self.expect_keyword_is(Keyword::JOIN)?; JoinOperator::Inner } kw @ Keyword::LEFT | kw @ Keyword::RIGHT => { @@ -10145,7 +10616,7 @@ impl<'a> Parser<'a> { ]); match join_type { Some(Keyword::OUTER) => { - self.expect_keyword(Keyword::JOIN)?; + self.expect_keyword_is(Keyword::JOIN)?; if is_left { JoinOperator::LeftOuter } else { @@ -10153,7 +10624,7 @@ impl<'a> Parser<'a> { } } Some(Keyword::SEMI) => { - self.expect_keyword(Keyword::JOIN)?; + self.expect_keyword_is(Keyword::JOIN)?; if is_left { JoinOperator::LeftSemi } else { @@ -10161,7 +10632,7 @@ impl<'a> Parser<'a> { } } Some(Keyword::ANTI) => { - self.expect_keyword(Keyword::JOIN)?; + self.expect_keyword_is(Keyword::JOIN)?; if is_left { JoinOperator::LeftAnti } else { @@ -10184,18 +10655,18 @@ impl<'a> Parser<'a> { } Keyword::ANTI => { let _ = self.next_token(); // consume ANTI - self.expect_keyword(Keyword::JOIN)?; + self.expect_keyword_is(Keyword::JOIN)?; JoinOperator::Anti } Keyword::SEMI => { let _ = self.next_token(); // consume SEMI - self.expect_keyword(Keyword::JOIN)?; + self.expect_keyword_is(Keyword::JOIN)?; JoinOperator::Semi } Keyword::FULL => { let _ = self.next_token(); // consume FULL let _ = self.parse_keyword(Keyword::OUTER); // [ OUTER ] - self.expect_keyword(Keyword::JOIN)?; + self.expect_keyword_is(Keyword::JOIN)?; JoinOperator::FullOuter } Keyword::OUTER => { @@ -10367,7 +10838,7 @@ impl<'a> Parser<'a> { ] ) { - self.expect_keyword(Keyword::VALUES)?; + self.expect_keyword_is(Keyword::VALUES)?; // Snowflake and Databricks allow syntax like below: // SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2) @@ -10431,7 +10902,7 @@ impl<'a> Parser<'a> { let json_expr = self.parse_expr()?; self.expect_token(&Token::Comma)?; let json_path = self.parse_value()?; - self.expect_keyword(Keyword::COLUMNS)?; + self.expect_keyword_is(Keyword::COLUMNS)?; self.expect_token(&Token::LParen)?; let columns = self.parse_comma_separated(Parser::parse_json_table_column_def)?; self.expect_token(&Token::RParen)?; @@ -10474,6 +10945,13 @@ impl<'a> Parser<'a> { let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); + let mut sample = None; + if self.dialect.supports_table_sample_before_alias() { + if let Some(parsed_sample) = self.maybe_parse_table_sample()? { + sample = Some(TableSampleKind::BeforeTableAlias(parsed_sample)); + } + } + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; // MSSQL-specific table hints: @@ -10488,6 +10966,12 @@ impl<'a> Parser<'a> { } }; + if !self.dialect.supports_table_sample_before_alias() { + if let Some(parsed_sample) = self.maybe_parse_table_sample()? 
{ + sample = Some(TableSampleKind::AfterTableAlias(parsed_sample)); + } + } + let mut table = TableFactor::Table { name, alias, @@ -10497,6 +10981,7 @@ impl<'a> Parser<'a> { partitions, with_ordinality, json_path, + sample, }; while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) { @@ -10517,6 +11002,115 @@ impl<'a> Parser<'a> { } } + fn maybe_parse_table_sample(&mut self) -> Result>, ParserError> { + let modifier = if self.parse_keyword(Keyword::TABLESAMPLE) { + TableSampleModifier::TableSample + } else if self.parse_keyword(Keyword::SAMPLE) { + TableSampleModifier::Sample + } else { + return Ok(None); + }; + + let name = match self.parse_one_of_keywords(&[ + Keyword::BERNOULLI, + Keyword::ROW, + Keyword::SYSTEM, + Keyword::BLOCK, + ]) { + Some(Keyword::BERNOULLI) => Some(TableSampleMethod::Bernoulli), + Some(Keyword::ROW) => Some(TableSampleMethod::Row), + Some(Keyword::SYSTEM) => Some(TableSampleMethod::System), + Some(Keyword::BLOCK) => Some(TableSampleMethod::Block), + _ => None, + }; + + let parenthesized = self.consume_token(&Token::LParen); + + let (quantity, bucket) = if parenthesized && self.parse_keyword(Keyword::BUCKET) { + let selected_bucket = self.parse_number_value()?; + self.expect_keywords(&[Keyword::OUT, Keyword::OF])?; + let total = self.parse_number_value()?; + let on = if self.parse_keyword(Keyword::ON) { + Some(self.parse_expr()?) + } else { + None + }; + ( + None, + Some(TableSampleBucket { + bucket: selected_bucket, + total, + on, + }), + ) + } else { + let value = match self.maybe_parse(|p| p.parse_expr())? { + Some(num) => num, + None => { + if let Token::Word(w) = self.next_token().token { + Expr::Value(Value::Placeholder(w.value)) + } else { + return parser_err!( + "Expecting number or byte length e.g. 100M", + self.peek_token().span.start + ); + } + } + }; + let unit = if self.parse_keyword(Keyword::ROWS) { + Some(TableSampleUnit::Rows) + } else if self.parse_keyword(Keyword::PERCENT) { + Some(TableSampleUnit::Percent) + } else { + None + }; + ( + Some(TableSampleQuantity { + parenthesized, + value, + unit, + }), + None, + ) + }; + if parenthesized { + self.expect_token(&Token::RParen)?; + } + + let seed = if self.parse_keyword(Keyword::REPEATABLE) { + Some(self.parse_table_sample_seed(TableSampleSeedModifier::Repeatable)?) + } else if self.parse_keyword(Keyword::SEED) { + Some(self.parse_table_sample_seed(TableSampleSeedModifier::Seed)?) + } else { + None + }; + + let offset = if self.parse_keyword(Keyword::OFFSET) { + Some(self.parse_expr()?) + } else { + None + }; + + Ok(Some(Box::new(TableSample { + modifier, + name, + quantity, + seed, + bucket, + offset, + }))) + } + + fn parse_table_sample_seed( + &mut self, + modifier: TableSampleSeedModifier, + ) -> Result { + self.expect_token(&Token::LParen)?; + let value = self.parse_number_value()?; + self.expect_token(&Token::RParen)?; + Ok(TableSampleSeed { modifier, value }) + } + /// Parses `OPENJSON( jsonExpression [ , path ] ) [ ]` clause, /// assuming the `OPENJSON` keyword was already consumed. fn parse_open_json_table_factor(&mut self) -> Result { @@ -10564,7 +11158,7 @@ impl<'a> Parser<'a> { self.parse_comma_separated(|p| { let expr = p.parse_expr()?; let _ = p.parse_keyword(Keyword::AS); - let alias = p.parse_identifier(false)?; + let alias = p.parse_identifier()?; Ok(Measure { expr, alias }) })? 
} else { @@ -10610,9 +11204,9 @@ impl<'a> Parser<'a> { } else if self.parse_keywords(&[Keyword::TO, Keyword::NEXT, Keyword::ROW]) { Some(AfterMatchSkip::ToNextRow) } else if self.parse_keywords(&[Keyword::TO, Keyword::FIRST]) { - Some(AfterMatchSkip::ToFirst(self.parse_identifier(false)?)) + Some(AfterMatchSkip::ToFirst(self.parse_identifier()?)) } else if self.parse_keywords(&[Keyword::TO, Keyword::LAST]) { - Some(AfterMatchSkip::ToLast(self.parse_identifier(false)?)) + Some(AfterMatchSkip::ToLast(self.parse_identifier()?)) } else { let found = self.next_token(); return self.expected("after match skip option", found); @@ -10621,14 +11215,14 @@ impl<'a> Parser<'a> { None }; - self.expect_keyword(Keyword::PATTERN)?; + self.expect_keyword_is(Keyword::PATTERN)?; let pattern = self.parse_parenthesized(Self::parse_pattern)?; - self.expect_keyword(Keyword::DEFINE)?; + self.expect_keyword_is(Keyword::DEFINE)?; let symbols = self.parse_comma_separated(|p| { - let symbol = p.parse_identifier(false)?; - p.expect_keyword(Keyword::AS)?; + let symbol = p.parse_identifier()?; + p.expect_keyword_is(Keyword::AS)?; let definition = p.parse_expr()?; Ok(SymbolDefinition { symbol, definition }) })?; @@ -10658,9 +11252,7 @@ impl<'a> Parser<'a> { } Token::LBrace => { self.expect_token(&Token::Minus)?; - let symbol = self - .parse_identifier(false) - .map(MatchRecognizeSymbol::Named)?; + let symbol = self.parse_identifier().map(MatchRecognizeSymbol::Named)?; self.expect_token(&Token::Minus)?; self.expect_token(&Token::RBrace)?; Ok(MatchRecognizePattern::Exclude(symbol)) @@ -10672,7 +11264,7 @@ impl<'a> Parser<'a> { }) if value == "PERMUTE" => { self.expect_token(&Token::LParen)?; let symbols = self.parse_comma_separated(|p| { - p.parse_identifier(false).map(MatchRecognizeSymbol::Named) + p.parse_identifier().map(MatchRecognizeSymbol::Named) })?; self.expect_token(&Token::RParen)?; Ok(MatchRecognizePattern::Permute(symbols)) @@ -10684,7 +11276,7 @@ impl<'a> Parser<'a> { } _ => { self.prev_token(); - self.parse_identifier(false) + self.parse_identifier() .map(MatchRecognizeSymbol::Named) .map(MatchRecognizePattern::Symbol) } @@ -10793,7 +11385,7 @@ impl<'a> Parser<'a> { if self.parse_keyword(Keyword::NESTED) { let _has_path_keyword = self.parse_keyword(Keyword::PATH); let path = self.parse_value()?; - self.expect_keyword(Keyword::COLUMNS)?; + self.expect_keyword_is(Keyword::COLUMNS)?; let columns = self.parse_parenthesized(|p| { p.parse_comma_separated(Self::parse_json_table_column_def) })?; @@ -10802,14 +11394,14 @@ impl<'a> Parser<'a> { columns, })); } - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; if self.parse_keyword(Keyword::FOR) { - self.expect_keyword(Keyword::ORDINALITY)?; + self.expect_keyword_is(Keyword::ORDINALITY)?; return Ok(JsonTableColumn::ForOrdinality(name)); } let r#type = self.parse_data_type()?; let exists = self.parse_keyword(Keyword::EXISTS); - self.expect_keyword(Keyword::PATH)?; + self.expect_keyword_is(Keyword::PATH)?; let path = self.parse_value()?; let mut on_empty = None; let mut on_error = None; @@ -10817,7 +11409,7 @@ impl<'a> Parser<'a> { if self.parse_keyword(Keyword::EMPTY) { on_empty = Some(error_handling); } else { - self.expect_keyword(Keyword::ERROR)?; + self.expect_keyword_is(Keyword::ERROR)?; on_error = Some(error_handling); } } @@ -10839,7 +11431,7 @@ impl<'a> Parser<'a> { /// /// Reference: pub fn parse_openjson_table_column_def(&mut self) -> Result { - let name = self.parse_identifier(false)?; + let name = self.parse_identifier()?; let 
r#type = self.parse_data_type()?; let path = if let Token::SingleQuotedString(path) = self.peek_token().token { self.next_token(); @@ -10849,7 +11441,7 @@ impl<'a> Parser<'a> { }; let as_json = self.parse_keyword(Keyword::AS); if as_json { - self.expect_keyword(Keyword::JSON)?; + self.expect_keyword_is(Keyword::JSON)?; } Ok(OpenJsonTableColumn { name, @@ -10871,7 +11463,7 @@ impl<'a> Parser<'a> { } else { return Ok(None); }; - self.expect_keyword(Keyword::ON)?; + self.expect_keyword_is(Keyword::ON)?; Ok(Some(res)) } @@ -10899,7 +11491,7 @@ impl<'a> Parser<'a> { }?; let expr = self.parse_function(ObjectName(vec![Ident::new(function_name)]))?; let alias = if self.parse_keyword(Keyword::AS) { - Some(self.parse_identifier(false)?) + Some(self.parse_identifier()?) } else { None }; @@ -10931,7 +11523,7 @@ impl<'a> Parser<'a> { pub fn parse_expr_with_alias(&mut self) -> Result { let expr = self.parse_expr()?; let alias = if self.parse_keyword(Keyword::AS) { - Some(self.parse_identifier(false)?) + Some(self.parse_identifier()?) } else { None }; @@ -10945,9 +11537,9 @@ impl<'a> Parser<'a> { ) -> Result { self.expect_token(&Token::LParen)?; let aggregate_functions = self.parse_comma_separated(Self::parse_aliased_function_call)?; - self.expect_keyword(Keyword::FOR)?; + self.expect_keyword_is(Keyword::FOR)?; let value_column = self.parse_object_name(false)?.0; - self.expect_keyword(Keyword::IN)?; + self.expect_keyword_is(Keyword::IN)?; self.expect_token(&Token::LParen)?; let value_source = if self.parse_keyword(Keyword::ANY) { @@ -10991,10 +11583,10 @@ impl<'a> Parser<'a> { table: TableFactor, ) -> Result { self.expect_token(&Token::LParen)?; - let value = self.parse_identifier(false)?; - self.expect_keyword(Keyword::FOR)?; - let name = self.parse_identifier(false)?; - self.expect_keyword(Keyword::IN)?; + let value = self.parse_identifier()?; + self.expect_keyword_is(Keyword::FOR)?; + let name = self.parse_identifier()?; + self.expect_keyword_is(Keyword::IN)?; let columns = self.parse_parenthesized_column_list(Mandatory, false)?; self.expect_token(&Token::RParen)?; let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; @@ -11026,15 +11618,15 @@ impl<'a> Parser<'a> { pub fn parse_grant(&mut self) -> Result { let (privileges, objects) = self.parse_grant_revoke_privileges_objects()?; - self.expect_keyword(Keyword::TO)?; - let grantees = self.parse_comma_separated(|p| p.parse_identifier(false))?; + self.expect_keyword_is(Keyword::TO)?; + let grantees = self.parse_comma_separated(|p| p.parse_identifier())?; let with_grant_option = self.parse_keywords(&[Keyword::WITH, Keyword::GRANT, Keyword::OPTION]); let granted_by = self .parse_keywords(&[Keyword::GRANTED, Keyword::BY]) - .then(|| self.parse_identifier(false).unwrap()); + .then(|| self.parse_identifier().unwrap()); Ok(Statement::Grant { privileges, @@ -11086,7 +11678,7 @@ impl<'a> Parser<'a> { Privileges::Actions(act) }; - self.expect_keyword(Keyword::ON)?; + self.expect_keyword_is(Keyword::ON)?; let objects = if self.parse_keywords(&[ Keyword::ALL, @@ -11157,12 +11749,12 @@ impl<'a> Parser<'a> { pub fn parse_revoke(&mut self) -> Result { let (privileges, objects) = self.parse_grant_revoke_privileges_objects()?; - self.expect_keyword(Keyword::FROM)?; - let grantees = self.parse_comma_separated(|p| p.parse_identifier(false))?; + self.expect_keyword_is(Keyword::FROM)?; + let grantees = self.parse_comma_separated(|p| p.parse_identifier())?; let granted_by = self .parse_keywords(&[Keyword::GRANTED, Keyword::BY]) - .then(|| 
self.parse_identifier(false).unwrap()); + .then(|| self.parse_identifier().unwrap()); let loc = self.peek_token().span.start; let cascade = self.parse_keyword(Keyword::CASCADE); @@ -11224,9 +11816,8 @@ impl<'a> Parser<'a> { let replace_into = false; - let action = self.parse_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE]); - let into = action == Some(Keyword::INTO); - let overwrite = action == Some(Keyword::OVERWRITE); + let overwrite = self.parse_keyword(Keyword::OVERWRITE); + let into = self.parse_keyword(Keyword::INTO); let local = self.parse_keyword(Keyword::LOCAL); @@ -11252,7 +11843,7 @@ impl<'a> Parser<'a> { let table_alias = if dialect_of!(self is PostgreSqlDialect) && self.parse_keyword(Keyword::AS) { - Some(self.parse_identifier(false)?) + Some(self.parse_identifier()?) } else { None }; @@ -11263,14 +11854,19 @@ impl<'a> Parser<'a> { if self.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) { (vec![], None, vec![], None) } else { - let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; + let (columns, partitioned, after_columns) = if !self.peek_subquery_start() { + let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; - let partitioned = self.parse_insert_partition()?; - // Hive allows you to specify columns after partitions as well if you want. - let after_columns = if dialect_of!(self is HiveDialect) { - self.parse_parenthesized_column_list(Optional, false)? + let partitioned = self.parse_insert_partition()?; + // Hive allows you to specify columns after partitions as well if you want. + let after_columns = if dialect_of!(self is HiveDialect) { + self.parse_parenthesized_column_list(Optional, false)? + } else { + vec![] + }; + (columns, partitioned, after_columns) } else { - vec![] + Default::default() }; let source = Some(self.parse_query()?); @@ -11304,12 +11900,12 @@ impl<'a> Parser<'a> { None }; - self.expect_keyword(Keyword::DO)?; + self.expect_keyword_is(Keyword::DO)?; let action = if self.parse_keyword(Keyword::NOTHING) { OnConflictAction::DoNothing } else { - self.expect_keyword(Keyword::UPDATE)?; - self.expect_keyword(Keyword::SET)?; + self.expect_keyword_is(Keyword::UPDATE)?; + self.expect_keyword_is(Keyword::SET)?; let assignments = self.parse_comma_separated(Parser::parse_assignment)?; let selection = if self.parse_keyword(Keyword::WHERE) { Some(self.parse_expr()?) @@ -11327,9 +11923,9 @@ impl<'a> Parser<'a> { action, })) } else { - self.expect_keyword(Keyword::DUPLICATE)?; - self.expect_keyword(Keyword::KEY)?; - self.expect_keyword(Keyword::UPDATE)?; + self.expect_keyword_is(Keyword::DUPLICATE)?; + self.expect_keyword_is(Keyword::KEY)?; + self.expect_keyword_is(Keyword::UPDATE)?; let l = self.parse_comma_separated(Parser::parse_assignment)?; Some(OnInsert::DuplicateKeyUpdate(l)) @@ -11365,6 +11961,14 @@ impl<'a> Parser<'a> { } } + /// Returns true if the immediate tokens look like the + /// beginning of a subquery. 
`(SELECT ...` + fn peek_subquery_start(&mut self) -> bool { + let [maybe_lparen, maybe_select] = self.peek_tokens(); + Token::LParen == maybe_lparen + && matches!(maybe_select, Token::Word(w) if w.keyword == Keyword::SELECT) + } + fn parse_conflict_clause(&mut self) -> Option { if self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]) { Some(SqliteOnConflict::Replace) @@ -11399,7 +12003,7 @@ impl<'a> Parser<'a> { ) -> Result, ParserError> { if self.parse_keyword(Keyword::INPUTFORMAT) { let input_format = self.parse_expr()?; - self.expect_keyword(Keyword::SERDE)?; + self.expect_keyword_is(Keyword::SERDE)?; let serde = self.parse_expr()?; Ok(Some(HiveLoadDataFormat { input_format, @@ -11420,14 +12024,19 @@ impl<'a> Parser<'a> { pub fn parse_update(&mut self) -> Result { let or = self.parse_conflict_clause(); let table = self.parse_table_and_joins()?; + let from_before_set = if self.parse_keyword(Keyword::FROM) { + Some(UpdateTableFromKind::BeforeSet( + self.parse_table_and_joins()?, + )) + } else { + None + }; self.expect_keyword(Keyword::SET)?; let assignments = self.parse_comma_separated(Parser::parse_assignment)?; - let from = if self.parse_keyword(Keyword::FROM) - && dialect_of!(self is GenericDialect | PostgreSqlDialect | DuckDbDialect | BigQueryDialect | SnowflakeDialect | RedshiftSqlDialect | MsSqlDialect | SQLiteDialect ) - { - Some(self.parse_table_and_joins()?) + let from = if from_before_set.is_none() && self.parse_keyword(Keyword::FROM) { + Some(UpdateTableFromKind::AfterSet(self.parse_table_and_joins()?)) } else { - None + from_before_set }; let selection = if self.parse_keyword(Keyword::WHERE) { Some(self.parse_expr()?) @@ -11483,7 +12092,7 @@ impl<'a> Parser<'a> { })? } else { self.maybe_parse(|p| { - let name = p.parse_identifier(false)?; + let name = p.parse_identifier()?; let operator = p.parse_function_named_arg_operator()?; let arg = p.parse_wildcard_expr()?.into(); Ok(FunctionArg::Named { @@ -11778,12 +12387,11 @@ impl<'a> Parser<'a> { ) -> Result, ParserError> { let opt_exclude = if self.parse_keyword(Keyword::EXCLUDE) { if self.consume_token(&Token::LParen) { - let columns = - self.parse_comma_separated(|parser| parser.parse_identifier(false))?; + let columns = self.parse_comma_separated(|parser| parser.parse_identifier())?; self.expect_token(&Token::RParen)?; Some(ExcludeSelectItem::Multiple(columns)) } else { - let column = self.parse_identifier(false)?; + let column = self.parse_identifier()?; Some(ExcludeSelectItem::Single(column)) } } else { @@ -11816,7 +12424,7 @@ impl<'a> Parser<'a> { } } else { // Clickhouse allows EXCEPT column_name - let ident = self.parse_identifier(false)?; + let ident = self.parse_identifier()?; Some(ExceptSelectItem { first_element: ident, additional_elements: vec![], @@ -11874,7 +12482,7 @@ impl<'a> Parser<'a> { pub fn parse_replace_elements(&mut self) -> Result { let expr = self.parse_expr()?; let as_keyword = self.parse_keyword(Keyword::AS); - let ident = self.parse_identifier(false)?; + let ident = self.parse_identifier()?; Ok(ReplaceSelectElement { expr, column_name: ident, @@ -11971,7 +12579,7 @@ impl<'a> Parser<'a> { // Parse a INTERPOLATE expression (ClickHouse dialect) pub fn parse_interpolation(&mut self) -> Result { - let column = self.parse_identifier(false)?; + let column = self.parse_identifier()?; let expr = if self.parse_keyword(Keyword::AS) { Some(self.parse_expr()?) 
} else {
@@ -12105,10 +12713,11 @@ impl<'a> Parser<'a> {
     }

     pub fn parse_start_transaction(&mut self) -> Result<Statement, ParserError> {
-        self.expect_keyword(Keyword::TRANSACTION)?;
+        self.expect_keyword_is(Keyword::TRANSACTION)?;
         Ok(Statement::StartTransaction {
             modes: self.parse_transaction_modes()?,
             begin: false,
+            transaction: Some(BeginTransactionKind::Transaction),
             modifier: None,
         })
     }
@@ -12125,10 +12734,15 @@ impl<'a> Parser<'a> {
         } else {
             None
         };
-        let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]);
+        let transaction = match self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]) {
+            Some(Keyword::TRANSACTION) => Some(BeginTransactionKind::Transaction),
+            Some(Keyword::WORK) => Some(BeginTransactionKind::Work),
+            _ => None,
+        };
         Ok(Statement::StartTransaction {
             modes: self.parse_transaction_modes()?,
             begin: true,
+            transaction,
             modifier,
         })
     }
@@ -12192,7 +12806,7 @@ impl<'a> Parser<'a> {
         let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]);
         if self.parse_keyword(Keyword::AND) {
             let chain = !self.parse_keyword(Keyword::NO);
-            self.expect_keyword(Keyword::CHAIN)?;
+            self.expect_keyword_is(Keyword::CHAIN)?;
             Ok(chain)
         } else {
             Ok(false)
@@ -12202,7 +12816,7 @@ impl<'a> Parser<'a> {
     pub fn parse_rollback_savepoint(&mut self) -> Result<Option<Ident>, ParserError> {
         if self.parse_keyword(Keyword::TO) {
             let _ = self.parse_keyword(Keyword::SAVEPOINT);
-            let savepoint = self.parse_identifier(false)?;
+            let savepoint = self.parse_identifier()?;

             Ok(Some(savepoint))
         } else {
@@ -12212,7 +12826,7 @@ impl<'a> Parser<'a> {
     pub fn parse_deallocate(&mut self) -> Result<Statement, ParserError> {
         let prepare = self.parse_keyword(Keyword::PREPARE);
-        let name = self.parse_identifier(false)?;
+        let name = self.parse_identifier()?;
         Ok(Statement::Deallocate { name, prepare })
     }

@@ -12252,7 +12866,7 @@ impl<'a> Parser<'a> {
     }

     pub fn parse_prepare(&mut self) -> Result<Statement, ParserError> {
-        let name = self.parse_identifier(false)?;
+        let name = self.parse_identifier()?;

         let mut data_types = vec![];
         if self.consume_token(&Token::LParen) {
@@ -12260,7 +12874,7 @@ impl<'a> Parser<'a> {
             self.expect_token(&Token::RParen)?;
         }

-        self.expect_keyword(Keyword::AS)?;
+        self.expect_keyword_is(Keyword::AS)?;
         let statement = Box::new(self.parse_statement()?);
         Ok(Statement::Prepare {
             name,
@@ -12274,8 +12888,8 @@ impl<'a> Parser<'a> {
         let query = self.parse_query()?;
         self.expect_token(&Token::RParen)?;

-        self.expect_keyword(Keyword::TO)?;
-        let to = self.parse_identifier(false)?;
+        self.expect_keyword_is(Keyword::TO)?;
+        let to = self.parse_identifier()?;

         let with_options = self.parse_options(Keyword::WITH)?;

@@ -12292,13 +12906,13 @@ impl<'a> Parser<'a> {
             if self.peek_token() == Token::EOF || self.peek_token() == Token::SemiColon {
                 break;
             }
-            self.expect_keyword(Keyword::WHEN)?;
+            self.expect_keyword_is(Keyword::WHEN)?;

             let mut clause_kind = MergeClauseKind::Matched;
             if self.parse_keyword(Keyword::NOT) {
                 clause_kind = MergeClauseKind::NotMatched;
             }
-            self.expect_keyword(Keyword::MATCHED)?;
+            self.expect_keyword_is(Keyword::MATCHED)?;

             if matches!(clause_kind, MergeClauseKind::NotMatched)
                 && self.parse_keywords(&[Keyword::BY, Keyword::SOURCE])
@@ -12316,7 +12930,7 @@ impl<'a> Parser<'a> {
                 None
             };

-            self.expect_keyword(Keyword::THEN)?;
+            self.expect_keyword_is(Keyword::THEN)?;

             let merge_clause = match self.parse_one_of_keywords(&[
                 Keyword::UPDATE,
@@ -12332,7 +12946,7 @@ impl<'a> Parser<'a> {
                             "UPDATE is not allowed in a {clause_kind} merge clause"
                         )));
                     }
-                    self.expect_keyword(Keyword::SET)?;
+                    self.expect_keyword_is(Keyword::SET)?;
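On the `BeginTransactionKind` change earlier in this hunk: recording which keyword followed `BEGIN` means round-tripping no longer rewrites `BEGIN WORK` as `BEGIN TRANSACTION` or vice versa. A sketch of the intended behavior, assuming `Display` echoes the recorded keyword:

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    // Each spelling round-trips verbatim now that the keyword is kept in the AST.
    for sql in ["BEGIN", "BEGIN WORK", "BEGIN TRANSACTION"] {
        let stmts = Parser::parse_sql(&GenericDialect {}, sql).expect("parses");
        assert_eq!(stmts[0].to_string(), sql);
    }
}
```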
MergeAction::Update { assignments: self.parse_comma_separated(Parser::parse_assignment)?, } @@ -12365,7 +12979,7 @@ impl<'a> Parser<'a> { { MergeInsertKind::Row } else { - self.expect_keyword(Keyword::VALUES)?; + self.expect_keyword_is(Keyword::VALUES)?; let values = self.parse_values(is_mysql)?; MergeInsertKind::Values(values) }; @@ -12391,9 +13005,9 @@ impl<'a> Parser<'a> { let table = self.parse_table_factor()?; - self.expect_keyword(Keyword::USING)?; + self.expect_keyword_is(Keyword::USING)?; let source = self.parse_table_factor()?; - self.expect_keyword(Keyword::ON)?; + self.expect_keyword_is(Keyword::ON)?; let on = self.parse_expr()?; let clauses = self.parse_merge_clauses()?; @@ -12447,7 +13061,7 @@ impl<'a> Parser<'a> { /// `INSTALL [extension_name]` pub fn parse_install(&mut self) -> Result { - let extension_name = self.parse_identifier(false)?; + let extension_name = self.parse_identifier()?; Ok(Statement::Install { extension_name }) } @@ -12455,15 +13069,15 @@ impl<'a> Parser<'a> { /// Parse a SQL LOAD statement pub fn parse_load(&mut self) -> Result { if self.dialect.supports_load_extension() { - let extension_name = self.parse_identifier(false)?; + let extension_name = self.parse_identifier()?; Ok(Statement::Load { extension_name }) } else if self.parse_keyword(Keyword::DATA) && self.dialect.supports_load_data() { let local = self.parse_one_of_keywords(&[Keyword::LOCAL]).is_some(); - self.expect_keyword(Keyword::INPATH)?; + self.expect_keyword_is(Keyword::INPATH)?; let inpath = self.parse_literal_string()?; let overwrite = self.parse_one_of_keywords(&[Keyword::OVERWRITE]).is_some(); - self.expect_keyword(Keyword::INTO)?; - self.expect_keyword(Keyword::TABLE)?; + self.expect_keyword_is(Keyword::INTO)?; + self.expect_keyword_is(Keyword::TABLE)?; let table_name = self.parse_object_name(false)?; let partitioned = self.parse_insert_partition()?; let table_format = self.parse_load_data_table_format()?; @@ -12488,13 +13102,13 @@ impl<'a> Parser<'a> { /// ``` /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) pub fn parse_optimize_table(&mut self) -> Result { - self.expect_keyword(Keyword::TABLE)?; + self.expect_keyword_is(Keyword::TABLE)?; let name = self.parse_object_name(false)?; let on_cluster = self.parse_optional_on_cluster()?; let partition = if self.parse_keyword(Keyword::PARTITION) { if self.parse_keyword(Keyword::ID) { - Some(Partition::Identifier(self.parse_identifier(false)?)) + Some(Partition::Identifier(self.parse_identifier()?)) } else { Some(Partition::Expr(self.parse_expr()?)) } @@ -12609,13 +13223,13 @@ impl<'a> Parser<'a> { } pub fn parse_named_window(&mut self) -> Result { - let ident = self.parse_identifier(false)?; - self.expect_keyword(Keyword::AS)?; + let ident = self.parse_identifier()?; + self.expect_keyword_is(Keyword::AS)?; let window_expr = if self.consume_token(&Token::LParen) { NamedWindowExpr::WindowSpec(self.parse_window_spec()?) } else if self.dialect.supports_window_clause_named_window_reference() { - NamedWindowExpr::NamedWindow(self.parse_identifier(false)?) + NamedWindowExpr::NamedWindow(self.parse_identifier()?) 
} else { return self.expected("(", self.peek_token()); }; @@ -12626,10 +13240,10 @@ impl<'a> Parser<'a> { pub fn parse_create_procedure(&mut self, or_alter: bool) -> Result { let name = self.parse_object_name(false)?; let params = self.parse_optional_procedure_parameters()?; - self.expect_keyword(Keyword::AS)?; - self.expect_keyword(Keyword::BEGIN)?; + self.expect_keyword_is(Keyword::AS)?; + self.expect_keyword_is(Keyword::BEGIN)?; let statements = self.parse_statements()?; - self.expect_keyword(Keyword::END)?; + self.expect_keyword_is(Keyword::END)?; Ok(Statement::CreateProcedure { name, or_alter, @@ -12674,7 +13288,7 @@ impl<'a> Parser<'a> { pub fn parse_create_type(&mut self) -> Result { let name = self.parse_object_name(false)?; - self.expect_keyword(Keyword::AS)?; + self.expect_keyword_is(Keyword::AS)?; let mut attributes = vec![]; if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { @@ -12685,7 +13299,7 @@ impl<'a> Parser<'a> { } loop { - let attr_name = self.parse_identifier(false)?; + let attr_name = self.parse_identifier()?; let attr_data_type = self.parse_data_type()?; let attr_collation = if self.parse_keyword(Keyword::COLLATE) { Some(self.parse_object_name(false)?) @@ -12714,7 +13328,7 @@ impl<'a> Parser<'a> { fn parse_parenthesized_identifiers(&mut self) -> Result, ParserError> { self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(|p| p.parse_identifier(false))?; + let partitions = self.parse_comma_separated(|p| p.parse_identifier())?; self.expect_token(&Token::RParen)?; Ok(partitions) } @@ -12724,7 +13338,7 @@ impl<'a> Parser<'a> { if self.parse_keyword(Keyword::FIRST) { Ok(Some(MySQLColumnPosition::First)) } else if self.parse_keyword(Keyword::AFTER) { - let ident = self.parse_identifier(false)?; + let ident = self.parse_identifier()?; Ok(Some(MySQLColumnPosition::After(ident))) } else { Ok(None) @@ -12832,7 +13446,7 @@ impl<'a> Parser<'a> { .parse_one_of_keywords(&[Keyword::FROM, Keyword::IN]) .is_some() { - parent_name.0.insert(0, self.parse_identifier(false)?); + parent_name.0.insert(0, self.parse_identifier()?); } (None, Some(parent_name)) } diff --git a/src/test_utils.rs b/src/test_utils.rs index aaee20c5f..e76cdb87a 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -346,6 +346,21 @@ pub fn table(name: impl Into) -> TableFactor { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, + } +} + +pub fn table_from_name(name: ObjectName) -> TableFactor { + TableFactor::Table { + name, + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + json_path: None, + sample: None, } } @@ -362,6 +377,7 @@ pub fn table_with_alias(name: impl Into, alias: impl Into) -> Ta partitions: vec![], with_ordinality: false, json_path: None, + sample: None, } } @@ -376,6 +392,7 @@ pub fn join(relation: TableFactor) -> Join { pub fn call(function: &str, args: impl IntoIterator) -> Expr { Expr::Function(Function { name: ObjectName(vec![Ident::new(function)]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index aacfc16fa..da61303b4 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1075,25 +1075,61 @@ impl<'a> Tokenizer<'a> { Ok(Some(Token::DoubleQuotedString(s))) } // delimited (quoted) identifier + quote_start if self.dialect.is_delimited_identifier_start(ch) => { + let word = 
self.tokenize_quoted_identifier(quote_start, chars)?;
+                Ok(Some(Token::make_word(&word, Some(quote_start))))
+            }
+            // Potentially nested delimited (quoted) identifier
             quote_start
-                if self.dialect.is_delimited_identifier_start(ch)
+                if self
+                    .dialect
+                    .is_nested_delimited_identifier_start(quote_start)
                     && self
                         .dialect
-                        .is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
+                        .peek_nested_delimited_identifier_quotes(chars.peekable.clone())
+                        .is_some() =>
             {
-                let error_loc = chars.location();
-                chars.next(); // consume the opening quote
+                let Some((quote_start, nested_quote_start)) = self
+                    .dialect
+                    .peek_nested_delimited_identifier_quotes(chars.peekable.clone())
+                else {
+                    return self.tokenizer_error(
+                        chars.location(),
+                        format!("Expected nested delimiter '{quote_start}' before EOF."),
+                    );
+                };
+
+                let Some(nested_quote_start) = nested_quote_start else {
+                    let word = self.tokenize_quoted_identifier(quote_start, chars)?;
+                    return Ok(Some(Token::make_word(&word, Some(quote_start))));
+                };
+
+                let mut word = vec![];
                 let quote_end = Word::matching_end_quote(quote_start);
-                let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
+                let nested_quote_end = Word::matching_end_quote(nested_quote_start);
+                let error_loc = chars.location();

-                if last_char == Some(quote_end) {
-                    Ok(Some(Token::make_word(&s, Some(quote_start))))
-                } else {
-                    self.tokenizer_error(
+                chars.next(); // skip the first delimiter
+                peeking_take_while(chars, |ch| ch.is_whitespace());
+                if chars.peek() != Some(&nested_quote_start) {
+                    return self.tokenizer_error(
+                        error_loc,
+                        format!("Expected nested delimiter '{nested_quote_start}' before EOF."),
+                    );
+                }
+                word.push(nested_quote_start.into());
+                word.push(self.tokenize_quoted_identifier(nested_quote_end, chars)?);
+                word.push(nested_quote_end.into());
+                peeking_take_while(chars, |ch| ch.is_whitespace());
+                if chars.peek() != Some(&quote_end) {
+                    return self.tokenizer_error(
                         error_loc,
                         format!("Expected close delimiter '{quote_end}' before EOF."),
-                    )
+                    );
                 }
+                chars.next(); // skip close delimiter
+
+                Ok(Some(Token::make_word(&word.concat(), Some(quote_start))))
             }
             // numbers and period
             '0'..='9' | '.' => {
@@ -1473,7 +1509,8 @@ impl<'a> Tokenizer<'a> {

                 chars.next();

-                if let Some('$') = chars.peek() {
+                // If the dialect does not support dollar-quoted strings, then `$$` is a placeholder instead.
+                if matches!(chars.peek(), Some('$')) && !self.dialect.supports_dollar_placeholder() {
                     chars.next();

                     let mut is_terminated = false;
@@ -1507,10 +1544,14 @@ impl<'a> Tokenizer<'a> {
                 };
             } else {
                 value.push_str(&peeking_take_while(chars, |ch| {
-                    ch.is_alphanumeric() || ch == '_'
+                    ch.is_alphanumeric()
+                        || ch == '_'
+                        // Allow $ as a placeholder character if the dialect supports it
+                        || matches!(ch, '$' if self.dialect.supports_dollar_placeholder())
                 }));

-                if let Some('$') = chars.peek() {
+                // If the dialect does not support dollar-quoted strings, don't look for the end delimiter.
+ if matches!(chars.peek(), Some('$')) && !self.dialect.supports_dollar_placeholder() { chars.next(); 'searching_for_end: loop { @@ -1597,6 +1638,27 @@ impl<'a> Tokenizer<'a> { s } + /// Read a quoted identifier + fn tokenize_quoted_identifier( + &self, + quote_start: char, + chars: &mut State, + ) -> Result { + let error_loc = chars.location(); + chars.next(); // consume the opening quote + let quote_end = Word::matching_end_quote(quote_start); + let (s, last_char) = self.parse_quoted_ident(chars, quote_end); + + if last_char == Some(quote_end) { + Ok(s) + } else { + self.tokenizer_error( + error_loc, + format!("Expected close delimiter '{quote_end}' before EOF."), + ) + } + } + /// Read a single quoted string, starting with the opening quote. fn tokenize_escaped_single_quoted_string( &self, @@ -2080,7 +2142,7 @@ fn take_char_from_hex_digits( mod tests { use super::*; use crate::dialect::{ - BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, + BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, SQLiteDialect, }; use core::fmt::Debug; @@ -2516,6 +2578,30 @@ mod tests { ); } + #[test] + fn tokenize_dollar_placeholder() { + let sql = String::from("SELECT $$, $$ABC$$, $ABC$, $ABC"); + let dialect = SQLiteDialect {}; + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); + assert_eq!( + tokens, + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Placeholder("$$".into()), + Token::Comma, + Token::Whitespace(Whitespace::Space), + Token::Placeholder("$$ABC$$".into()), + Token::Comma, + Token::Whitespace(Whitespace::Space), + Token::Placeholder("$ABC$".into()), + Token::Comma, + Token::Whitespace(Whitespace::Space), + Token::Placeholder("$ABC".into()), + ] + ); + } + #[test] fn tokenize_dollar_quoted_string_untagged() { let sql = diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 2be128a8c..9dfabc014 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -23,7 +23,7 @@ use std::ops::Deref; use sqlparser::ast::*; use sqlparser::dialect::{BigQueryDialect, GenericDialect}; use sqlparser::parser::{ParserError, ParserOptions}; -use sqlparser::tokenizer::Span; +use sqlparser::tokenizer::{Location, Span}; use test_utils::*; #[test] @@ -222,16 +222,7 @@ fn parse_delete_statement() { .. 
}) => { assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::with_quote('"', "table")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![Ident::with_quote('"', "table")])), from[0].relation ); } @@ -1379,16 +1370,7 @@ fn parse_table_identifiers() { assert_eq!( select.from, vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(expected), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(expected)), joins: vec![] },] ); @@ -1522,6 +1504,17 @@ fn parse_hyphenated_table_identifiers() { "SELECT * FROM foo-bar AS f JOIN baz-qux AS b ON f.id = b.id", ); + assert_eq!( + bigquery() + .verified_only_select_with_canonical( + "select * from foo-123.bar", + "SELECT * FROM foo-123.bar" + ) + .from[0] + .relation, + table_from_name(ObjectName(vec![Ident::new("foo-123"), Ident::new("bar")])), + ); + assert_eq!( bigquery() .verified_only_select_with_canonical( @@ -1562,6 +1555,7 @@ fn parse_table_time_travel() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, joins: vec![] },] @@ -1661,6 +1655,7 @@ fn parse_merge() { partitions: Default::default(), with_ordinality: false, json_path: None, + sample: None, }, table ); @@ -1677,6 +1672,7 @@ fn parse_merge() { partitions: Default::default(), with_ordinality: false, json_path: None, + sample: None, }, source ); @@ -1969,27 +1965,47 @@ fn parse_map_access_expr() { let sql = "users[-1][safe_offset(2)].a.b"; let expr = bigquery().verified_expr(sql); - fn map_access_key(key: Expr, syntax: MapAccessSyntax) -> MapAccessKey { - MapAccessKey { key, syntax } - } - let expected = Expr::MapAccess { - column: Expr::Identifier(Ident::new("users")).into(), - keys: vec![ - map_access_key( - Expr::UnaryOp { + let expected = Expr::CompoundFieldAccess { + root: Box::new(Expr::Identifier(Ident::with_span( + Span::new(Location::of(1, 1), Location::of(1, 6)), + "users", + ))), + access_chain: vec![ + AccessExpr::Subscript(Subscript::Index { + index: Expr::UnaryOp { op: UnaryOperator::Minus, expr: Expr::Value(number("1")).into(), }, - MapAccessSyntax::Bracket, - ), - map_access_key( - call("safe_offset", [Expr::Value(number("2"))]), - MapAccessSyntax::Bracket, - ), - map_access_key( - Expr::CompoundIdentifier(vec![Ident::new("a"), Ident::new("b")]), - MapAccessSyntax::Period, - ), + }), + AccessExpr::Subscript(Subscript::Index { + index: Expr::Function(Function { + name: ObjectName(vec![Ident::with_span( + Span::new(Location::of(1, 11), Location::of(1, 22)), + "safe_offset", + )]), + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + number("2"), + )))], + clauses: vec![], + }), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + uses_odbc_syntax: false, + }), + }), + AccessExpr::Dot(Expr::Identifier(Ident::with_span( + Span::new(Location::of(1, 24), Location::of(1, 25)), + "a", + ))), + AccessExpr::Dot(Expr::Identifier(Ident::with_span( + Span::new(Location::of(1, 26), Location::of(1, 27)), + "b", + ))), ], }; assert_eq!(expr, expected); @@ -2212,3 +2228,19 @@ fn test_any_value() { bigquery_and_generic().verified_expr("ANY_VALUE(fruit HAVING MAX sold)"); 
bigquery_and_generic().verified_expr("ANY_VALUE(fruit HAVING MIN sold)"); } + +#[test] +fn test_any_type() { + bigquery().verified_stmt(concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION ", + "my_function(param1 ANY TYPE) ", + "AS (", + "(SELECT 1)", + ")", + )); +} + +#[test] +fn test_any_type_dont_break_custom_type() { + bigquery_and_generic().verified_stmt("CREATE TABLE foo (x ANY)"); +} diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index ed0c74021..2f1b043b6 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -25,7 +25,7 @@ use helpers::attached_token::AttachedToken; use sqlparser::tokenizer::Span; use test_utils::*; -use sqlparser::ast::Expr::{BinaryOp, Identifier, MapAccess}; +use sqlparser::ast::Expr::{BinaryOp, Identifier}; use sqlparser::ast::SelectItem::UnnamedExpr; use sqlparser::ast::TableFactor::Table; use sqlparser::ast::Value::Number; @@ -44,35 +44,25 @@ fn parse_map_access_expr() { select_token: AttachedToken::empty(), top: None, top_before_distinct: false, - projection: vec![UnnamedExpr(MapAccess { - column: Box::new(Identifier(Ident { + projection: vec![UnnamedExpr(Expr::CompoundFieldAccess { + root: Box::new(Identifier(Ident { value: "string_values".to_string(), quote_style: None, span: Span::empty(), })), - keys: vec![MapAccessKey { - key: call( + access_chain: vec![AccessExpr::Subscript(Subscript::Index { + index: call( "indexOf", [ Expr::Identifier(Ident::new("string_names")), Expr::Value(Value::SingleQuotedString("endpoint".to_string())) ] ), - syntax: MapAccessSyntax::Bracket - }], + })], })], into: None, from: vec![TableWithJoins { - relation: Table { - name: ObjectName(vec![Ident::new("foos")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("foos")])), joins: vec![], }], lateral_views: vec![], @@ -85,18 +75,17 @@ fn parse_map_access_expr() { }), op: BinaryOperator::And, right: Box::new(BinaryOp { - left: Box::new(MapAccess { - column: Box::new(Identifier(Ident::new("string_value"))), - keys: vec![MapAccessKey { - key: call( + left: Box::new(Expr::CompoundFieldAccess { + root: Box::new(Identifier(Ident::new("string_value"))), + access_chain: vec![AccessExpr::Subscript(Subscript::Index { + index: call( "indexOf", [ Expr::Identifier(Ident::new("string_name")), Expr::Value(Value::SingleQuotedString("app".to_string())) ] ), - syntax: MapAccessSyntax::Bracket - }], + })], }), op: BinaryOperator::NotEq, right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))), @@ -175,9 +164,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, - with_ordinality: _, - partitions: _, - json_path: _, + .. 
} => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); @@ -199,6 +186,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -821,6 +809,7 @@ fn parse_create_table_with_variant_default_expressions() { name: None, option: ColumnOption::Materialized(Expr::Function(Function { name: ObjectName(vec![Ident::new("now")]), + uses_odbc_syntax: false, args: FunctionArguments::List(FunctionArgumentList { args: vec![], duplicate_treatment: None, @@ -842,6 +831,7 @@ fn parse_create_table_with_variant_default_expressions() { name: None, option: ColumnOption::Ephemeral(Some(Expr::Function(Function { name: ObjectName(vec![Ident::new("now")]), + uses_odbc_syntax: false, args: FunctionArguments::List(FunctionArgumentList { args: vec![], duplicate_treatment: None, @@ -872,6 +862,7 @@ fn parse_create_table_with_variant_default_expressions() { name: None, option: ColumnOption::Alias(Expr::Function(Function { name: ObjectName(vec![Ident::new("toString")]), + uses_odbc_syntax: false, args: FunctionArguments::List(FunctionArgumentList { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Identifier(Ident::new("c")) @@ -1621,6 +1612,14 @@ fn parse_explain_table() { } } +#[test] +fn parse_table_sample() { + clickhouse().verified_stmt("SELECT * FROM tbl SAMPLE 0.1"); + clickhouse().verified_stmt("SELECT * FROM tbl SAMPLE 1000"); + clickhouse().verified_stmt("SELECT * FROM tbl SAMPLE 1 / 10"); + clickhouse().verified_stmt("SELECT * FROM tbl SAMPLE 1 / 10 OFFSET 1 / 2"); +} + fn clickhouse() -> TestedDialects { TestedDialects::new(vec![Box::new(ClickHouseDialect {})]) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a2b6754b3..5e5e761bb 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -37,11 +37,11 @@ use sqlparser::dialect::{ }; use sqlparser::keywords::{Keyword, ALL_KEYWORDS}; use sqlparser::parser::{Parser, ParserError, ParserOptions}; -use sqlparser::tokenizer::Span; use sqlparser::tokenizer::Tokenizer; +use sqlparser::tokenizer::{Location, Span}; use test_utils::{ all_dialects, all_dialects_where, alter_table_op, assert_eq_vec, call, expr_from_projection, - join, number, only, table, table_alias, TestedDialects, + join, number, only, table, table_alias, table_from_name, TestedDialects, }; #[macro_use] @@ -51,6 +51,7 @@ mod test_utils; use pretty_assertions::assert_eq; use sqlparser::ast::ColumnOption::Comment; use sqlparser::ast::Expr::{Identifier, UnaryOp}; +use sqlparser::ast::Value::Number; use sqlparser::test_utils::all_dialects_except; #[test] @@ -358,23 +359,14 @@ fn parse_update_set_from() { stmt, Statement::Update { table: TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("t1")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("t1")])), joins: vec![], }, assignments: vec![Assignment { target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new("name")])), value: Expr::CompoundIdentifier(vec![Ident::new("t2"), Ident::new("name")]) }], - from: Some(TableWithJoins { + from: Some(UpdateTableFromKind::AfterSet(TableWithJoins { relation: TableFactor::Derived { lateral: 
false, subquery: Box::new(Query { @@ -390,16 +382,7 @@ fn parse_update_set_from() { ], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("t1")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("t1")])), joins: vec![], }], lateral_views: vec![], @@ -434,8 +417,8 @@ fn parse_update_set_from() { columns: vec![], }) }, - joins: vec![], - }), + joins: vec![] + })), selection: Some(Expr::BinaryOp { left: Box::new(Expr::CompoundIdentifier(vec![ Ident::new("t1"), @@ -479,6 +462,7 @@ fn parse_update_with_table_alias() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, joins: vec![], }, @@ -571,12 +555,19 @@ fn parse_select_with_table_alias() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, joins: vec![], }] ); } +#[test] +fn parse_analyze() { + verified_stmt("ANALYZE TABLE test_table"); + verified_stmt("ANALYZE test_table"); +} + #[test] fn parse_invalid_table_name() { let ast = all_dialects().run_parser_method("db.public..customer", |parser| { @@ -600,16 +591,7 @@ fn parse_delete_statement() { .. }) => { assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::with_quote('"', "table")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![Ident::with_quote('"', "table")])), from[0].relation ); } @@ -648,29 +630,17 @@ fn parse_delete_statement_for_multi_tables() { tables[1] ); assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema1"), Ident::new("table1")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema1"), + Ident::new("table1") + ])), from[0].relation ); assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema2"), Ident::new("table2")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema2"), + Ident::new("table2") + ])), from[0].joins[0].relation ); } @@ -688,55 +658,31 @@ fn parse_delete_statement_for_multi_tables_with_using() { .. 
}) => { assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema1"), Ident::new("table1")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema1"), + Ident::new("table1") + ])), from[0].relation ); assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema2"), Ident::new("table2")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema2"), + Ident::new("table2") + ])), from[1].relation ); assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema1"), Ident::new("table1")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema1"), + Ident::new("table1") + ])), using[0].relation ); assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("schema2"), Ident::new("table2")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![ + Ident::new("schema2"), + Ident::new("table2") + ])), using[0].joins[0].relation ); } @@ -759,16 +705,7 @@ fn parse_where_delete_statement() { .. }) => { assert_eq!( - TableFactor::Table { - name: ObjectName(vec![Ident::new("foo")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + table_from_name(ObjectName(vec![Ident::new("foo")])), from[0].relation, ); @@ -814,6 +751,7 @@ fn parse_where_delete_with_alias_statement() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, from[0].relation, ); @@ -831,6 +769,7 @@ fn parse_where_delete_with_alias_statement() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, joins: vec![], }]), @@ -1107,6 +1046,7 @@ fn parse_select_count_wildcard() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -1129,6 +1069,7 @@ fn parse_select_count_distinct() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: Some(DuplicateTreatment::Distinct), @@ -2366,6 +2307,7 @@ fn parse_select_having() { Some(Expr::BinaryOp { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -2396,6 +2338,7 @@ fn parse_select_qualify() { Some(Expr::BinaryOp { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("ROW_NUMBER")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -2802,6 +2745,7 @@ fn parse_listagg() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("LISTAGG")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: 
FunctionArguments::List(FunctionArgumentList { duplicate_treatment: Some(DuplicateTreatment::Distinct), @@ -2996,6 +2940,138 @@ fn parse_window_function_null_treatment_arg() { ); } +#[test] +fn test_compound_expr() { + let supported_dialects = TestedDialects::new(vec![ + Box::new(GenericDialect {}), + Box::new(DuckDbDialect {}), + Box::new(BigQueryDialect {}), + ]); + let sqls = [ + "SELECT abc[1].f1 FROM t", + "SELECT abc[1].f1.f2 FROM t", + "SELECT f1.abc[1] FROM t", + "SELECT f1.f2.abc[1] FROM t", + "SELECT f1.abc[1].f2 FROM t", + "SELECT named_struct('a', 1, 'b', 2).a", + "SELECT named_struct('a', 1, 'b', 2).a", + "SELECT make_array(1, 2, 3)[1]", + "SELECT make_array(named_struct('a', 1))[1].a", + "SELECT abc[1][-1].a.b FROM t", + "SELECT abc[1][-1].a.b[1] FROM t", + ]; + for sql in sqls { + supported_dialects.verified_stmt(sql); + } +} + +#[test] +fn test_double_value() { + let dialects = all_dialects(); + let test_cases = vec![ + gen_number_case_with_sign("0."), + gen_number_case_with_sign("0.0"), + gen_number_case_with_sign("0000."), + gen_number_case_with_sign("0000.00"), + gen_number_case_with_sign(".0"), + gen_number_case_with_sign(".00"), + gen_number_case_with_sign("0e0"), + gen_number_case_with_sign("0e+0"), + gen_number_case_with_sign("0e-0"), + gen_number_case_with_sign("0.e-0"), + gen_number_case_with_sign("0.e+0"), + gen_number_case_with_sign(".0e-0"), + gen_number_case_with_sign(".0e+0"), + gen_number_case_with_sign("00.0e+0"), + gen_number_case_with_sign("00.0e-0"), + ]; + + for (input, expected) in test_cases { + for (i, expr) in input.iter().enumerate() { + if let Statement::Query(query) = + dialects.one_statement_parses_to(&format!("SELECT {}", expr), "") + { + if let SetExpr::Select(select) = *query.body { + assert_eq!(expected[i], select.projection[0]); + } else { + panic!("Expected a SELECT statement"); + } + } else { + panic!("Expected a SELECT statement"); + } + } + } +} + +fn gen_number_case(value: &str) -> (Vec<String>, Vec<SelectItem>) { + let input = vec![ + value.to_string(), + format!("{} col_alias", value), + format!("{} AS col_alias", value), + ]; + let expected = vec![ + SelectItem::UnnamedExpr(Expr::Value(number(value))), + SelectItem::ExprWithAlias { + expr: Expr::Value(number(value)), + alias: Ident::new("col_alias"), + }, + SelectItem::ExprWithAlias { + expr: Expr::Value(number(value)), + alias: Ident::new("col_alias"), + }, + ]; + (input, expected) +} + +fn gen_sign_number_case(value: &str, op: UnaryOperator) -> (Vec<String>, Vec<SelectItem>) { + match op { + UnaryOperator::Plus | UnaryOperator::Minus => {} + _ => panic!("Invalid sign"), + } + + let input = vec![ + format!("{}{}", op, value), + format!("{}{} col_alias", op, value), + format!("{}{} AS col_alias", op, value), + ]; + let expected = vec![ + SelectItem::UnnamedExpr(Expr::UnaryOp { + op, + expr: Box::new(Expr::Value(number(value))), + }), + SelectItem::ExprWithAlias { + expr: Expr::UnaryOp { + op, + expr: Box::new(Expr::Value(number(value))), + }, + alias: Ident::new("col_alias"), + }, + SelectItem::ExprWithAlias { + expr: Expr::UnaryOp { + op, + expr: Box::new(Expr::Value(number(value))), + }, + alias: Ident::new("col_alias"), + }, + ]; + (input, expected) +} + +/// Generate the test cases for signed and unsigned numbers. +/// For example, given "0.0", the test cases will be: +/// - "0.0" +/// - "+0.0" +/// - "-0.0" +fn gen_number_case_with_sign(number: &str) -> (Vec<String>, Vec<SelectItem>) { + let (mut input, mut expected) = gen_number_case(number); + for op in [UnaryOperator::Plus, UnaryOperator::Minus] { + let (input_sign, expected_sign) = 
gen_sign_number_case(number, op); + input.extend(input_sign); + expected.extend(expected_sign); + } + (input, expected) +} + #[test] fn parse_negative_value() { let sql1 = "SELECT -1"; @@ -3066,7 +3142,7 @@ fn parse_create_table() { }, ColumnDef { name: "lat".into(), - data_type: DataType::Double, + data_type: DataType::Double(ExactNumberInfo::None), collation: None, options: vec![ColumnOptionDef { name: None, @@ -3075,7 +3151,7 @@ fn parse_create_table() { }, ColumnDef { name: "lng".into(), - data_type: DataType::Double, + data_type: DataType::Double(ExactNumberInfo::None), collation: None, options: vec![], }, @@ -3255,7 +3331,7 @@ fn parse_create_table_with_constraint_characteristics() { }, ColumnDef { name: "lat".into(), - data_type: DataType::Double, + data_type: DataType::Double(ExactNumberInfo::None), collation: None, options: vec![ColumnOptionDef { name: None, @@ -3264,7 +3340,7 @@ fn parse_create_table_with_constraint_characteristics() { }, ColumnDef { name: "lng".into(), - data_type: DataType::Double, + data_type: DataType::Double(ExactNumberInfo::None), collation: None, options: vec![], }, @@ -3895,7 +3971,7 @@ fn parse_create_external_table() { }, ColumnDef { name: "lat".into(), - data_type: DataType::Double, + data_type: DataType::Double(ExactNumberInfo::None), collation: None, options: vec![ColumnOptionDef { name: None, @@ -3904,7 +3980,7 @@ fn parse_create_external_table() { }, ColumnDef { name: "lng".into(), - data_type: DataType::Double, + data_type: DataType::Double(ExactNumberInfo::None), collation: None, options: vec![], }, @@ -4066,8 +4142,8 @@ fn test_alter_table_with_on_cluster() { Statement::AlterTable { name, on_cluster, .. } => { - std::assert_eq!(name.to_string(), "t"); - std::assert_eq!(on_cluster, Some(Ident::with_quote('\'', "cluster"))); + assert_eq!(name.to_string(), "t"); + assert_eq!(on_cluster, Some(Ident::with_quote('\'', "cluster"))); } _ => unreachable!(), } @@ -4078,15 +4154,15 @@ fn test_alter_table_with_on_cluster() { Statement::AlterTable { name, on_cluster, .. 
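
The `DataType::Double` change in the hunks above threads `ExactNumberInfo` through every DOUBLE column. A minimal sketch of what downstream code now sees, assuming the generic dialect and an illustrative table name (`ExactNumberInfo::PrecisionAndScale` is the variant a MySQL-style `DOUBLE(11,0)` would be expected to produce):

```rust
use sqlparser::ast::{DataType, ExactNumberInfo, Statement};
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let stmts = Parser::parse_sql(&GenericDialect {}, "CREATE TABLE foo (lat DOUBLE)").unwrap();
    if let Statement::CreateTable(create) = &stmts[0] {
        // A bare DOUBLE carries no precision or scale, hence ExactNumberInfo::None.
        assert_eq!(create.columns[0].data_type, DataType::Double(ExactNumberInfo::None));
    }
}
```
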
} => { - std::assert_eq!(name.to_string(), "t"); - std::assert_eq!(on_cluster, Some(Ident::new("cluster_name"))); + assert_eq!(name.to_string(), "t"); + assert_eq!(on_cluster, Some(Ident::new("cluster_name"))); } _ => unreachable!(), } let res = all_dialects() .parse_sql_statements("ALTER TABLE t ON CLUSTER 123 ADD CONSTRAINT bar PRIMARY KEY (baz)"); - std::assert_eq!( + assert_eq!( res.unwrap_err(), ParserError::ParserError("Expected: identifier, found: 123".to_string()) ) @@ -4230,6 +4306,7 @@ fn parse_alter_table_constraints() { check_one("UNIQUE (id)"); check_one("FOREIGN KEY (foo, bar) REFERENCES AnotherTable(foo, bar)"); check_one("CHECK (end_date > start_date OR end_date IS NULL)"); + check_one("CONSTRAINT fk FOREIGN KEY (lng) REFERENCES othertable4"); fn check_one(constraint_text: &str) { match alter_table_op(verified_stmt(&format!( @@ -4438,6 +4515,7 @@ fn run_explain_analyze( analyze, verbose, query_plan, + estimate, statement, format, options, @@ -4447,6 +4525,7 @@ fn run_explain_analyze( assert_eq!(format, expected_format); assert_eq!(options, exepcted_options); assert!(!query_plan); + assert!(!estimate); assert_eq!("SELECT sqrt(id) FROM foo", statement.to_string()); } _ => panic!("Unexpected Statement, must be Explain"), @@ -4591,6 +4670,34 @@ fn parse_explain_query_plan() { ); } +#[test] +fn parse_explain_estimate() { + let statement = all_dialects().verified_stmt("EXPLAIN ESTIMATE SELECT sqrt(id) FROM foo"); + + match &statement { + Statement::Explain { + query_plan, + estimate, + analyze, + verbose, + statement, + .. + } => { + assert!(estimate); + assert!(!query_plan); + assert!(!analyze); + assert!(!verbose); + assert_eq!("SELECT sqrt(id) FROM foo", statement.to_string()); + } + _ => unreachable!(), + } + + assert_eq!( + "EXPLAIN ESTIMATE SELECT sqrt(id) FROM foo", + statement.to_string() + ); +} + #[test] fn parse_named_argument_function() { let dialects = all_dialects_where(|d| { @@ -4603,6 +4710,7 @@ fn parse_named_argument_function() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("FUN")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -4642,6 +4750,7 @@ fn parse_named_argument_function_with_eq_operator() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("FUN")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -4716,6 +4825,7 @@ fn parse_window_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("row_number")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -4846,6 +4956,7 @@ fn test_parse_named_window() { quote_style: None, span: Span::empty(), }]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -4880,6 +4991,7 @@ fn test_parse_named_window() { quote_style: None, span: Span::empty(), }]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -4910,20 +5022,11 @@ fn test_parse_named_window() { ], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "aggregate_test_100".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: 
None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "aggregate_test_100".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![], }], lateral_views: vec![], @@ -5501,20 +5604,11 @@ fn parse_interval_and_or_xor() { }))], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "test".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "test".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![], }], lateral_views: vec![], @@ -6122,29 +6216,11 @@ fn parse_implicit_join() { assert_eq!( vec![ TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec!["t1".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t1".into()])), joins: vec![], }, TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec!["t2".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t2".into()])), joins: vec![], }, ], @@ -6156,53 +6232,17 @@ fn parse_implicit_join() { assert_eq!( vec![ TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec!["t1a".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t1a".into()])), joins: vec![Join { - relation: TableFactor::Table { - name: ObjectName(vec!["t1b".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t1b".into()])), global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], }, TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec!["t2a".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t2a".into()])), joins: vec![Join { - relation: TableFactor::Table { - name: ObjectName(vec!["t2b".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t2b".into()])), global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -6218,16 +6258,7 @@ fn parse_cross_join() { let select = verified_only_select(sql); assert_eq!( Join { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("t2")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("t2")])), global: false, join_operator: JoinOperator::CrossJoin, }, @@ -6253,6 +6284,7 @@ fn parse_joins_on() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, global, 
join_operator: f(JoinConstraint::On(Expr::BinaryOp { @@ -6381,6 +6413,7 @@ fn parse_joins_using() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, global: false, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), @@ -6455,6 +6488,7 @@ fn parse_natural_join() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, global: false, join_operator: f(JoinConstraint::Natural), @@ -6718,16 +6752,7 @@ fn parse_derived_tables() { }), }, joins: vec![Join { - relation: TableFactor::Table { - name: ObjectName(vec!["t2".into()]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec!["t2".into()])), global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -7658,20 +7683,11 @@ fn lateral_function() { top_before_distinct: false, into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "customer".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "customer".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![Join { relation: TableFactor::Function { lateral: true, @@ -7737,9 +7753,9 @@ fn parse_start_transaction() { } verified_stmt("START TRANSACTION"); - one_statement_parses_to("BEGIN", "BEGIN TRANSACTION"); - one_statement_parses_to("BEGIN WORK", "BEGIN TRANSACTION"); - one_statement_parses_to("BEGIN TRANSACTION", "BEGIN TRANSACTION"); + verified_stmt("BEGIN"); + verified_stmt("BEGIN WORK"); + verified_stmt("BEGIN TRANSACTION"); verified_stmt("START TRANSACTION ISOLATION LEVEL READ UNCOMMITTED"); verified_stmt("START TRANSACTION ISOLATION LEVEL READ COMMITTED"); @@ -8489,6 +8505,7 @@ fn parse_merge() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, } ); assert_eq!(table, table_no_into); @@ -8509,16 +8526,10 @@ fn parse_merge() { )], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("s"), Ident::new("foo")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![ + Ident::new("s"), + Ident::new("foo") + ])), joins: vec![], }], lateral_views: vec![], @@ -9008,6 +9019,7 @@ fn parse_time_functions() { let select = verified_only_select(&sql); let select_localtime_func_call_ast = Function { name: ObjectName(vec![Ident::new(func_name)]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -9252,7 +9264,7 @@ fn parse_cache_table() { format!( "CACHE {table_flag} TABLE '{cache_table_name}' OPTIONS('K1' = 'V1', 'K2' = 0.88) {sql}", ) - .as_str() + .as_str() ), Statement::Cache { table_flag: Some(ObjectName(vec![Ident::new(table_flag)])), @@ -9277,7 +9289,7 @@ fn parse_cache_table() { format!( "CACHE {table_flag} TABLE '{cache_table_name}' OPTIONS('K1' = 'V1', 'K2' = 0.88) AS {sql}", ) - .as_str() + .as_str() ), Statement::Cache { table_flag: Some(ObjectName(vec![Ident::new(table_flag)])), @@ -9602,6 +9614,7 @@ fn parse_pivot_table() { partitions: vec![], with_ordinality: false, json_path: None, + 
sample: None, }), aggregate_functions: vec![ expected_function("a", None), @@ -9677,6 +9690,7 @@ fn parse_unpivot_table() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }), value: Ident { value: "quantity".to_string(), @@ -9747,6 +9761,7 @@ fn parse_pivot_unpivot_table() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }), value: Ident { value: "population".to_string(), @@ -10023,6 +10038,7 @@ fn parse_call() { assert_eq!( verified_stmt("CALL my_procedure('a')"), Statement::Call(Function { + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -10155,16 +10171,7 @@ fn parse_unload() { projection: vec![UnnamedExpr(Expr::Identifier(Ident::new("cola"))),], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("tab")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("tab")])), joins: vec![], }], lateral_views: vec![], @@ -10302,20 +10309,39 @@ fn parse_map_access_expr() { Box::new(ClickHouseDialect {}), ]); let expr = dialects.verified_expr(sql); - let expected = Expr::MapAccess { - column: Expr::Identifier(Ident::new("users")).into(), - keys: vec![ - MapAccessKey { - key: Expr::UnaryOp { + let expected = Expr::CompoundFieldAccess { + root: Box::new(Expr::Identifier(Ident::with_span( + Span::new(Location::of(1, 1), Location::of(1, 6)), + "users", + ))), + access_chain: vec![ + AccessExpr::Subscript(Subscript::Index { + index: Expr::UnaryOp { op: UnaryOperator::Minus, expr: Expr::Value(number("1")).into(), }, - syntax: MapAccessSyntax::Bracket, - }, - MapAccessKey { - key: call("safe_offset", [Expr::Value(number("2"))]), - syntax: MapAccessSyntax::Bracket, - }, + }), + AccessExpr::Subscript(Subscript::Index { + index: Expr::Function(Function { + name: ObjectName(vec![Ident::with_span( + Span::new(Location::of(1, 11), Location::of(1, 22)), + "safe_offset", + )]), + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + number("2"), + )))], + clauses: vec![], + }), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + uses_odbc_syntax: false, + }), + }), ], }; assert_eq!(expr, expected); @@ -10338,16 +10364,7 @@ fn parse_connect_by() { SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("title"))), ], from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("employees")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("employees")])), joins: vec![], }], into: None, @@ -10427,16 +10444,7 @@ fn parse_connect_by() { SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("title"))), ], from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::new("employees")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::new("employees")])), joins: vec![], }], into: None, @@ -10513,6 +10521,7 @@ fn test_selective_aggregation() { vec![ 
SelectItem::UnnamedExpr(Expr::Function(Function { name: ObjectName(vec![Ident::new("ARRAY_AGG")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -10531,6 +10540,7 @@ SelectItem::ExprWithAlias { expr: Expr::Function(Function { name: ObjectName(vec![Ident::new("ARRAY_AGG")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -10589,16 +10599,7 @@ fn test_match_recognize() { use MatchRecognizeSymbol::*; use RepetitionQuantifier::*; - let table = TableFactor::Table { - name: ObjectName(vec![Ident::new("my_table")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }; + let table = table_from_name(ObjectName(vec![Ident::new("my_table")])); fn check(options: &str, expect: TableFactor) { let select = all_dialects_where(|d| d.supports_match_recognize()).verified_only_select( @@ -10966,6 +10967,37 @@ fn insert_into_with_parentheses() { Box::new(GenericDialect {}), ]); dialects.verified_stmt("INSERT INTO t1 (id, name) (SELECT t2.id, t2.name FROM t2)"); + dialects.verified_stmt("INSERT INTO t1 (SELECT t2.id, t2.name FROM t2)"); + dialects.verified_stmt(r#"INSERT INTO t1 ("select", name) (SELECT t2.name FROM t2)"#); +} + +#[test] +fn parse_odbc_scalar_function() { + let select = verified_only_select("SELECT {fn my_func(1, 2)}"); + let Expr::Function(Function { + name, + uses_odbc_syntax, + args, + .. + }) = expr_from_projection(only(&select.projection)) + else { + unreachable!("expected function") + }; + assert_eq!(name, &ObjectName(vec![Ident::new("my_func")])); + assert!(uses_odbc_syntax); + assert!(matches!(args, FunctionArguments::List(l) if l.args.len() == 2)); + + verified_stmt("SELECT {fn fna()} AS foo, fnb(1)"); + + // Testing invalid SQL with any one dialect is intentional. + // Depending on dialect flags the error message may be different. + let pg = TestedDialects::new(vec![Box::new(PostgreSqlDialect {})]); + assert_eq!( + pg.parse_sql_statements("SELECT {fn2 my_func()}") + .unwrap_err() + .to_string(), + "sql parser error: Expected: an expression, found: {" + ); } #[test] @@ -11099,8 +11131,8 @@ fn test_map_syntax() { check( "MAP {'a': 10, 'b': 20}['a']", - Expr::Subscript { - expr: Box::new(Expr::Map(Map { + Expr::CompoundFieldAccess { + root: Box::new(Expr::Map(Map { entries: vec![ MapEntry { key: Box::new(Expr::Value(Value::SingleQuotedString("a".to_owned()))), value: Box::new(Expr::Value(number("10"))), }, MapEntry { key: Box::new(Expr::Value(Value::SingleQuotedString("b".to_owned()))), value: Box::new(Expr::Value(number("20"))), }, ], })), - subscript: Box::new(Subscript::Index { + access_chain: vec![AccessExpr::Subscript(Subscript::Index { index: Expr::Value(Value::SingleQuotedString("a".to_owned())), - }), + })], }, ); @@ -11228,7 +11260,7 @@ fn test_group_by_nothing() { let Select { group_by, .. } = all_dialects_where(|d| d.supports_group_by_expr()) .verified_only_select("SELECT count(1) FROM t GROUP BY ()"); { - std::assert_eq!( + assert_eq!( GroupByExpr::Expressions(vec![Expr::Tuple(vec![])], vec![]), group_by ); @@ -11237,7 +11269,7 @@ fn test_group_by_nothing() { let Select { group_by, .. 
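
The `parse_odbc_scalar_function` test above is the reference point for the new `uses_odbc_syntax` flag. A hedged sketch of reading that flag outside the test helpers, reusing the test's hypothetical `my_func` name:

```rust
use sqlparser::ast::{Expr, SelectItem, SetExpr, Statement};
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let stmts = Parser::parse_sql(&GenericDialect {}, "SELECT {fn my_func(1, 2)}").unwrap();
    if let Statement::Query(query) = &stmts[0] {
        if let SetExpr::Select(select) = query.body.as_ref() {
            if let SelectItem::UnnamedExpr(Expr::Function(func)) = &select.projection[0] {
                // The flag records that the call arrived in ODBC escape form, so
                // Display writes it back as `{fn my_func(1, 2)}`, not `my_func(1, 2)`.
                assert!(func.uses_odbc_syntax);
            }
        }
    }
}
```
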
} = all_dialects_where(|d| d.supports_group_by_expr()) .verified_only_select("SELECT name, count(1) FROM t GROUP BY name, ()"); { - std::assert_eq!( + assert_eq!( GroupByExpr::Expressions( vec![ Identifier(Ident::new("name".to_string())), @@ -11462,7 +11494,7 @@ fn parse_explain_with_option_list() { }), }, ]; - run_explain_analyze ( + run_explain_analyze( all_dialects_where(|d| d.supports_explain_with_utility_options()), "EXPLAIN (ANALYZE, VERBOSE true, WAL OFF, FORMAT YAML, USER_DEF_NUM -100.1) SELECT sqrt(id) FROM foo", false, @@ -12443,3 +12475,268 @@ fn test_reserved_keywords_for_identifiers() { let sql = "SELECT MAX(interval) FROM tbl"; dialects.parse_sql_statements(sql).unwrap(); } + +#[test] +fn parse_create_table_with_bit_types() { + let sql = "CREATE TABLE t (a BIT, b BIT VARYING, c BIT(42), d BIT VARYING(43))"; + match verified_stmt(sql) { + Statement::CreateTable(CreateTable { columns, .. }) => { + assert_eq!(columns.len(), 4); + assert_eq!(columns[0].data_type, DataType::Bit(None)); + assert_eq!(columns[0].to_string(), "a BIT"); + assert_eq!(columns[1].data_type, DataType::BitVarying(None)); + assert_eq!(columns[1].to_string(), "b BIT VARYING"); + assert_eq!(columns[2].data_type, DataType::Bit(Some(42))); + assert_eq!(columns[2].to_string(), "c BIT(42)"); + assert_eq!(columns[3].data_type, DataType::BitVarying(Some(43))); + assert_eq!(columns[3].to_string(), "d BIT VARYING(43)"); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_composite_access_expr() { + assert_eq!( + verified_expr("f(a).b"), + Expr::CompoundFieldAccess { + root: Box::new(Expr::Function(Function { + name: ObjectName(vec![Ident::new("f")]), + uses_odbc_syntax: false, + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier(Ident::new("a")) + ))], + clauses: vec![], + }), + null_treatment: None, + filter: None, + over: None, + within_group: vec![] + })), + access_chain: vec![AccessExpr::Dot(Expr::Identifier(Ident::new("b")))] + } + ); + + // Nested Composite Access + assert_eq!( + verified_expr("f(a).b.c"), + Expr::CompoundFieldAccess { + root: Box::new(Expr::Function(Function { + name: ObjectName(vec![Ident::new("f")]), + uses_odbc_syntax: false, + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier(Ident::new("a")) + ))], + clauses: vec![], + }), + null_treatment: None, + filter: None, + over: None, + within_group: vec![] + })), + access_chain: vec![ + AccessExpr::Dot(Expr::Identifier(Ident::new("b"))), + AccessExpr::Dot(Expr::Identifier(Ident::new("c"))), + ] + } + ); + + // Composite Access in Select and Where Clauses + let stmt = verified_only_select("SELECT f(a).b FROM t WHERE f(a).b IS NOT NULL"); + let expr = Expr::CompoundFieldAccess { + root: Box::new(Expr::Function(Function { + name: ObjectName(vec![Ident::new("f")]), + uses_odbc_syntax: false, + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier(Ident::new("a")), + ))], + clauses: vec![], + }), + null_treatment: None, + filter: None, + over: None, + within_group: vec![], + })), + access_chain: vec![AccessExpr::Dot(Expr::Identifier(Ident::new("b")))], + }; + + assert_eq!(stmt.projection[0], 
SelectItem::UnnamedExpr(expr.clone())); + assert_eq!(stmt.selection.unwrap(), Expr::IsNotNull(Box::new(expr))); + + // Compound access with quoted identifier. + all_dialects_where(|d| d.is_delimited_identifier_start('"')) + .verified_only_select("SELECT f(a).\"an id\""); + + // Composite Access in struct literal + all_dialects_where(|d| d.supports_struct_literal()).verified_stmt( + "SELECT * FROM t WHERE STRUCT(STRUCT(1 AS a, NULL AS b) AS c, NULL AS d).c.a IS NOT NULL", + ); + let support_struct = all_dialects_where(|d| d.supports_struct_literal()); + let stmt = support_struct + .verified_only_select("SELECT STRUCT(STRUCT(1 AS a, NULL AS b) AS c, NULL AS d).c.a"); + let expected = SelectItem::UnnamedExpr(Expr::CompoundFieldAccess { + root: Box::new(Expr::Struct { + values: vec![ + Expr::Named { + name: Ident::new("c"), + expr: Box::new(Expr::Struct { + values: vec![ + Expr::Named { + name: Ident::new("a"), + expr: Box::new(Expr::Value(Number("1".parse().unwrap(), false))), + }, + Expr::Named { + name: Ident::new("b"), + expr: Box::new(Expr::Value(Value::Null)), + }, + ], + fields: vec![], + }), + }, + Expr::Named { + name: Ident::new("d"), + expr: Box::new(Expr::Value(Value::Null)), + }, + ], + fields: vec![], + }), + access_chain: vec![ + AccessExpr::Dot(Expr::Identifier(Ident::new("c"))), + AccessExpr::Dot(Expr::Identifier(Ident::new("a"))), + ], + }); + assert_eq!(stmt.projection[0], expected); +} + +#[test] +fn parse_create_table_with_enum_types() { + let sql = "CREATE TABLE t0 (foo ENUM8('a' = 1, 'b' = 2), bar ENUM16('a' = 1, 'b' = 2), baz ENUM('a', 'b'))"; + match all_dialects().verified_stmt(sql) { + Statement::CreateTable(CreateTable { name, columns, .. }) => { + assert_eq!(name.to_string(), "t0"); + assert_eq!( + vec![ + ColumnDef { + name: Ident::new("foo"), + data_type: DataType::Enum( + vec![ + EnumMember::NamedValue( + "a".to_string(), + Expr::Value(Number("1".parse().unwrap(), false)) + ), + EnumMember::NamedValue( + "b".to_string(), + Expr::Value(Number("2".parse().unwrap(), false)) + ) + ], + Some(8) + ), + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::new("bar"), + data_type: DataType::Enum( + vec![ + EnumMember::NamedValue( + "a".to_string(), + Expr::Value(Number("1".parse().unwrap(), false)) + ), + EnumMember::NamedValue( + "b".to_string(), + Expr::Value(Number("2".parse().unwrap(), false)) + ) + ], + Some(16) + ), + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::new("baz"), + data_type: DataType::Enum( + vec![ + EnumMember::Name("a".to_string()), + EnumMember::Name("b".to_string()) + ], + None + ), + collation: None, + options: vec![], + } + ], + columns + ); + } + _ => unreachable!(), + } + + // invalid case missing value for enum pair + assert_eq!( + all_dialects() + .parse_sql_statements("CREATE TABLE t0 (foo ENUM8('a' = 1, 'b' = ))") + .unwrap_err(), + ParserError::ParserError("Expected: a value, found: )".to_string()) + ); + + // invalid case that name is not a string + assert_eq!( + all_dialects() + .parse_sql_statements("CREATE TABLE t0 (foo ENUM8('a' = 1, 2))") + .unwrap_err(), + ParserError::ParserError("Expected: literal string, found: 2".to_string()) + ); +} + +#[test] +fn test_table_sample() { + let dialects = all_dialects_where(|d| d.supports_table_sample_before_alias()); + dialects.verified_stmt("SELECT * FROM tbl TABLESAMPLE (50) AS t"); + dialects.verified_stmt("SELECT * FROM tbl TABLESAMPLE (50 ROWS) AS t"); + dialects.verified_stmt("SELECT * FROM tbl TABLESAMPLE (50 PERCENT) AS t"); + + let dialects = 
all_dialects_where(|d| !d.supports_table_sample_before_alias()); dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE BERNOULLI (50)"); dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50)"); dialects.verified_stmt("SELECT * FROM tbl AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); +} + +#[test] +fn overflow() { + let expr = std::iter::repeat("1") + .take(1000) + .collect::<Vec<_>>() + .join(" + "); + let sql = format!("SELECT {}", expr); + + let mut statements = Parser::parse_sql(&GenericDialect {}, sql.as_str()).unwrap(); + let statement = statements.pop().unwrap(); + assert_eq!(statement.to_string(), sql); +} + +#[test] +fn parse_select_without_projection() { + let dialects = all_dialects_where(|d| d.supports_empty_projections()); + dialects.verified_stmt("SELECT FROM users"); +} + +#[test] +fn parse_update_from_before_select() { + all_dialects() + .verified_stmt("UPDATE t1 FROM (SELECT name, id FROM t1 GROUP BY id) AS t2 SET name = t2.name WHERE t1.id = t2.id"); + + let query = + "UPDATE t1 FROM (SELECT name, id FROM t1 GROUP BY id) AS t2 SET name = t2.name FROM (SELECT name from t2) AS t2"; + assert_eq!( + ParserError::ParserError("Expected: end of statement, found: FROM".to_string()), + parse_sql_statements(query).unwrap_err() + ); +} diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index d73c088a7..b9ca55d13 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -185,16 +185,7 @@ fn test_values_clause() { "SELECT * FROM values", )); assert_eq!( - Some(&TableFactor::Table { - name: ObjectName(vec![Ident::new("values")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }), + Some(&table_from_name(ObjectName(vec![Ident::new("values")]))), query .body .as_select() diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 01ac0649a..db4ffb6f6 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -268,20 +268,11 @@ fn test_select_union_by_name() { top_before_distinct: false, into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "capitals".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "capitals".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![], }], lateral_views: vec![], @@ -306,20 +297,11 @@ fn test_select_union_by_name() { top_before_distinct: false, into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "weather".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "weather".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![], }], lateral_views: vec![], @@ -606,6 +588,7 @@ fn test_duckdb_named_argument_function_with_assignment_operator() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("FUN")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None,
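
`parse_update_from_before_select` above pins down the new `UpdateTableFromKind` split between a `FROM` placed before and after `SET`. A small round-trip sketch, assuming the generic dialect accepts the before-`SET` form the way `all_dialects()` does in that test:

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let sql = "UPDATE t1 FROM (SELECT name, id FROM t1 GROUP BY id) AS t2 SET name = t2.name WHERE t1.id = t2.id";
    let stmt = &Parser::parse_sql(&GenericDialect {}, sql).unwrap()[0];
    // The FROM clause stays ahead of SET on output instead of being reordered.
    assert_eq!(stmt.to_string(), sql);
}
```
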
@@ -647,8 +630,8 @@ fn test_array_index() { _ => panic!("Expected an expression with alias"), }; assert_eq!( - &Expr::Subscript { - expr: Box::new(Expr::Array(Array { + &Expr::CompoundFieldAccess { + root: Box::new(Expr::Array(Array { elem: vec![ Expr::Value(Value::SingleQuotedString("a".to_owned())), Expr::Value(Value::SingleQuotedString("b".to_owned())), @@ -656,9 +639,9 @@ fn test_array_index() { ], named: false })), - subscript: Box::new(Subscript::Index { + access_chain: vec![AccessExpr::Subscript(Subscript::Index { index: Expr::Value(number("3")) - }) + })] }, expr ); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 546b289ac..5349f1207 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -459,6 +459,7 @@ fn parse_delimited_identifiers() { with_ordinality: _, partitions: _, json_path: _, + sample: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); @@ -480,6 +481,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -536,6 +538,15 @@ fn parse_use() { ); } +#[test] +fn test_table_sample() { + hive().verified_stmt("SELECT * FROM source TABLESAMPLE (BUCKET 3 OUT OF 32 ON rand()) AS s"); + hive().verified_stmt("SELECT * FROM source TABLESAMPLE (BUCKET 3 OUT OF 16 ON id)"); + hive().verified_stmt("SELECT * FROM source TABLESAMPLE (100M) AS s"); + hive().verified_stmt("SELECT * FROM source TABLESAMPLE (0.1 PERCENT) AS s"); + hive().verified_stmt("SELECT * FROM source TABLESAMPLE (10 ROWS)"); +} + fn hive() -> TestedDialects { TestedDialects::new(vec![Box::new(HiveDialect {})]) } diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 31668c86a..ecc874af8 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -73,6 +73,7 @@ fn parse_table_time_travel() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, joins: vec![] },] @@ -221,6 +222,7 @@ fn parse_mssql_openjson() { with_ordinality: false, partitions: vec![], json_path: None, + sample: None, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -279,6 +281,7 @@ fn parse_mssql_openjson() { with_ordinality: false, partitions: vec![], json_path: None, + sample: None, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -338,6 +341,7 @@ fn parse_mssql_openjson() { with_ordinality: false, partitions: vec![], json_path: None, + sample: None, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -396,6 +400,7 @@ fn parse_mssql_openjson() { with_ordinality: false, partitions: vec![], json_path: None, + sample: None, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -434,6 +439,7 @@ fn parse_mssql_openjson() { with_ordinality: false, partitions: vec![], json_path: None, + sample: None, }, joins: vec![Join { relation: TableFactor::OpenJsonTable { @@ -611,9 +617,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, - with_ordinality: _, - partitions: _, - json_path: _, + .. 
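
The Hive `TABLESAMPLE` cases above all place the sample clause before the alias. A minimal sketch against `HiveDialect` directly, reusing the tests' `source` table:

```rust
use sqlparser::dialect::HiveDialect;
use sqlparser::parser::Parser;

fn main() {
    let sql = "SELECT * FROM source TABLESAMPLE (BUCKET 3 OUT OF 32 ON rand()) AS s";
    let stmt = &Parser::parse_sql(&HiveDialect {}, sql).unwrap()[0];
    // Hive is one of the dialects where supports_table_sample_before_alias() is
    // true, so the clause round-trips in front of `AS s`.
    assert_eq!(stmt.to_string(), sql);
}
```
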
} => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); @@ -635,6 +639,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -1081,20 +1086,11 @@ fn parse_substring_in_select() { })], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "test".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "test".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![] }], lateral_views: vec![], @@ -1388,6 +1384,7 @@ fn parse_create_table_with_valid_options() { }, ], ), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List( FunctionArgumentList { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 2b132331e..4a4e79611 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -669,6 +669,7 @@ fn table_constraint_unique_primary_ctor( columns, index_options, characteristics, + nulls_distinct: NullsDistinctOption::None, }, None => TableConstraint::PrimaryKey { name, @@ -684,7 +685,7 @@ fn table_constraint_unique_primary_ctor( #[test] fn parse_create_table_primary_and_unique_key() { let sqls = ["UNIQUE KEY", "PRIMARY KEY"] - .map(|key_ty|format!("CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key {key_ty} (bar))")); + .map(|key_ty| format!("CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key {key_ty} (bar))")); let index_type_display = [Some(KeyOrIndexDisplay::Key), None]; @@ -752,7 +753,7 @@ fn parse_create_table_primary_and_unique_key() { #[test] fn parse_create_table_primary_and_unique_key_with_index_options() { let sqls = ["UNIQUE INDEX", "PRIMARY KEY"] - .map(|key_ty|format!("CREATE TABLE foo (bar INT, var INT, CONSTRAINT constr {key_ty} index_name (bar, var) USING HASH COMMENT 'yes, ' USING BTREE COMMENT 'MySQL allows')")); + .map(|key_ty| format!("CREATE TABLE foo (bar INT, var INT, CONSTRAINT constr {key_ty} index_name (bar, var) USING HASH COMMENT 'yes, ' USING BTREE COMMENT 'MySQL allows')")); let index_type_display = [Some(KeyOrIndexDisplay::Index), None]; @@ -826,7 +827,7 @@ fn parse_create_table_primary_and_unique_key_with_index_type() { #[test] fn parse_create_table_primary_and_unique_key_characteristic_test() { let sqls = ["UNIQUE INDEX", "PRIMARY KEY"] - .map(|key_ty|format!("CREATE TABLE x (y INT, CONSTRAINT constr {key_ty} (y) NOT DEFERRABLE INITIALLY IMMEDIATE)")); + .map(|key_ty| format!("CREATE TABLE x (y INT, CONSTRAINT constr {key_ty} (y) NOT DEFERRABLE INITIALLY IMMEDIATE)")); for sql in &sqls { mysql_and_generic().verified_stmt(sql); } @@ -889,7 +890,13 @@ fn parse_create_table_set_enum() { }, ColumnDef { name: Ident::new("baz"), - data_type: DataType::Enum(vec!["a".to_string(), "b".to_string()]), + data_type: DataType::Enum( + vec![ + EnumMember::Name("a".to_string()), + EnumMember::Name("b".to_string()) + ], + None + ), collation: None, options: vec![], } @@ -1877,16 +1884,9 @@ fn parse_select_with_numeric_prefix_column_name() { )))], into: 
None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::with_quote('"', "table")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::with_quote( + '"', "table" + )])), joins: vec![] }], lateral_views: vec![], @@ -1936,16 +1936,9 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { ], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident::with_quote('"', "table")]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident::with_quote( + '"', "table" + )])), joins: vec![] }], lateral_views: vec![], @@ -2013,6 +2006,7 @@ fn parse_update_with_joins() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, joins: vec![Join { relation: TableFactor::Table { @@ -2027,6 +2021,7 @@ fn parse_update_with_joins() { partitions: vec![], with_ordinality: false, json_path: None, + sample: None, }, global: false, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { @@ -2457,20 +2452,11 @@ fn parse_substring_in_select() { })], into: None, from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "test".to_string(), - quote_style: None, - span: Span::empty(), - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![Ident { + value: "test".to_string(), + quote_style: None, + span: Span::empty(), + }])), joins: vec![] }], lateral_views: vec![], @@ -3031,3 +3017,18 @@ fn parse_longblob_type() { mysql_and_generic().verified_stmt("CREATE TABLE foo (bar MEDIUMTEXT)"); mysql_and_generic().verified_stmt("CREATE TABLE foo (bar LONGTEXT)"); } + +#[test] +fn parse_begin_without_transaction() { + mysql().verified_stmt("BEGIN"); +} + +#[test] +fn parse_double_precision() { + mysql().verified_stmt("CREATE TABLE foo (bar DOUBLE)"); + mysql().verified_stmt("CREATE TABLE foo (bar DOUBLE(11,0))"); + mysql().one_statement_parses_to( + "CREATE TABLE foo (bar DOUBLE(11, 0))", + "CREATE TABLE foo (bar DOUBLE(11,0))", + ); +} diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 52fe6c403..fd520d507 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -594,6 +594,25 @@ fn parse_alter_table_constraints_rename() { } } +#[test] +fn parse_alter_table_constraints_unique_nulls_distinct() { + match pg_and_generic() + .verified_stmt("ALTER TABLE t ADD CONSTRAINT b UNIQUE NULLS NOT DISTINCT (c)") + { + Statement::AlterTable { operations, .. } => match &operations[0] { + AlterTableOperation::AddConstraint(TableConstraint::Unique { + nulls_distinct, .. 
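
The new `NullsDistinctOption` surfaced above can be read back off the parsed constraint. A hedged sketch (Postgres dialect, with the table and column names reused from the test):

```rust
use sqlparser::ast::{AlterTableOperation, Statement, TableConstraint};
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let sql = "ALTER TABLE t ADD CONSTRAINT b UNIQUE NULLS NOT DISTINCT (c)";
    let stmts = Parser::parse_sql(&PostgreSqlDialect {}, sql).unwrap();
    if let Statement::AlterTable { operations, .. } = &stmts[0] {
        if let AlterTableOperation::AddConstraint(TableConstraint::Unique {
            nulls_distinct, ..
        }) = &operations[0]
        {
            // NullsDistinctOption::NotDistinct here; a plain UNIQUE parses as ::None.
            println!("nulls_distinct = {:?}", nulls_distinct);
        }
    }
}
```
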
+ }) => { + assert_eq!(nulls_distinct, &NullsDistinctOption::NotDistinct) + } + _ => unreachable!(), + }, + _ => unreachable!(), + } + pg_and_generic().verified_stmt("ALTER TABLE t ADD CONSTRAINT b UNIQUE NULLS DISTINCT (c)"); + pg_and_generic().verified_stmt("ALTER TABLE t ADD CONSTRAINT b UNIQUE (c)"); +} + #[test] fn parse_alter_table_disable() { pg_and_generic().verified_stmt("ALTER TABLE tab DISABLE ROW LEVEL SECURITY"); @@ -643,6 +662,100 @@ fn parse_create_extension() { .verified_stmt("CREATE EXTENSION extension_name WITH SCHEMA schema_name VERSION version"); } +#[test] +fn parse_drop_extension() { + assert_eq!( + pg_and_generic().verified_stmt("DROP EXTENSION extension_name"), + Statement::DropExtension { + names: vec!["extension_name".into()], + if_exists: false, + cascade_or_restrict: None, + } + ); + assert_eq!( + pg_and_generic().verified_stmt("DROP EXTENSION extension_name CASCADE"), + Statement::DropExtension { + names: vec!["extension_name".into()], + if_exists: false, + cascade_or_restrict: Some(ReferentialAction::Cascade), + } + ); + + assert_eq!( + pg_and_generic().verified_stmt("DROP EXTENSION extension_name RESTRICT"), + Statement::DropExtension { + names: vec!["extension_name".into()], + if_exists: false, + cascade_or_restrict: Some(ReferentialAction::Restrict), + } + ); + + assert_eq!( + pg_and_generic().verified_stmt("DROP EXTENSION extension_name, extension_name2 CASCADE"), + Statement::DropExtension { + names: vec!["extension_name".into(), "extension_name2".into()], + if_exists: false, + cascade_or_restrict: Some(ReferentialAction::Cascade), + } + ); + + assert_eq!( + pg_and_generic().verified_stmt("DROP EXTENSION extension_name, extension_name2 RESTRICT"), + Statement::DropExtension { + names: vec!["extension_name".into(), "extension_name2".into()], + if_exists: false, + cascade_or_restrict: Some(ReferentialAction::Restrict), + } + ); + + assert_eq!( + pg_and_generic().verified_stmt("DROP EXTENSION IF EXISTS extension_name"), + Statement::DropExtension { + names: vec!["extension_name".into()], + if_exists: true, + cascade_or_restrict: None, + } + ); + + assert_eq!( + pg_and_generic().verified_stmt("DROP EXTENSION IF EXISTS extension_name CASCADE"), + Statement::DropExtension { + names: vec!["extension_name".into()], + if_exists: true, + cascade_or_restrict: Some(ReferentialAction::Cascade), + } + ); + + assert_eq!( + pg_and_generic().verified_stmt("DROP EXTENSION IF EXISTS extension_name RESTRICT"), + Statement::DropExtension { + names: vec!["extension_name".into()], + if_exists: true, + cascade_or_restrict: Some(ReferentialAction::Restrict), + } + ); + + assert_eq!( + pg_and_generic() + .verified_stmt("DROP EXTENSION IF EXISTS extension_name1, extension_name2 CASCADE"), + Statement::DropExtension { + names: vec!["extension_name1".into(), "extension_name2".into()], + if_exists: true, + cascade_or_restrict: Some(ReferentialAction::Cascade), + } + ); + + assert_eq!( + pg_and_generic() + .verified_stmt("DROP EXTENSION IF EXISTS extension_name1, extension_name2 RESTRICT"), + Statement::DropExtension { + names: vec!["extension_name1".into(), "extension_name2".into()], + if_exists: true, + cascade_or_restrict: Some(ReferentialAction::Restrict), + } + ); +} + #[test] fn parse_alter_table_alter_column() { pg().one_statement_parses_to( @@ -2076,11 +2189,11 @@ fn parse_array_index_expr() { let sql = "SELECT foo[0] FROM foos"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::Subscript { - expr: Box::new(Expr::Identifier(Ident::new("foo"))), - 
subscript: Box::new(Subscript::Index { + &Expr::CompoundFieldAccess { + root: Box::new(Expr::Identifier(Ident::new("foo"))), + access_chain: vec![AccessExpr::Subscript(Subscript::Index { index: num[0].clone() - }), + })], }, expr_from_projection(only(&select.projection)), ); @@ -2088,16 +2201,16 @@ fn parse_array_index_expr() { let sql = "SELECT foo[0][0] FROM foos"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::Subscript { - expr: Box::new(Expr::Subscript { - expr: Box::new(Expr::Identifier(Ident::new("foo"))), - subscript: Box::new(Subscript::Index { + &Expr::CompoundFieldAccess { + root: Box::new(Expr::Identifier(Ident::new("foo"))), + access_chain: vec![ + AccessExpr::Subscript(Subscript::Index { index: num[0].clone() }), - }), - subscript: Box::new(Subscript::Index { - index: num[0].clone() - }), + AccessExpr::Subscript(Subscript::Index { + index: num[0].clone() + }) + ], }, expr_from_projection(only(&select.projection)), ); @@ -2105,29 +2218,27 @@ fn parse_array_index_expr() { let sql = r#"SELECT bar[0]["baz"]["fooz"] FROM foos"#; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::Subscript { - expr: Box::new(Expr::Subscript { - expr: Box::new(Expr::Subscript { - expr: Box::new(Expr::Identifier(Ident::new("bar"))), - subscript: Box::new(Subscript::Index { - index: num[0].clone() - }) + &Expr::CompoundFieldAccess { + root: Box::new(Expr::Identifier(Ident::new("bar"))), + access_chain: vec![ + AccessExpr::Subscript(Subscript::Index { + index: num[0].clone() }), - subscript: Box::new(Subscript::Index { + AccessExpr::Subscript(Subscript::Index { index: Expr::Identifier(Ident { value: "baz".to_string(), quote_style: Some('"'), span: Span::empty(), }) - }) - }), - subscript: Box::new(Subscript::Index { - index: Expr::Identifier(Ident { - value: "fooz".to_string(), - quote_style: Some('"'), - span: Span::empty(), - }) - }) + }), + AccessExpr::Subscript(Subscript::Index { + index: Expr::Identifier(Ident { + value: "fooz".to_string(), + quote_style: Some('"'), + span: Span::empty(), + }) + }), + ], }, expr_from_projection(only(&select.projection)), ); @@ -2135,33 +2246,33 @@ fn parse_array_index_expr() { let sql = "SELECT (CAST(ARRAY[ARRAY[2, 3]] AS INT[][]))[1][2]"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::Subscript { - expr: Box::new(Expr::Subscript { - expr: Box::new(Expr::Nested(Box::new(Expr::Cast { - kind: CastKind::Cast, - expr: Box::new(Expr::Array(Array { - elem: vec![Expr::Array(Array { - elem: vec![num[2].clone(), num[3].clone(),], - named: true, - })], + &Expr::CompoundFieldAccess { + root: Box::new(Expr::Nested(Box::new(Expr::Cast { + kind: CastKind::Cast, + expr: Box::new(Expr::Array(Array { + elem: vec![Expr::Array(Array { + elem: vec![num[2].clone(), num[3].clone(),], named: true, - })), - data_type: DataType::Array(ArrayElemTypeDef::SquareBracket( - Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket( - Box::new(DataType::Int(None)), - None - ))), + })], + named: true, + })), + data_type: DataType::Array(ArrayElemTypeDef::SquareBracket( + Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket( + Box::new(DataType::Int(None)), None - )), - format: None, - }))), - subscript: Box::new(Subscript::Index { + ))), + None + )), + format: None, + }))), + access_chain: vec![ + AccessExpr::Subscript(Subscript::Index { index: num[1].clone() }), - }), - subscript: Box::new(Subscript::Index { - index: num[2].clone() - }), + AccessExpr::Subscript(Subscript::Index { + index: num[2].clone() 
+ }), + ], }, expr_from_projection(only(&select.projection)), ); @@ -2250,9 +2361,13 @@ fn parse_array_subscript() { ), ]; for (sql, expect) in tests { - let Expr::Subscript { subscript, .. } = pg_and_generic().verified_expr(sql) else { + let Expr::CompoundFieldAccess { access_chain, .. } = pg_and_generic().verified_expr(sql) + else { panic!("expected subscript expr"); }; + let Some(AccessExpr::Subscript(subscript)) = access_chain.last() else { + panic!("expected subscript"); + }; assert_eq!(expect, *subscript); } @@ -2263,25 +2378,25 @@ fn parse_array_subscript() { fn parse_array_multi_subscript() { let expr = pg_and_generic().verified_expr("make_array(1, 2, 3)[1:2][2]"); assert_eq!( - Expr::Subscript { - expr: Box::new(Expr::Subscript { - expr: Box::new(call( - "make_array", - vec![ - Expr::Value(number("1")), - Expr::Value(number("2")), - Expr::Value(number("3")) - ] - )), - subscript: Box::new(Subscript::Slice { + Expr::CompoundFieldAccess { + root: Box::new(call( + "make_array", + vec![ + Expr::Value(number("1")), + Expr::Value(number("2")), + Expr::Value(number("3")) + ] + )), + access_chain: vec![ + AccessExpr::Subscript(Subscript::Slice { lower_bound: Some(Expr::Value(number("1"))), upper_bound: Some(Expr::Value(number("2"))), stride: None, }), - }), - subscript: Box::new(Subscript::Index { - index: Expr::Value(number("2")), - }), + AccessExpr::Subscript(Subscript::Index { + index: Expr::Value(number("2")), + }), + ], }, expr, ); @@ -2510,6 +2625,7 @@ fn parse_array_subquery_expr() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("ARRAY")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::Subquery(Box::new(Query { with: None, @@ -2892,6 +3008,7 @@ fn test_composite_value() { Ident::new("information_schema"), Ident::new("_pg_expandarray") ]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -3069,6 +3186,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_CATALOG")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, @@ -3081,6 +3199,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_USER")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, @@ -3093,6 +3212,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("SESSION_USER")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, @@ -3105,6 +3225,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("USER")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, @@ -3556,9 +3677,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, - with_ordinality: _, - partitions: _, - json_path: _, + .. 
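
With `Expr::Subscript` folded into `Expr::CompoundFieldAccess`, chained subscripts now arrive as one flat `access_chain` instead of nested nodes, as the rewritten expectations above show. A rough sketch of walking that chain:

```rust
use sqlparser::ast::{AccessExpr, Expr, SelectItem, SetExpr, Statement, Subscript};
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let stmts = Parser::parse_sql(&PostgreSqlDialect {}, "SELECT foo[0][0] FROM foos").unwrap();
    if let Statement::Query(query) = &stmts[0] {
        if let SetExpr::Select(select) = query.body.as_ref() {
            if let SelectItem::UnnamedExpr(Expr::CompoundFieldAccess { root, access_chain }) =
                &select.projection[0]
            {
                // One chain entry per `[0]`, all hanging off a single root identifier.
                assert!(matches!(root.as_ref(), Expr::Identifier(_)));
                assert_eq!(access_chain.len(), 2);
                assert!(matches!(
                    access_chain[0],
                    AccessExpr::Subscript(Subscript::Index { .. })
                ));
            }
        }
    }
}
```
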
} => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); @@ -3580,6 +3699,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index f0c1f0c74..857d378bc 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -39,27 +39,18 @@ fn test_square_brackets_over_db_schema_table_name() { assert_eq!( select.from[0], TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![ - Ident { - value: "test_schema".to_string(), - quote_style: Some('['), - span: Span::empty(), - }, - Ident { - value: "test_table".to_string(), - quote_style: Some('['), - span: Span::empty(), - } - ]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![ + Ident { + value: "test_schema".to_string(), + quote_style: Some('['), + span: Span::empty(), + }, + Ident { + value: "test_table".to_string(), + quote_style: Some('['), + span: Span::empty(), + } + ])), joins: vec![], } ); @@ -90,27 +81,18 @@ fn test_double_quotes_over_db_schema_table_name() { assert_eq!( select.from[0], TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![ - Ident { - value: "test_schema".to_string(), - quote_style: Some('"'), - span: Span::empty(), - }, - Ident { - value: "test_table".to_string(), - quote_style: Some('"'), - span: Span::empty(), - } - ]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - with_ordinality: false, - json_path: None, - }, + relation: table_from_name(ObjectName(vec![ + Ident { + value: "test_schema".to_string(), + quote_style: Some('"'), + span: Span::empty(), + }, + Ident { + value: "test_table".to_string(), + quote_style: Some('"'), + span: Span::empty(), + } + ])), joins: vec![], } ); @@ -130,9 +112,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, - with_ordinality: _, - partitions: _, - json_path: _, + .. 
} => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); @@ -154,6 +134,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -176,6 +157,8 @@ fn parse_delimited_identifiers() { } redshift().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#); + // An alias starting with a number + redshift().verified_stmt(r#"CREATE TABLE "foo" ("1" INT)"#); redshift().verified_stmt(r#"ALTER TABLE foo ADD CONSTRAINT "bar" PRIMARY KEY (baz)"#); //TODO verified_stmt(r#"UPDATE foo SET "bar" = 5"#); } @@ -222,7 +205,7 @@ fn test_redshift_json_path() { path: JsonPath { path: vec![ JsonPathElem::Bracket { - key: Expr::Value(Value::Number("0".parse().unwrap(), false)) + key: Expr::Value(number("0")) }, JsonPathElem::Dot { key: "o_orderkey".to_string(), @@ -245,7 +228,7 @@ fn test_redshift_json_path() { path: JsonPath { path: vec![ JsonPathElem::Bracket { - key: Expr::Value(Value::Number("0".parse().unwrap(), false)) + key: Expr::Value(number("0")) }, JsonPathElem::Bracket { key: Expr::Value(Value::SingleQuotedString("id".to_owned())) @@ -269,7 +252,7 @@ fn test_redshift_json_path() { path: JsonPath { path: vec![ JsonPathElem::Bracket { - key: Expr::Value(Value::Number("0".parse().unwrap(), false)) + key: Expr::Value(number("0")) }, JsonPathElem::Bracket { key: Expr::Value(Value::SingleQuotedString("id".to_owned())) @@ -279,6 +262,31 @@ fn test_redshift_json_path() { }, expr_from_projection(only(&select.projection)) ); + + let sql = r#"SELECT db1.sc1.tbl1.col1[0]."id" FROM customer_orders_lineitem"#; + let select = dialects.verified_only_select(sql); + assert_eq!( + &Expr::JsonAccess { + value: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("db1"), + Ident::new("sc1"), + Ident::new("tbl1"), + Ident::new("col1") + ])), + path: JsonPath { + path: vec![ + JsonPathElem::Bracket { + key: Expr::Value(number("0")) + }, + JsonPathElem::Dot { + key: "id".to_string(), + quoted: true, + } + ] + } + }, + expr_from_projection(only(&select.projection)) + ); } #[test] @@ -295,7 +303,7 @@ fn test_parse_json_path_from() { &Some(JsonPath { path: vec![ JsonPathElem::Bracket { - key: Expr::Value(Value::Number("0".parse().unwrap(), false)) + key: Expr::Value(number("0")) }, JsonPathElem::Dot { key: "a".to_string(), @@ -319,7 +327,7 @@ fn test_parse_json_path_from() { &Some(JsonPath { path: vec![ JsonPathElem::Bracket { - key: Expr::Value(Value::Number("0".parse().unwrap(), false)) + key: Expr::Value(number("0")) }, JsonPathElem::Dot { key: "a".to_string(), @@ -353,3 +361,24 @@ fn test_parse_json_path_from() { _ => panic!(), } } + +#[test] +fn test_parse_select_numbered_columns() { + // An alias starting with a number + redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1" FROM a"#); + redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1abc" FROM a"#); +} + +#[test] +fn test_parse_nested_quoted_identifier() { + redshift().verified_stmt(r#"SELECT 1 AS ["1"] FROM a"#); + redshift().verified_stmt(r#"SELECT 1 AS ["[="] FROM a"#); + redshift().verified_stmt(r#"SELECT 1 AS ["=]"] FROM a"#); + redshift().verified_stmt(r#"SELECT 1 AS ["a[b]"] FROM a"#); + // trim spaces + redshift().one_statement_parses_to(r#"SELECT 1 AS [ " 1 " ]"#, r#"SELECT 1 AS [" 1 "]"#); + // invalid query + assert!(redshift() + .parse_sql_statements(r#"SELECT 
1 AS ["1]"#) + .is_err()); +} diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 8eef09148..b5bf4a8d7 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -355,6 +355,15 @@ fn test_snowflake_create_table_column_comment() { } } +#[test] +fn test_snowflake_create_table_on_commit() { + snowflake().verified_stmt( + r#"CREATE LOCAL TEMPORARY TABLE "AAA"."foo" ("bar" INTEGER) ON COMMIT PRESERVE ROWS"#, + ); + snowflake().verified_stmt(r#"CREATE TABLE "AAA"."foo" ("bar" INTEGER) ON COMMIT DELETE ROWS"#); + snowflake().verified_stmt(r#"CREATE TABLE "AAA"."foo" ("bar" INTEGER) ON COMMIT DROP"#); +} + #[test] fn test_snowflake_create_local_table() { match snowflake().verified_stmt("CREATE TABLE my_table (a INT)") { @@ -1188,9 +1197,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, - with_ordinality: _, - partitions: _, - json_path: _, + .. } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); @@ -1212,6 +1219,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, @@ -1411,6 +1419,43 @@ fn test_alter_table_swap_with() { }; } +#[test] +fn test_alter_table_clustering() { + let sql = r#"ALTER TABLE tab CLUSTER BY (c1, "c2", TO_DATE(c3))"#; + match alter_table_op(snowflake_and_generic().verified_stmt(sql)) { + AlterTableOperation::ClusterBy { exprs } => { + assert_eq!( + exprs, + [ + Expr::Identifier(Ident::new("c1")), + Expr::Identifier(Ident::with_quote('"', "c2")), + Expr::Function(Function { + name: ObjectName(vec![Ident::new("TO_DATE")]), + uses_odbc_syntax: false, + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier(Ident::new("c3")) + ))], + duplicate_treatment: None, + clauses: vec![], + }), + filter: None, + null_treatment: None, + over: None, + within_group: vec![] + }) + ], + ); + } + _ => unreachable!(), + } + + snowflake_and_generic().verified_stmt("ALTER TABLE tbl DROP CLUSTERING KEY"); + snowflake_and_generic().verified_stmt("ALTER TABLE tbl SUSPEND RECLUSTER"); + snowflake_and_generic().verified_stmt("ALTER TABLE tbl RESUME RECLUSTER"); +} + #[test] fn test_drop_stage() { match snowflake_and_generic().verified_stmt("DROP STAGE s1") { @@ -2656,7 +2701,7 @@ fn parse_use() { let quote_styles = ['\'', '"', '`']; for object_name in &valid_object_names { // Test single identifier without quotes - std::assert_eq!( + assert_eq!( snowflake().verified_stmt(&format!("USE {}", object_name)), Statement::Use(Use::Object(ObjectName(vec![Ident::new( object_name.to_string() @@ -2664,7 +2709,7 @@ fn parse_use() { ); for "e in "e_styles { // Test single identifier with different type of quotes - std::assert_eq!( + assert_eq!( snowflake().verified_stmt(&format!("USE {}{}{}", quote, object_name, quote)), Statement::Use(Use::Object(ObjectName(vec![Ident::with_quote( quote, @@ -2676,7 +2721,7 @@ fn parse_use() { for "e in "e_styles { // Test double identifier with different type of quotes - std::assert_eq!( + assert_eq!( snowflake().verified_stmt(&format!("USE {0}CATALOG{0}.{0}my_schema{0}", quote)), Statement::Use(Use::Object(ObjectName(vec![ Ident::with_quote(quote, "CATALOG"), @@ -2685,7 +2730,7 @@ fn parse_use() { ); } // 
Test double identifier without quotes - std::assert_eq!( + assert_eq!( snowflake().verified_stmt("USE mydb.my_schema"), Statement::Use(Use::Object(ObjectName(vec![ Ident::new("mydb"), @@ -2695,37 +2740,55 @@ fn parse_use() { for "e in "e_styles { // Test single and double identifier with keyword and different type of quotes - std::assert_eq!( + assert_eq!( snowflake().verified_stmt(&format!("USE DATABASE {0}my_database{0}", quote)), Statement::Use(Use::Database(ObjectName(vec![Ident::with_quote( quote, "my_database".to_string(), )]))) ); - std::assert_eq!( + assert_eq!( snowflake().verified_stmt(&format!("USE SCHEMA {0}my_schema{0}", quote)), Statement::Use(Use::Schema(ObjectName(vec![Ident::with_quote( quote, "my_schema".to_string(), )]))) ); - std::assert_eq!( + assert_eq!( snowflake().verified_stmt(&format!("USE SCHEMA {0}CATALOG{0}.{0}my_schema{0}", quote)), Statement::Use(Use::Schema(ObjectName(vec![ Ident::with_quote(quote, "CATALOG"), Ident::with_quote(quote, "my_schema") ]))) ); + assert_eq!( + snowflake().verified_stmt(&format!("USE ROLE {0}my_role{0}", quote)), + Statement::Use(Use::Role(ObjectName(vec![Ident::with_quote( + quote, + "my_role".to_string(), + )]))) + ); + assert_eq!( + snowflake().verified_stmt(&format!("USE WAREHOUSE {0}my_wh{0}", quote)), + Statement::Use(Use::Warehouse(ObjectName(vec![Ident::with_quote( + quote, + "my_wh".to_string(), + )]))) + ); } // Test invalid syntax - missing identifier let invalid_cases = ["USE SCHEMA", "USE DATABASE", "USE WAREHOUSE"]; for sql in &invalid_cases { - std::assert_eq!( + assert_eq!( snowflake().parse_sql_statements(sql).unwrap_err(), ParserError::ParserError("Expected: identifier, found: EOF".to_string()), ); } + + snowflake().verified_stmt("USE SECONDARY ROLES ALL"); + snowflake().verified_stmt("USE SECONDARY ROLES NONE"); + snowflake().verified_stmt("USE SECONDARY ROLES r1, r2, r3"); } #[test] @@ -2905,3 +2968,25 @@ fn test_sf_double_dot_notation() { #[test] fn test_parse_double_dot_notation_wrong_position() {} + +#[test] +fn parse_insert_overwrite() { + let insert_overwrite_into = r#"INSERT OVERWRITE INTO schema.table SELECT a FROM b"#; + snowflake().verified_stmt(insert_overwrite_into); +} + +#[test] +fn test_table_sample() { + snowflake_and_generic().verified_stmt("SELECT * FROM testtable SAMPLE (10)"); + snowflake_and_generic().verified_stmt("SELECT * FROM testtable TABLESAMPLE (10)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE BERNOULLI (10)"); + snowflake_and_generic().verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE ROW (10)"); + snowflake_and_generic().verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE ROW (10 ROWS)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM testtable TABLESAMPLE BLOCK (3) SEED (82)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM testtable TABLESAMPLE SYSTEM (3) REPEATABLE (82)"); + snowflake_and_generic().verified_stmt("SELECT id FROM mytable TABLESAMPLE (10) REPEATABLE (1)"); + snowflake_and_generic().verified_stmt("SELECT id FROM mytable TABLESAMPLE (10) SEED (1)"); +} diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index c3cfb7a63..0adf7f755 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -419,6 +419,7 @@ fn parse_window_function_with_filter() { select.projection, vec![SelectItem::UnnamedExpr(Expr::Function(Function { name: ObjectName(vec![Ident::new(func_name)]), + uses_odbc_syntax: false, parameters: FunctionArguments::None, args: 
                 duplicate_treatment: None,
@@ -478,16 +479,7 @@ fn parse_update_tuple_row_values() {
         }],
         selection: None,
         table: TableWithJoins {
-            relation: TableFactor::Table {
-                name: ObjectName(vec![Ident::new("x")]),
-                alias: None,
-                args: None,
-                with_hints: vec![],
-                version: None,
-                partitions: vec![],
-                with_ordinality: false,
-                json_path: None,
-            },
+            relation: table_from_name(ObjectName(vec![Ident::new("x")])),
             joins: vec![],
         },
         from: None,
@@ -527,9 +519,9 @@ fn parse_start_transaction_with_modifier() {
     sqlite_and_generic().verified_stmt("BEGIN DEFERRED TRANSACTION");
     sqlite_and_generic().verified_stmt("BEGIN IMMEDIATE TRANSACTION");
     sqlite_and_generic().verified_stmt("BEGIN EXCLUSIVE TRANSACTION");
-    sqlite_and_generic().one_statement_parses_to("BEGIN DEFERRED", "BEGIN DEFERRED TRANSACTION");
-    sqlite_and_generic().one_statement_parses_to("BEGIN IMMEDIATE", "BEGIN IMMEDIATE TRANSACTION");
-    sqlite_and_generic().one_statement_parses_to("BEGIN EXCLUSIVE", "BEGIN EXCLUSIVE TRANSACTION");
+    sqlite_and_generic().verified_stmt("BEGIN DEFERRED");
+    sqlite_and_generic().verified_stmt("BEGIN IMMEDIATE");
+    sqlite_and_generic().verified_stmt("BEGIN EXCLUSIVE");
 
     let unsupported_dialects = TestedDialects::new(
         all_dialects()
@@ -569,6 +561,16 @@ fn test_dollar_identifier_as_placeholder() {
         }
         _ => unreachable!(),
     }
+
+    // $$ is a valid placeholder in SQLite
+    match sqlite().verified_expr("id = $$") {
+        Expr::BinaryOp { op, left, right } => {
+            assert_eq!(op, BinaryOperator::Eq);
+            assert_eq!(left, Box::new(Expr::Identifier(Ident::new("id"))));
+            assert_eq!(right, Box::new(Expr::Value(Placeholder("$$".to_string()))));
+        }
+        _ => unreachable!(),
+    }
 }
 
 fn sqlite() -> TestedDialects {