Skip to content

Commit

Permalink
bigquery: CAST AS x FORMAT support
Browse files Browse the repository at this point in the history
  • Loading branch information
lustefaniak committed Oct 20, 2023
1 parent 83cb734 commit ebe32db
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 5 deletions.
68 changes: 65 additions & 3 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,20 @@ impl fmt::Display for JsonOperator {
}
}

/// Optional `FORMAT` clause of a `CAST` / `TRY_CAST` / `SAFE_CAST` expression.
///
/// BigQuery: <https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax>
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
// No `visit(with = "visit_expr")` here: that hook is for `Expr` nodes, and this
// type is not an `Expr`, so the plain derive (which just recurses into the
// fields) is what we want.
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum CastFormat {
    /// `FORMAT <format>`
    Value(Value),
    /// `FORMAT <format> AT TIME ZONE <time_zone>`
    ValueAtTimeZone(Value, Value),
}

/// An SQL expression of any type.
///
/// The parser does not distinguish between expressions of different types
Expand Down Expand Up @@ -437,19 +451,28 @@ pub enum Expr {
Cast {
expr: Box<Expr>,
data_type: DataType,
// Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery
// https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax
format: Option<CastFormat>,
},
/// TRY_CAST an expression to a different data type e.g. `TRY_CAST(foo AS VARCHAR(123))`
// this differs from CAST in the choice of how to implement invalid conversions
TryCast {
expr: Box<Expr>,
data_type: DataType,
// Optional FORMAT clause: TRY_CAST(string_expression AS type FORMAT format_string_expression), mirroring the BigQuery CAST syntax
// https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax
format: Option<CastFormat>,
},
/// SAFE_CAST an expression to a different data type e.g. `SAFE_CAST(foo AS FLOAT64)`
// only available for BigQuery: https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#safe_casting
// this works the same as `TRY_CAST`
SafeCast {
expr: Box<Expr>,
data_type: DataType,
// Optional SAFE_CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery
// https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax
format: Option<CastFormat>,
},
/// AT a timestamp to a different timezone e.g. `FROM_UNIXTIME(0) AT TIME ZONE 'UTC-06:00'`
AtTimeZone {
Expand Down Expand Up @@ -597,6 +620,15 @@ pub enum Expr {
},
}

/// Renders the part of a cast that follows the `FORMAT` keyword: the format
/// value, optionally followed by ` AT TIME ZONE <time zone value>`.
impl fmt::Display for CastFormat {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Split the variant into the always-present format value and the
        // optional time zone, then emit them in order.
        let (format, time_zone) = match self {
            CastFormat::Value(format) => (format, None),
            CastFormat::ValueAtTimeZone(format, time_zone) => (format, Some(time_zone)),
        };
        write!(f, "{format}")?;
        if let Some(time_zone) = time_zone {
            write!(f, " AT TIME ZONE {time_zone}")?;
        }
        Ok(())
    }
}

impl fmt::Display for Expr {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Expand Down Expand Up @@ -753,9 +785,39 @@ impl fmt::Display for Expr {
write!(f, "{op}{expr}")
}
}
Expr::Cast { expr, data_type } => write!(f, "CAST({expr} AS {data_type})"),
Expr::TryCast { expr, data_type } => write!(f, "TRY_CAST({expr} AS {data_type})"),
Expr::SafeCast { expr, data_type } => write!(f, "SAFE_CAST({expr} AS {data_type})"),
Expr::Cast {
expr,
data_type,
format,
} => {
if let Some(format) = format {
write!(f, "CAST({expr} AS {data_type} FORMAT {format})")
} else {
write!(f, "CAST({expr} AS {data_type})")
}
}
Expr::TryCast {
expr,
data_type,
format,
} => {
if let Some(format) = format {
write!(f, "TRY_CAST({expr} AS {data_type} FORMAT {format})")
} else {
write!(f, "TRY_CAST({expr} AS {data_type})")
}
}
Expr::SafeCast {
expr,
data_type,
format,
} => {
if let Some(format) = format {
write!(f, "SAFE_CAST({expr} AS {data_type} FORMAT {format})")
} else {
write!(f, "SAFE_CAST({expr} AS {data_type})")
}
}
Expr::Extract { field, expr } => write!(f, "EXTRACT({field} FROM {expr})"),
Expr::Ceil { expr, field } => {
if field == &DateTimeField::NoDateTime {
Expand Down
23 changes: 23 additions & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1139,16 +1139,34 @@ impl<'a> Parser<'a> {
})
}

/// Parse the optional `FORMAT <value> [AT TIME ZONE <value>]` suffix of a cast.
///
/// Returns `Ok(None)` when the next keyword is not `FORMAT`; errors from
/// parsing either value are propagated.
pub fn parse_optional_cast_format(&mut self) -> Result<Option<CastFormat>, ParserError> {
    if !self.parse_keyword(Keyword::FORMAT) {
        return Ok(None);
    }

    let format = self.parse_value()?;
    let cast_format = if self.parse_keywords(&[Keyword::AT, Keyword::TIME, Keyword::ZONE]) {
        CastFormat::ValueAtTimeZone(format, self.parse_value()?)
    } else {
        CastFormat::Value(format)
    };
    Ok(Some(cast_format))
}

/// Parse the parenthesised body of a SQL `CAST`, e.g. `(expr AS FLOAT)` or
/// `(expr AS DATE FORMAT 'YYYY-MM-DD')`, into an [`Expr::Cast`].
pub fn parse_cast_expr(&mut self) -> Result<Expr, ParserError> {
    self.expect_token(&Token::LParen)?;
    let operand = Box::new(self.parse_expr()?);
    self.expect_keyword(Keyword::AS)?;
    let target_type = self.parse_data_type()?;
    // The FORMAT clause, when present, sits between the type and the `)`.
    let format = self.parse_optional_cast_format()?;
    self.expect_token(&Token::RParen)?;

    Ok(Expr::Cast {
        expr: operand,
        data_type: target_type,
        format,
    })
}

Expand All @@ -1158,10 +1176,12 @@ impl<'a> Parser<'a> {
let expr = self.parse_expr()?;
self.expect_keyword(Keyword::AS)?;
let data_type = self.parse_data_type()?;
let format = self.parse_optional_cast_format()?;
self.expect_token(&Token::RParen)?;
Ok(Expr::TryCast {
expr: Box::new(expr),
data_type,
format,
})
}

Expand All @@ -1171,10 +1191,12 @@ impl<'a> Parser<'a> {
let expr = self.parse_expr()?;
self.expect_keyword(Keyword::AS)?;
let data_type = self.parse_data_type()?;
let format = self.parse_optional_cast_format()?;
self.expect_token(&Token::RParen)?;
Ok(Expr::SafeCast {
expr: Box::new(expr),
data_type,
format,
})
}

Expand Down Expand Up @@ -2101,6 +2123,7 @@ impl<'a> Parser<'a> {
Ok(Expr::Cast {
expr: Box::new(expr),
data_type: self.parse_data_type()?,
format: None,
})
}

Expand Down
33 changes: 32 additions & 1 deletion tests/sqlparser_bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,38 @@ fn parse_trailing_comma() {
// Passes a plain `SAFE_CAST` (no `FORMAT` clause) to `verified_only_select`.
// NOTE(review): this view is a rendered diff, so both the pre-change
// `bigquery()` call and the post-change `bigquery_and_generic()` call appear
// below; presumably only the latter exists in the committed file — confirm.
#[test]
fn parse_cast_type() {
let sql = r#"SELECT SAFE_CAST(1 AS INT64)"#;
bigquery().verified_only_select(sql);
bigquery_and_generic().verified_only_select(sql);
}

// `CAST(... AS DATE FORMAT '<format>')` passed through `verified_only_select`.
#[test]
fn parse_cast_date_format() {
    bigquery_and_generic().verified_only_select(
        r#"SELECT CAST(date_valid_from AS DATE FORMAT 'YYYY-MM-DD') AS date_valid_from FROM foo"#,
    );
}

// `CAST(TIME ... AS STRING FORMAT '<format>')` passed through `verified_only_select`.
#[test]
fn parse_cast_time_format() {
    bigquery_and_generic().verified_only_select(
        r#"SELECT CAST(TIME '21:30:00' AS STRING FORMAT 'PM') AS date_time_to_string"#,
    );
}

// `CAST(... FORMAT '<format>' AT TIME ZONE '<tz>')` passed through `verified_only_select`.
#[test]
fn parse_cast_timestamp_format_tz() {
    bigquery_and_generic().verified_only_select(
        r#"SELECT CAST(TIMESTAMP '2008-12-25 00:00:00+00:00' AS STRING FORMAT 'TZH' AT TIME ZONE 'Asia/Kolkata') AS date_time_to_string"#,
    );
}

// `CAST('<string>' AS BYTES FORMAT '<format>')` passed through `verified_only_select`.
#[test]
fn parse_cast_string_to_bytes_format() {
    bigquery_and_generic().verified_only_select(
        r#"SELECT CAST('Hello' AS BYTES FORMAT 'ASCII') AS string_to_bytes"#,
    );
}

// `CAST(B'<bytes>' AS STRING FORMAT '<format>')` passed through `verified_only_select`.
#[test]
fn parse_cast_bytes_to_string_format() {
    bigquery_and_generic().verified_only_select(
        r#"SELECT CAST(B'\x48\x65\x6c\x6c\x6f' AS STRING FORMAT 'ASCII') AS bytes_to_string"#,
    );
}

#[test]
Expand Down
10 changes: 10 additions & 0 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1934,6 +1934,7 @@ fn parse_cast() {
&Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::BigInt(None),
format: None,
},
expr_from_projection(only(&select.projection))
);
Expand All @@ -1944,6 +1945,7 @@ fn parse_cast() {
&Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::TinyInt(None),
format: None,
},
expr_from_projection(only(&select.projection))
);
Expand All @@ -1970,6 +1972,7 @@ fn parse_cast() {
&Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::Nvarchar(Some(50)),
format: None,
},
expr_from_projection(only(&select.projection))
);
Expand All @@ -1980,6 +1983,7 @@ fn parse_cast() {
&Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::Clob(None),
format: None,
},
expr_from_projection(only(&select.projection))
);
Expand All @@ -1990,6 +1994,7 @@ fn parse_cast() {
&Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::Clob(Some(50)),
format: None,
},
expr_from_projection(only(&select.projection))
);
Expand All @@ -2000,6 +2005,7 @@ fn parse_cast() {
&Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::Binary(Some(50)),
format: None,
},
expr_from_projection(only(&select.projection))
);
Expand All @@ -2010,6 +2016,7 @@ fn parse_cast() {
&Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::Varbinary(Some(50)),
format: None,
},
expr_from_projection(only(&select.projection))
);
Expand All @@ -2020,6 +2027,7 @@ fn parse_cast() {
&Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::Blob(None),
format: None,
},
expr_from_projection(only(&select.projection))
);
Expand All @@ -2030,6 +2038,7 @@ fn parse_cast() {
&Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::Blob(Some(50)),
format: None,
},
expr_from_projection(only(&select.projection))
);
Expand All @@ -2043,6 +2052,7 @@ fn parse_try_cast() {
&Expr::TryCast {
expr: Box::new(Expr::Identifier(Ident::new("id"))),
data_type: DataType::BigInt(None),
format: None,
},
expr_from_projection(only(&select.projection))
);
Expand Down
3 changes: 2 additions & 1 deletion tests/sqlparser_postgres.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1782,7 +1782,8 @@ fn parse_array_index_expr() {
})),
data_type: DataType::Array(Some(Box::new(DataType::Array(Some(Box::new(
DataType::Int(None)
))))))
)))))),
format: None,
}))),
indexes: vec![num[1].clone(), num[2].clone()],
},
Expand Down
1 change: 1 addition & 0 deletions tests/sqlparser_snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ fn parse_array() {
&Expr::Cast {
expr: Box::new(Expr::Identifier(Ident::new("a"))),
data_type: DataType::Array(None),
format: None,
},
expr_from_projection(only(&select.projection))
);
Expand Down

0 comments on commit ebe32db

Please sign in to comment.