Skip to content

Commit

Permalink
feat(12118): logical plan support for Utf8View
Browse files Browse the repository at this point in the history
  • Loading branch information
wiedld committed Aug 27, 2024
1 parent 533ddcb commit d7be771
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 3 deletions.
4 changes: 3 additions & 1 deletion datafusion/substrait/src/logical_plan/consumer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ use crate::variation_const::{
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
INTERVAL_MONTH_DAY_NANO_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF,
UNSIGNED_INTEGER_TYPE_VARIATION_REF,
UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF,
};
#[allow(deprecated)]
use crate::variation_const::{
Expand Down Expand Up @@ -1442,6 +1442,7 @@ fn from_substrait_type(
r#type::Kind::String(string) => match string.type_variation_reference {
DEFAULT_CONTAINER_TYPE_VARIATION_REF => Ok(DataType::Utf8),
LARGE_CONTAINER_TYPE_VARIATION_REF => Ok(DataType::LargeUtf8),
VIEW_CONTAINER_TYPE_VARIATION_REF => Ok(DataType::Utf8View),
v => not_impl_err!(
"Unsupported Substrait type variation {v} of type {s_kind:?}"
),
Expand Down Expand Up @@ -1759,6 +1760,7 @@ fn from_substrait_literal(
Some(LiteralType::String(s)) => match lit.type_variation_reference {
DEFAULT_CONTAINER_TYPE_VARIATION_REF => ScalarValue::Utf8(Some(s.clone())),
LARGE_CONTAINER_TYPE_VARIATION_REF => ScalarValue::LargeUtf8(Some(s.clone())),
VIEW_CONTAINER_TYPE_VARIATION_REF => ScalarValue::Utf8View(Some(s.clone())),
others => {
return substrait_err!("Unknown type variation reference {others}");
}
Expand Down
13 changes: 12 additions & 1 deletion datafusion/substrait/src/logical_plan/producer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ use crate::variation_const::{
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
INTERVAL_MONTH_DAY_NANO_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF,
UNSIGNED_INTEGER_TYPE_VARIATION_REF,
UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF,
};
use datafusion::arrow::array::{Array, GenericListArray, OffsetSizeTrait};
use datafusion::common::{
Expand Down Expand Up @@ -1474,6 +1474,12 @@ fn to_substrait_type(
nullability,
})),
}),
DataType::Utf8View => Ok(substrait::proto::Type {
kind: Some(r#type::Kind::String(r#type::String {
type_variation_reference: VIEW_CONTAINER_TYPE_VARIATION_REF,
nullability,
})),
}),
DataType::List(inner) => {
let inner_type =
to_substrait_type(inner.data_type(), inner.is_nullable(), extensions)?;
Expand Down Expand Up @@ -1926,6 +1932,10 @@ fn to_substrait_literal(
LiteralType::String(s.clone()),
LARGE_CONTAINER_TYPE_VARIATION_REF,
),
ScalarValue::Utf8View(Some(s)) => (
LiteralType::String(s.clone()),
VIEW_CONTAINER_TYPE_VARIATION_REF,
),
ScalarValue::Decimal128(v, p, s) if v.is_some() => (
LiteralType::Decimal(Decimal {
value: v.unwrap().to_le_bytes().to_vec(),
Expand Down Expand Up @@ -2353,6 +2363,7 @@ mod test {
round_trip_type(DataType::LargeBinary)?;
round_trip_type(DataType::Utf8)?;
round_trip_type(DataType::LargeUtf8)?;
round_trip_type(DataType::Utf8View)?;
round_trip_type(DataType::Decimal128(10, 2))?;
round_trip_type(DataType::Decimal256(30, 2))?;

Expand Down
1 change: 1 addition & 0 deletions datafusion/substrait/src/variation_const.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ pub const DATE_32_TYPE_VARIATION_REF: u32 = 0;
pub const DATE_64_TYPE_VARIATION_REF: u32 = 1;
pub const DEFAULT_CONTAINER_TYPE_VARIATION_REF: u32 = 0;
pub const LARGE_CONTAINER_TYPE_VARIATION_REF: u32 = 1;
pub const VIEW_CONTAINER_TYPE_VARIATION_REF: u32 = 2;
pub const DECIMAL_128_TYPE_VARIATION_REF: u32 = 0;
pub const DECIMAL_256_TYPE_VARIATION_REF: u32 = 1;

Expand Down
4 changes: 3 additions & 1 deletion datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,8 @@ async fn all_type_literal() -> Result<()> {
binary_col = arrow_cast('binary', 'Binary') AND
large_binary_col = arrow_cast('large_binary', 'LargeBinary') AND
utf8_col = arrow_cast('utf8', 'Utf8') AND
large_utf8_col = arrow_cast('large_utf8', 'LargeUtf8');",
large_utf8_col = arrow_cast('large_utf8', 'LargeUtf8') AND
view_utf8_col = arrow_cast('utf8_view', 'Utf8View');",
)
.await
}
Expand Down Expand Up @@ -1234,6 +1235,7 @@ async fn create_all_type_context() -> Result<SessionContext> {
Field::new("fixed_size_binary_col", DataType::FixedSizeBinary(42), true),
Field::new("utf8_col", DataType::Utf8, true),
Field::new("large_utf8_col", DataType::LargeUtf8, true),
Field::new("view_utf8_col", DataType::Utf8View, true),
Field::new_list("list_col", Field::new("item", DataType::Int64, true), true),
Field::new_list(
"large_list_col",
Expand Down

0 comments on commit d7be771

Please sign in to comment.