From ac9ffc30dda805310c3ae006df778c22c11bbf10 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sat, 21 Oct 2023 14:34:38 +0800 Subject: [PATCH] rename to struct Signed-off-by: jayzhan211 --- datafusion/common/src/scalar.rs | 87 +++++-------------- .../user_defined/user_defined_aggregates.rs | 2 +- .../proto/src/logical_plan/from_proto.rs | 2 +- datafusion/proto/src/logical_plan/to_proto.rs | 2 +- 4 files changed, 23 insertions(+), 70 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index e7d1f80e633e..404fc29f9874 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -101,7 +101,7 @@ pub enum ScalarValue { /// Represents a single element of a [`ListArray`] as an [`ArrayRef`] List(ArrayRef), /// Represents a single element of a [`StructArray`] as an [`ArrayRef`] - StructArr(ArrayRef), + Struct(ArrayRef), /// Date stored as a signed 32bit int days since UNIX epoch 1970-01-01 Date32(Option), /// Date stored as a signed 64bit int milliseconds since UNIX epoch 1970-01-01 @@ -203,8 +203,8 @@ impl PartialEq for ScalarValue { (Fixedsizelist(_, _, _), _) => false, (List(v1), List(v2)) => v1.eq(v2), (List(_), _) => false, - (StructArr(arr1), StructArr(arr2)) => arr1.eq(arr2), - (StructArr(_), _) => false, + (Struct(arr1), Struct(arr2)) => arr1.eq(arr2), + (Struct(_), _) => false, (Date32(v1), Date32(v2)) => v1.eq(v2), (Date32(_), _) => false, (Date64(v1), Date64(v2)) => v1.eq(v2), @@ -349,7 +349,7 @@ impl PartialOrd for ScalarValue { None } } - (StructArr(arr1), StructArr(arr2)) => { + (Struct(arr1), Struct(arr2)) => { if arr1.data_type() == arr2.data_type() { let struct_arr = as_struct_array(arr1).unwrap(); let struct_arr2 = as_struct_array(arr2).unwrap(); @@ -379,7 +379,7 @@ impl PartialOrd for ScalarValue { None } } - (StructArr(_), _) => None, + (Struct(_), _) => None, (List(_), _) => None, (Date32(v1), Date32(v2)) => v1.partial_cmp(v2), (Date32(_), _) => None, @@ -490,7 +490,7 @@ impl std::hash::Hash for ScalarValue { t.hash(state); l.hash(state); } - List(arr) | StructArr(arr) => { + List(arr) | Struct(arr) => { let arrays = vec![arr.to_owned()]; let hashes_buffer = &mut vec![0; arr.len()]; let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0); @@ -976,7 +976,7 @@ impl ScalarValue { Arc::new(Field::new("item", field.data_type().clone(), true)), *length, ), - ScalarValue::List(arr) | ScalarValue::StructArr(arr) => { + ScalarValue::List(arr) | ScalarValue::Struct(arr) => { arr.data_type().to_owned() } ScalarValue::Date32(_) => DataType::Date32, @@ -1125,7 +1125,7 @@ impl ScalarValue { ScalarValue::FixedSizeBinary(_, v) => v.is_none(), ScalarValue::LargeBinary(v) => v.is_none(), ScalarValue::Fixedsizelist(v, ..) => v.is_none(), - ScalarValue::List(arr) | ScalarValue::StructArr(arr) => { + ScalarValue::List(arr) | ScalarValue::Struct(arr) => { arr.len() == arr.null_count() } ScalarValue::Date32(v) => v.is_none(), @@ -1501,54 +1501,7 @@ impl ScalarValue { let list_array = ScalarValue::iter_to_array_list(scalars)?; Arc::new(list_array) } - DataType::Struct(_) => todo!("Not needed for now"), - // DataType::Struct(fields) => { - // // Initialize a Vector to store the ScalarValues for each column - // let mut columns: Vec> = - // (0..fields.len()).map(|_| Vec::new()).collect(); - - // // null mask - // let mut null_mask_builder = BooleanBuilder::new(); - - // // Iterate over scalars to populate the column scalars for each row - // for scalar in scalars { - // if let ScalarValue::Struct(values, fields) = scalar { - // match values { - // Some(values) => { - // // Push value for each field - // for (column, value) in columns.iter_mut().zip(values) { - // column.push(value.clone()); - // } - // null_mask_builder.append_value(false); - // } - // None => { - // // Push NULL of the appropriate type for each field - // for (column, field) in - // columns.iter_mut().zip(fields.as_ref()) - // { - // column - // .push(ScalarValue::try_from(field.data_type())?); - // } - // null_mask_builder.append_value(true); - // } - // }; - // } else { - // return _internal_err!("Expected Struct but found: {scalar}"); - // }; - // } - - // // Call iter_to_array recursively to convert the scalars for each column into Arrow arrays - // let field_values = fields - // .iter() - // .zip(columns) - // .map(|(field, column)| { - // Ok((field.clone(), Self::iter_to_array(column)?)) - // }) - // .collect::>>()?; - - // let array = StructArray::from(field_values); - // arrow::compute::nullif(&array, &null_mask_builder.finish())? - // } + DataType::Struct(_) => unimplemented!("Not needed for now"), DataType::Dictionary(key_type, value_type) => { // create the values array let value_scalars = scalars @@ -1869,7 +1822,7 @@ impl ScalarValue { } pub fn new_struct(fields: Fields, arrays: Vec) -> Self { - ScalarValue::StructArr(Arc::new(StructArray::new(fields, arrays, None))) + ScalarValue::Struct(Arc::new(StructArray::new(fields, arrays, None))) } /// Converts a scalar value into an array of `size` rows. @@ -1994,7 +1947,7 @@ impl ScalarValue { ScalarValue::Fixedsizelist(..) => { unimplemented!("FixedSizeList is not supported yet") } - ScalarValue::List(arr) | ScalarValue::StructArr(arr) => { + ScalarValue::List(arr) | ScalarValue::Struct(arr) => { let arrays = std::iter::repeat(arr.as_ref()) .take(size) .collect::>(); @@ -2385,7 +2338,7 @@ impl ScalarValue { } let struct_arr_at_index = StructArray::try_new(fields.to_owned(), field_arrays, None)?; - Self::StructArr(Arc::new(struct_arr_at_index)) + Self::Struct(Arc::new(struct_arr_at_index)) } DataType::FixedSizeBinary(_) => { let array = as_fixed_size_binary_array(array)?; @@ -2562,7 +2515,7 @@ impl ScalarValue { eq_array_primitive!(array, index, LargeBinaryArray, val) } ScalarValue::Fixedsizelist(..) => unimplemented!(), - ScalarValue::List(_) | ScalarValue::StructArr(_) => unimplemented!("ListArr"), + ScalarValue::List(_) | ScalarValue::Struct(_) => unimplemented!("ListArr"), ScalarValue::Date32(val) => { eq_array_primitive!(array, index, Date32Array, val) } @@ -2689,7 +2642,7 @@ impl ScalarValue { // `field` is boxed, so it is NOT already included in `self` + field.size() } - ScalarValue::List(arr) | ScalarValue::StructArr(arr) => { + ScalarValue::List(arr) | ScalarValue::Struct(arr) => { arr.get_array_memory_size() } ScalarValue::Dictionary(dt, sv) => { @@ -2772,7 +2725,7 @@ impl From> for ScalarValue { let fields = fields.finish().fields; let struct_array = StructArray::try_new(fields, arrays, None).unwrap(); - Self::StructArr(Arc::new(struct_array)) + Self::Struct(Arc::new(struct_array)) } } @@ -2979,7 +2932,7 @@ impl TryFrom<&DataType> for ScalarValue { ), DataType::List(_) => ScalarValue::List(new_null_array(&DataType::Null, 0)), DataType::Struct(fields) => { - ScalarValue::StructArr(Arc::new(StructArray::new_null(fields.clone(), 0))) + ScalarValue::Struct(Arc::new(StructArray::new_null(fields.clone(), 0))) } DataType::Null => ScalarValue::Null, _ => { @@ -3050,7 +3003,7 @@ impl fmt::Display for ScalarValue { )?, None => write!(f, "NULL")?, }, - ScalarValue::List(arr) | ScalarValue::StructArr(arr) => write!( + ScalarValue::List(arr) | ScalarValue::Struct(arr) => write!( f, "{}", arrow::util::pretty::pretty_format_columns("col", &[arr.to_owned()]) @@ -3120,7 +3073,7 @@ impl fmt::Debug for ScalarValue { ScalarValue::LargeBinary(Some(_)) => write!(f, "LargeBinary(\"{self}\")"), ScalarValue::Fixedsizelist(..) => write!(f, "FixedSizeList([{self}])"), ScalarValue::List(arr) => write!(f, "List([{arr:?}])"), - ScalarValue::StructArr(arr) => write!(f, "Struct([{arr:?}])"), + ScalarValue::Struct(arr) => write!(f, "Struct([{arr:?}])"), ScalarValue::Date32(_) => write!(f, "Date32(\"{self}\")"), ScalarValue::Date64(_) => write!(f, "Date64(\"{self}\")"), ScalarValue::Time32Second(_) => write!(f, "Time32Second(\"{self}\")"), @@ -3259,7 +3212,7 @@ mod tests { int.clone() as ArrayRef, ), ]); - let sv = ScalarValue::StructArr(Arc::new(struct_array)); + let sv = ScalarValue::Struct(Arc::new(struct_array)); let actual_arr = sv.to_array_of_size(2); let boolean = Arc::new(BooleanArray::from(vec![ @@ -4021,7 +3974,7 @@ mod tests { ), ]); let array: ArrayRef = Arc::new(struct_array); - assert_eq!(actual, ScalarValue::StructArr(array),); + assert_eq!(actual, ScalarValue::Struct(array),); Ok(()) } diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs index e1f2b6681e52..4e4e60ea4ebb 100644 --- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs +++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs @@ -578,7 +578,7 @@ impl FirstSelector { let struct_arr = StructArray::try_new(Self::fields(), vec![f64arr, timearr], None)?; - Ok(ScalarValue::StructArr(Arc::new(struct_arr))) + Ok(ScalarValue::Struct(Arc::new(struct_arr))) } } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index ecd43ae01a9c..a293b9871b99 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -731,7 +731,7 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { .map_err(DataFusionError::ArrowError) .map_err(|e| e.context("Decoding ScalarValue::List Value"))?; let arr = record_batch.column(0); - Self::StructArr(arr.to_owned()) + Self::Struct(arr.to_owned()) } Value::NullValue(v) => { let null_type: DataType = v.try_into()?; diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 0d4b2840a590..c2b35b293d3a 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1164,7 +1164,7 @@ impl TryFrom<&ScalarValue> for protobuf::ScalarValue { } // ScalarValue::Struct is serialized using Arrow IPC messages. // as a single column RecordBatch - ScalarValue::StructArr(arr) => { + ScalarValue::Struct(arr) => { // Wrap in a "field_name" column let batch = RecordBatch::try_from_iter(vec![( "field_name",