diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs index 8958ca6fae6..a6c2aee7cbc 100644 --- a/arrow-array/src/record_batch.rs +++ b/arrow-array/src/record_batch.rs @@ -18,8 +18,9 @@ //! A two-dimensional batch of column-oriented data with a defined //! [schema](arrow_schema::Schema). +use crate::cast::AsArray; use crate::{new_empty_array, Array, ArrayRef, StructArray}; -use arrow_schema::{ArrowError, DataType, Field, Schema, SchemaBuilder, SchemaRef}; +use arrow_schema::{ArrowError, DataType, Field, FieldRef, Schema, SchemaBuilder, SchemaRef}; use std::ops::Index; use std::sync::Arc; @@ -394,6 +395,108 @@ impl RecordBatch { ) } + /// Normalize a semi-structured [`RecordBatch`] into a flat table. + /// + /// Nested [`Field`]s will generate names separated by `separator`, up to a depth of `max_level` + /// (unlimited if `None`). + /// + /// e.g. given a [`RecordBatch`] with schema: + /// + /// ```text + /// "foo": StructArray<"bar": Utf8> + /// ``` + /// + /// A separator of `"."` would generate a batch with the schema: + /// + /// ```text + /// "foo.bar": Utf8 + /// ``` + /// + /// Note that giving a depth of `Some(0)` to `max_level` is the same as passing in `None`; + /// it will be treated as unlimited. + /// + /// # Example + /// + /// ``` + /// # use std::sync::Arc; + /// # use arrow_array::{ArrayRef, Int64Array, StringArray, StructArray, RecordBatch}; + /// # use arrow_schema::{DataType, Field, Fields, Schema}; + /// # + /// let animals: ArrayRef = Arc::new(StringArray::from(vec!["Parrot", ""])); + /// let n_legs: ArrayRef = Arc::new(Int64Array::from(vec![Some(2), Some(4)])); + /// + /// let animals_field = Arc::new(Field::new("animals", DataType::Utf8, true)); + /// let n_legs_field = Arc::new(Field::new("n_legs", DataType::Int64, true)); + /// + /// let a = Arc::new(StructArray::from(vec![ + /// (animals_field.clone(), Arc::new(animals.clone()) as ArrayRef), + /// (n_legs_field.clone(), Arc::new(n_legs.clone()) as ArrayRef), + /// ])); + /// + /// let schema = Schema::new(vec![ + /// Field::new( + /// "a", + /// DataType::Struct(Fields::from(vec![animals_field, n_legs_field])), + /// false, + /// ) + /// ]); + /// + /// let normalized = RecordBatch::try_new(Arc::new(schema), vec![a]) + /// .expect("valid conversion") + /// .normalize(".", None) + /// .expect("valid normalization"); + /// + /// let expected = RecordBatch::try_from_iter_with_nullable(vec![ + /// ("a.animals", animals.clone(), true), + /// ("a.n_legs", n_legs.clone(), true), + /// ]) + /// .expect("valid conversion"); + /// + /// assert_eq!(expected, normalized); + /// ``` + pub fn normalize(&self, separator: &str, max_level: Option) -> Result { + let max_level = match max_level.unwrap_or(usize::MAX) { + 0 => usize::MAX, + val => val, + }; + let mut stack: Vec<(usize, &ArrayRef, Vec<&str>, &FieldRef)> = self + .columns + .iter() + .zip(self.schema.fields()) + .rev() + .map(|(c, f)| { + let name_vec: Vec<&str> = vec![f.name()]; + (0, c, name_vec, f) + }) + .collect(); + let mut columns: Vec = Vec::new(); + let mut fields: Vec = Vec::new(); + + while let Some((depth, c, name, field_ref)) = stack.pop() { + match field_ref.data_type() { + DataType::Struct(ff) if depth < max_level => { + // Need to zip these in reverse to maintain original order + for (cff, fff) in c.as_struct().columns().iter().zip(ff.into_iter()).rev() { + let mut name = name.clone(); + name.push(separator); + name.push(fff.name()); + stack.push((depth + 1, cff, name, fff)) + } + } + _ => { + let updated_field = Field::new( + name.concat(), + field_ref.data_type().clone(), + field_ref.is_nullable(), + ); + columns.push(c.clone()); + fields.push(Arc::new(updated_field)); + } + } + } + RecordBatch::try_new(Arc::new(Schema::new(fields)), columns) + } + /// Returns the number of columns in the record batch. /// /// # Example @@ -768,8 +871,6 @@ where #[cfg(test)] mod tests { - use std::collections::HashMap; - use super::*; use crate::{ BooleanArray, Int32Array, Int64Array, Int8Array, ListArray, StringArray, StringViewArray, @@ -777,6 +878,7 @@ mod tests { use arrow_buffer::{Buffer, ToByteSlice}; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::Fields; + use std::collections::HashMap; #[test] fn create_record_batch() { @@ -1197,6 +1299,181 @@ mod tests { assert_ne!(batch1, batch2); } + #[test] + fn normalize_simple() { + let animals: ArrayRef = Arc::new(StringArray::from(vec!["Parrot", ""])); + let n_legs: ArrayRef = Arc::new(Int64Array::from(vec![Some(2), Some(4)])); + let year: ArrayRef = Arc::new(Int64Array::from(vec![None, Some(2022)])); + + let animals_field = Arc::new(Field::new("animals", DataType::Utf8, true)); + let n_legs_field = Arc::new(Field::new("n_legs", DataType::Int64, true)); + let year_field = Arc::new(Field::new("year", DataType::Int64, true)); + + let a = Arc::new(StructArray::from(vec![ + (animals_field.clone(), Arc::new(animals.clone()) as ArrayRef), + (n_legs_field.clone(), Arc::new(n_legs.clone()) as ArrayRef), + (year_field.clone(), Arc::new(year.clone()) as ArrayRef), + ])); + + let month = Arc::new(Int64Array::from(vec![Some(4), Some(6)])); + + let schema = Schema::new(vec![ + Field::new( + "a", + DataType::Struct(Fields::from(vec![animals_field, n_legs_field, year_field])), + false, + ), + Field::new("month", DataType::Int64, true), + ]); + + let normalized = + RecordBatch::try_new(Arc::new(schema.clone()), vec![a.clone(), month.clone()]) + .expect("valid conversion") + .normalize(".", Some(0)) + .expect("valid normalization"); + + let expected = RecordBatch::try_from_iter_with_nullable(vec![ + ("a.animals", animals.clone(), true), + ("a.n_legs", n_legs.clone(), true), + ("a.year", year.clone(), true), + ("month", month.clone(), true), + ]) + .expect("valid conversion"); + + assert_eq!(expected, normalized); + + // check 0 and None have the same effect + let normalized = RecordBatch::try_new(Arc::new(schema), vec![a, month.clone()]) + .expect("valid conversion") + .normalize(".", None) + .expect("valid normalization"); + + assert_eq!(expected, normalized); + } + + #[test] + fn normalize_nested() { + // Initialize schema + let a = Arc::new(Field::new("a", DataType::Int64, true)); + let b = Arc::new(Field::new("b", DataType::Int64, false)); + let c = Arc::new(Field::new("c", DataType::Int64, true)); + + let one = Arc::new(Field::new( + "1", + DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])), + false, + )); + let two = Arc::new(Field::new( + "2", + DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])), + true, + )); + + let exclamation = Arc::new(Field::new( + "!", + DataType::Struct(Fields::from(vec![one.clone(), two.clone()])), + false, + )); + + let schema = Schema::new(vec![exclamation.clone()]); + + // Initialize fields + let a_field = Int64Array::from(vec![Some(0), Some(1)]); + let b_field = Int64Array::from(vec![Some(2), Some(3)]); + let c_field = Int64Array::from(vec![None, Some(4)]); + + let one_field = StructArray::from(vec![ + (a.clone(), Arc::new(a_field.clone()) as ArrayRef), + (b.clone(), Arc::new(b_field.clone()) as ArrayRef), + (c.clone(), Arc::new(c_field.clone()) as ArrayRef), + ]); + let two_field = StructArray::from(vec![ + (a.clone(), Arc::new(a_field.clone()) as ArrayRef), + (b.clone(), Arc::new(b_field.clone()) as ArrayRef), + (c.clone(), Arc::new(c_field.clone()) as ArrayRef), + ]); + + let exclamation_field = Arc::new(StructArray::from(vec![ + (one.clone(), Arc::new(one_field) as ArrayRef), + (two.clone(), Arc::new(two_field) as ArrayRef), + ])); + + // Normalize top level + let normalized = + RecordBatch::try_new(Arc::new(schema.clone()), vec![exclamation_field.clone()]) + .expect("valid conversion") + .normalize(".", Some(1)) + .expect("valid normalization"); + + let expected = RecordBatch::try_from_iter_with_nullable(vec![ + ( + "!.1", + Arc::new(StructArray::from(vec![ + (a.clone(), Arc::new(a_field.clone()) as ArrayRef), + (b.clone(), Arc::new(b_field.clone()) as ArrayRef), + (c.clone(), Arc::new(c_field.clone()) as ArrayRef), + ])) as ArrayRef, + false, + ), + ( + "!.2", + Arc::new(StructArray::from(vec![ + (a.clone(), Arc::new(a_field.clone()) as ArrayRef), + (b.clone(), Arc::new(b_field.clone()) as ArrayRef), + (c.clone(), Arc::new(c_field.clone()) as ArrayRef), + ])) as ArrayRef, + true, + ), + ]) + .expect("valid conversion"); + + assert_eq!(expected, normalized); + + // Normalize all levels + let normalized = RecordBatch::try_new(Arc::new(schema), vec![exclamation_field]) + .expect("valid conversion") + .normalize(".", None) + .expect("valid normalization"); + + let expected = RecordBatch::try_from_iter_with_nullable(vec![ + ("!.1.a", Arc::new(a_field.clone()) as ArrayRef, true), + ("!.1.b", Arc::new(b_field.clone()) as ArrayRef, false), + ("!.1.c", Arc::new(c_field.clone()) as ArrayRef, true), + ("!.2.a", Arc::new(a_field.clone()) as ArrayRef, true), + ("!.2.b", Arc::new(b_field.clone()) as ArrayRef, false), + ("!.2.c", Arc::new(c_field.clone()) as ArrayRef, true), + ]) + .expect("valid conversion"); + + assert_eq!(expected, normalized); + } + + #[test] + fn normalize_empty() { + let animals_field = Arc::new(Field::new("animals", DataType::Utf8, true)); + let n_legs_field = Arc::new(Field::new("n_legs", DataType::Int64, true)); + let year_field = Arc::new(Field::new("year", DataType::Int64, true)); + + let schema = Schema::new(vec![ + Field::new( + "a", + DataType::Struct(Fields::from(vec![animals_field, n_legs_field, year_field])), + false, + ), + Field::new("month", DataType::Int64, true), + ]); + + let normalized = RecordBatch::new_empty(Arc::new(schema.clone())) + .normalize(".", Some(0)) + .expect("valid normalization"); + + let expected = RecordBatch::new_empty(Arc::new( + schema.normalize(".", Some(0)).expect("valid normalization"), + )); + + assert_eq!(expected, normalized); + } + #[test] fn project() { let a: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])); diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index 6c79da53f98..9affd416299 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -22,7 +22,7 @@ use std::sync::Arc; use crate::error::ArrowError; use crate::field::Field; -use crate::{FieldRef, Fields}; +use crate::{DataType, FieldRef, Fields}; /// A builder to facilitate building a [`Schema`] from iteratively from [`FieldRef`] #[derive(Debug, Default)] @@ -418,6 +418,89 @@ impl Schema { &self.metadata } + /// Normalize a [`Schema`] into a flat table. + /// + /// Nested [`Field`]s will generate names separated by `separator`, up to a depth of `max_level` + /// (unlimited if `None`). + /// + /// e.g. given a [`Schema`]: + /// + /// ```text + /// "foo": StructArray<"bar": Utf8> + /// ``` + /// + /// A separator of `"."` would generate a batch with the schema: + /// + /// ```text + /// "foo.bar": Utf8 + /// ``` + /// + /// Note that giving a depth of `Some(0)` to `max_level` is the same as passing in `None`; + /// it will be treated as unlimited. + /// + /// # Example + /// + /// ``` + /// # use std::sync::Arc; + /// # use arrow_schema::{DataType, Field, Fields, Schema}; + /// let schema = Schema::new(vec![ + /// Field::new( + /// "a", + /// DataType::Struct(Fields::from(vec![ + /// Arc::new(Field::new("animals", DataType::Utf8, true)), + /// Arc::new(Field::new("n_legs", DataType::Int64, true)), + /// ])), + /// false, + /// ), + /// ]) + /// .normalize(".", None) + /// .expect("valid normalization"); + /// let expected = Schema::new(vec![ + /// Field::new("a.animals", DataType::Utf8, true), + /// Field::new("a.n_legs", DataType::Int64, true), + /// ]); + /// assert_eq!(schema, expected); + /// ``` + pub fn normalize(&self, separator: &str, max_level: Option) -> Result { + let max_level = match max_level.unwrap_or(usize::MAX) { + 0 => usize::MAX, + val => val, + }; + let mut stack: Vec<(usize, Vec<&str>, &FieldRef)> = self + .fields() + .iter() + .rev() + .map(|f| { + let name_vec: Vec<&str> = vec![f.name()]; + (0, name_vec, f) + }) + .collect(); + let mut fields: Vec = Vec::new(); + + while let Some((depth, name, field_ref)) = stack.pop() { + match field_ref.data_type() { + DataType::Struct(ff) if depth < max_level => { + // Need to zip these in reverse to maintain original order + for fff in ff.into_iter().rev() { + let mut name = name.clone(); + name.push(separator); + name.push(fff.name()); + stack.push((depth + 1, name, fff)) + } + } + _ => { + let updated_field = Field::new( + name.concat(), + field_ref.data_type().clone(), + field_ref.is_nullable(), + ); + fields.push(Arc::new(updated_field)); + } + } + } + Ok(Schema::new(fields)) + } + /// Look up a column by name and return a immutable reference to the column along with /// its index. pub fn column_with_name(&self, name: &str) -> Option<(usize, &Field)> { @@ -679,6 +762,410 @@ mod tests { schema.index_of("nickname").unwrap(); } + #[test] + fn normalize_simple() { + let schema = Schema::new(vec![ + Field::new( + "a", + DataType::Struct(Fields::from(vec![ + Arc::new(Field::new("animals", DataType::Utf8, true)), + Arc::new(Field::new("n_legs", DataType::Int64, true)), + Arc::new(Field::new("year", DataType::Int64, true)), + ])), + false, + ), + Field::new("month", DataType::Int64, true), + ]) + .normalize(".", Some(0)) + .expect("valid normalization"); + + let expected = Schema::new(vec![ + Field::new("a.animals", DataType::Utf8, true), + Field::new("a.n_legs", DataType::Int64, true), + Field::new("a.year", DataType::Int64, true), + Field::new("month", DataType::Int64, true), + ]); + + assert_eq!(schema, expected); + + // Check that 0, None have the same result + let schema = Schema::new(vec![ + Field::new( + "a", + DataType::Struct(Fields::from(vec![ + Arc::new(Field::new("animals", DataType::Utf8, true)), + Arc::new(Field::new("n_legs", DataType::Int64, true)), + Arc::new(Field::new("year", DataType::Int64, true)), + ])), + false, + ), + Field::new("month", DataType::Int64, true), + ]) + .normalize(".", None) + .expect("valid normalization"); + + assert_eq!(schema, expected); + } + + #[test] + fn normalize_nested() { + let a = Arc::new(Field::new("a", DataType::Utf8, true)); + let b = Arc::new(Field::new("b", DataType::Int64, false)); + let c = Arc::new(Field::new("c", DataType::Int64, true)); + + let d = Arc::new(Field::new("d", DataType::Utf8, true)); + let e = Arc::new(Field::new("e", DataType::Int64, false)); + let f = Arc::new(Field::new("f", DataType::Int64, true)); + + let one = Arc::new(Field::new( + "1", + DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])), + false, + )); + let two = Arc::new(Field::new( + "2", + DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])), + true, + )); + + let exclamation = Arc::new(Field::new( + "!", + DataType::Struct(Fields::from(vec![one, two])), + false, + )); + + let normalize_all = Schema::new(vec![exclamation.clone()]) + .normalize(".", Some(0)) + .expect("valid normalization"); + + let expected = Schema::new(vec![ + Field::new("!.1.a", DataType::Utf8, true), + Field::new("!.1.b", DataType::Int64, false), + Field::new("!.1.c", DataType::Int64, true), + Field::new("!.2.d", DataType::Utf8, true), + Field::new("!.2.e", DataType::Int64, false), + Field::new("!.2.f", DataType::Int64, true), + ]); + + assert_eq!(normalize_all, expected); + + let normalize_depth_one = Schema::new(vec![exclamation]) + .normalize(".", Some(1)) + .expect("valid normalization"); + + let expected = Schema::new(vec![ + Field::new("!.1", DataType::Struct(Fields::from(vec![a, b, c])), false), + Field::new("!.2", DataType::Struct(Fields::from(vec![d, e, f])), true), + ]); + + assert_eq!(normalize_depth_one, expected); + } + + #[test] + fn normalize_list() { + // Only the Struct type field should be unwrapped + let a = Arc::new(Field::new("a", DataType::Utf8, true)); + let b = Arc::new(Field::new("b", DataType::Int64, false)); + let c = Arc::new(Field::new("c", DataType::Int64, true)); + let d = Arc::new(Field::new("d", DataType::Utf8, true)); + let e = Arc::new(Field::new("e", DataType::Int64, false)); + let f = Arc::new(Field::new("f", DataType::Int64, true)); + + let one = Arc::new(Field::new( + "1", + DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])), + true, + )); + + let two = Arc::new(Field::new( + "2", + DataType::List(Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])), + true, + ))), + false, + )); + + let exclamation = Arc::new(Field::new( + "!", + DataType::Struct(Fields::from(vec![one.clone(), two.clone()])), + false, + )); + + let normalize_all = Schema::new(vec![exclamation.clone()]) + .normalize(".", None) + .expect("valid normalization"); + + // List shouldn't be affected + let expected = Schema::new(vec![ + Field::new("!.1.a", DataType::Utf8, true), + Field::new("!.1.b", DataType::Int64, false), + Field::new("!.1.c", DataType::Int64, true), + Field::new( + "!.2", + DataType::List(Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])), + true, + ))), + false, + ), + ]); + + assert_eq!(normalize_all, expected); + assert_eq!(normalize_all.fields().len(), 4); + + // FixedSizeList + let two = Arc::new(Field::new( + "2", + DataType::FixedSizeList( + Arc::new(Field::new_fixed_size_list( + "3", + Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])), + true, + )), + 1, + true, + )), + 1, + ), + false, + )); + + let exclamation = Arc::new(Field::new( + "!", + DataType::Struct(Fields::from(vec![one.clone(), two])), + false, + )); + + let normalize_all = Schema::new(vec![exclamation.clone()]) + .normalize(".", None) + .expect("valid normalization"); + + // FixedSizeList shouldn't be affected + let expected = Schema::new(vec![ + Field::new("!.1.a", DataType::Utf8, true), + Field::new("!.1.b", DataType::Int64, false), + Field::new("!.1.c", DataType::Int64, true), + Field::new( + "!.2", + DataType::FixedSizeList( + Arc::new(Field::new_fixed_size_list( + "3", + Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])), + true, + )), + 1, + true, + )), + 1, + ), + false, + ), + ]); + + assert_eq!(normalize_all, expected); + assert_eq!(normalize_all.fields().len(), 4); + + // LargeList + let two = Arc::new(Field::new( + "2", + DataType::FixedSizeList( + Arc::new(Field::new_large_list( + "3", + Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])), + true, + )), + true, + )), + 1, + ), + false, + )); + + let exclamation = Arc::new(Field::new( + "!", + DataType::Struct(Fields::from(vec![one.clone(), two])), + false, + )); + + let normalize_all = Schema::new(vec![exclamation.clone()]) + .normalize(".", None) + .expect("valid normalization"); + + // LargeList shouldn't be affected + let expected = Schema::new(vec![ + Field::new("!.1.a", DataType::Utf8, true), + Field::new("!.1.b", DataType::Int64, false), + Field::new("!.1.c", DataType::Int64, true), + Field::new( + "!.2", + DataType::FixedSizeList( + Arc::new(Field::new_large_list( + "3", + Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])), + true, + )), + true, + )), + 1, + ), + false, + ), + ]); + + assert_eq!(normalize_all, expected); + assert_eq!(normalize_all.fields().len(), 4); + } + + #[test] + fn normalize_deep_nested() { + // No unwrapping expected + let a = Arc::new(Field::new("a", DataType::Utf8, true)); + let b = Arc::new(Field::new("b", DataType::Int64, false)); + let c = Arc::new(Field::new("c", DataType::Int64, true)); + let d = Arc::new(Field::new("d", DataType::Utf8, true)); + let e = Arc::new(Field::new("e", DataType::Int64, false)); + let f = Arc::new(Field::new("f", DataType::Int64, true)); + + let one = Arc::new(Field::new( + "1", + DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])), + true, + )); + + let two = Arc::new(Field::new( + "2", + DataType::List(Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])), + true, + ))), + false, + )); + + let l10 = Arc::new(Field::new( + "l10", + DataType::List(Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![one, two])), + true, + ))), + false, + )); + + let l9 = Arc::new(Field::new( + "l9", + DataType::List(Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![l10])), + true, + ))), + false, + )); + + let l8 = Arc::new(Field::new( + "l8", + DataType::List(Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![l9])), + true, + ))), + false, + )); + let l7 = Arc::new(Field::new( + "l7", + DataType::List(Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![l8])), + true, + ))), + false, + )); + let l6 = Arc::new(Field::new( + "l6", + DataType::List(Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![l7])), + true, + ))), + false, + )); + let l5 = Arc::new(Field::new( + "l5", + DataType::List(Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![l6])), + true, + ))), + false, + )); + let l4 = Arc::new(Field::new( + "l4", + DataType::List(Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![l5])), + true, + ))), + false, + )); + let l3 = Arc::new(Field::new( + "l3", + DataType::List(Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![l4])), + true, + ))), + false, + )); + let l2 = Arc::new(Field::new( + "l2", + DataType::List(Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![l3])), + true, + ))), + false, + )); + let l1 = Arc::new(Field::new( + "l1", + DataType::List(Arc::new(Field::new_list_field( + DataType::Struct(Fields::from(vec![l2])), + true, + ))), + false, + )); + + let normalize_all = Schema::new(vec![l1]) + .normalize(".", None) + .expect("valid normalization"); + + assert_eq!(normalize_all.fields().len(), 1); + } + + #[test] + fn normalize_dictionary() { + let a = Arc::new(Field::new("a", DataType::Utf8, true)); + let b = Arc::new(Field::new("b", DataType::Int64, false)); + + let one = Arc::new(Field::new( + "1", + DataType::Dictionary( + Box::new(DataType::Int32), + Box::new(DataType::Struct(Fields::from(vec![a.clone(), b.clone()]))), + ), + false, + )); + + let normalize_all = Schema::new(vec![one.clone()]) + .normalize(".", None) + .expect("valid normalization"); + + let expected = Schema::new(vec![Field::new( + "1", + DataType::Dictionary( + Box::new(DataType::Int32), + Box::new(DataType::Struct(Fields::from(vec![a.clone(), b.clone()]))), + ), + false, + )]); + + assert_eq!(normalize_all, expected); + } + #[test] #[should_panic( expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"