From 7449e2c6643445c3b24f956f50f13b512112299d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 14 Jan 2022 11:17:36 -0500 Subject: [PATCH 1/4] Fix new clippy lints introduced in Rust 1.58 (#1170) --- arrow/src/buffer/immutable.rs | 1 + arrow/src/util/test_util.rs | 2 +- parquet/src/schema/printer.rs | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arrow/src/buffer/immutable.rs b/arrow/src/buffer/immutable.rs index bd09702bf3f9..b5174d3288ae 100644 --- a/arrow/src/buffer/immutable.rs +++ b/arrow/src/buffer/immutable.rs @@ -244,6 +244,7 @@ impl std::ops::Deref for Buffer { } unsafe impl Sync for Buffer {} +#[allow(clippy::non_send_fields_in_send_ty)] unsafe impl Send for Buffer {} impl From for Buffer { diff --git a/arrow/src/util/test_util.rs b/arrow/src/util/test_util.rs index 6b1ad4be1d76..cae148a53d5c 100644 --- a/arrow/src/util/test_util.rs +++ b/arrow/src/util/test_util.rs @@ -147,7 +147,7 @@ fn get_data_dir(udf_env: &str, submodule_data: &str) -> Result { // Also print converted type if it is available match converted_type { - ConvertedType::NONE => format!(""), + ConvertedType::NONE => String::new(), decimal @ ConvertedType::DECIMAL => { // For decimal type we should print precision and scale if they // are > 0, e.g. DECIMAL(9,2) - @@ -256,7 +256,7 @@ fn print_logical_and_converted( format!("({},{})", p, s) } (p, 0) if p > 0 => format!("({})", p), - _ => format!(""), + _ => String::new(), }; format!("{}{}", decimal, precision_scale) } From 5fb0534b5ac8bf1c125a5e3f9b5f1cda75baa0a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20Horstmann?= Date: Fri, 14 Jan 2022 17:17:44 +0100 Subject: [PATCH 2/4] Fix compilation error with simd feature (#1169) --- arrow/src/compute/kernels/arithmetic.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 340c1139a464..4eb957678892 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -628,7 +628,11 @@ where #[cfg(feature = "simd")] { let scalar_vector = T::init(scalar); - return simd_unary_math_op(array, |x| x - scalar_vector, |x| x - scalar); + return Ok(simd_unary_math_op( + array, + |x| x - scalar_vector, + |x| x - scalar, + )); } #[cfg(not(feature = "simd"))] return Ok(unary(array, |value| value - scalar)); @@ -706,7 +710,11 @@ where #[cfg(feature = "simd")] { let scalar_vector = T::init(scalar); - return simd_unary_math_op(array, |x| x * scalar_vector, |x| x * scalar); + return Ok(simd_unary_math_op( + array, + |x| x * scalar_vector, + |x| x * scalar, + )); } #[cfg(not(feature = "simd"))] return Ok(unary(array, |value| value * scalar)); From f2d7a2149e2d9e097be73fd70be5dc83771a5630 Mon Sep 17 00:00:00 2001 From: Helgi Kristvin Sigurbjarnarson Date: Fri, 14 Jan 2022 10:09:51 -0800 Subject: [PATCH 3/4] Bugfix in parquet writing empty lists of structs (#1166) Fix a bug in the definition level calculation for fields nested within a struct and a list. When a list is empty or null in parquet the nested field gets a null value. However, in arrow, the value is simply missing. When serializing an immediate child of the list, the list offsets are used to calculate the correct definition level for its children, but it is not carried further to fields nested deeper (e.g., fields on a struct within a list). This (somewhat hacky) fix treats a struct within a list as if it were a list. --- parquet/src/arrow/levels.rs | 100 +++++++++++++++++++++++++++++++++++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/parquet/src/arrow/levels.rs b/parquet/src/arrow/levels.rs index 601e2c0966da..07460884df82 100644 --- a/parquet/src/arrow/levels.rs +++ b/parquet/src/arrow/levels.rs @@ -287,11 +287,18 @@ impl LevelInfo { .as_any() .downcast_ref::() .expect("Unable to get struct array"); - let struct_level = self.calculate_child_levels( + let mut struct_level = self.calculate_child_levels( array_offsets, array_mask, LevelType::Struct(field.is_nullable()), ); + + // If the parent field is a list, calculate the children of the struct as if it + // were a list as well. + if matches!(self.level_type, LevelType::List(_)) { + struct_level.level_type = LevelType::List(false); + } + let mut struct_levels = vec![]; struct_array .columns() @@ -1675,4 +1682,95 @@ mod tests { }; assert_eq!(list_level, &expected_level); } + + #[test] + fn test_list_of_struct() { + // define schema + let int_field = Field::new("a", DataType::Int32, true); + let item_field = + Field::new("item", DataType::Struct(vec![int_field.clone()]), true); + let list_field = Field::new("list", DataType::List(Box::new(item_field)), true); + + let int_builder = Int32Builder::new(10); + let struct_builder = + StructBuilder::new(vec![int_field], vec![Box::new(int_builder)]); + let mut list_builder = ListBuilder::new(struct_builder); + + // [{a: 1}], [], null, [null, null], [{a: null}], [{a: 2}] + // + // [{a: 1}] + let values = list_builder.values(); + values + .field_builder::(0) + .unwrap() + .append_value(1) + .unwrap(); + values.append(true).unwrap(); + list_builder.append(true).unwrap(); + + // [] + list_builder.append(true).unwrap(); + + // null + list_builder.append(false).unwrap(); + + // [null, null] + let values = list_builder.values(); + values + .field_builder::(0) + .unwrap() + .append_null() + .unwrap(); + values.append(false).unwrap(); + values + .field_builder::(0) + .unwrap() + .append_null() + .unwrap(); + values.append(false).unwrap(); + list_builder.append(true).unwrap(); + + // [{a: null}] + let values = list_builder.values(); + values + .field_builder::(0) + .unwrap() + .append_null() + .unwrap(); + values.append(true).unwrap(); + list_builder.append(true).unwrap(); + + // [{a: 2}] + let values = list_builder.values(); + values + .field_builder::(0) + .unwrap() + .append_value(2) + .unwrap(); + values.append(true).unwrap(); + list_builder.append(true).unwrap(); + + let array = Arc::new(list_builder.finish()); + + let schema = Arc::new(Schema::new(vec![list_field])); + + let rb = RecordBatch::try_new(schema, vec![array]).unwrap(); + + let batch_level = LevelInfo::new(0, rb.num_rows()); + let list_level = + &batch_level.calculate_array_levels(rb.column(0), rb.schema().field(0))[0]; + + let expected_level = LevelInfo { + definition: vec![4, 1, 0, 2, 2, 3, 4], + repetition: Some(vec![0, 0, 0, 0, 1, 0, 0]), + array_offsets: vec![0, 1, 1, 1, 3, 4, 5], + array_mask: vec![true, true, false, false, false, false, true], + max_definition: 4, + level_type: LevelType::Primitive(true), + offset: 0, + length: 5, + }; + + assert_eq!(list_level, &expected_level); + } } From 66b84f37e18e4ba5f9b3463f84f4bbbefb6c4341 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Sat, 15 Jan 2022 11:34:44 +0000 Subject: [PATCH 4/4] Fix record formatting in 1.58 (#1178) --- parquet/src/record/api.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/parquet/src/record/api.rs b/parquet/src/record/api.rs index 0293ffee7da0..f5b30753dfee 100644 --- a/parquet/src/record/api.rs +++ b/parquet/src/record/api.rs @@ -716,15 +716,19 @@ impl fmt::Display for Field { Field::Float(value) => { if !(1e-15..=1e19).contains(&value) { write!(f, "{:E}", value) + } else if value.trunc() == value { + write!(f, "{}.0", value) } else { - write!(f, "{:?}", value) + write!(f, "{}", value) } } Field::Double(value) => { if !(1e-15..=1e19).contains(&value) { write!(f, "{:E}", value) + } else if value.trunc() == value { + write!(f, "{}.0", value) } else { - write!(f, "{:?}", value) + write!(f, "{}", value) } } Field::Decimal(ref value) => {