diff --git a/parquet/src/arrow/converter.rs b/parquet/src/arrow/converter.rs index 169ee7b67028..51e1d8290ee3 100644 --- a/parquet/src/arrow/converter.rs +++ b/parquet/src/arrow/converter.rs @@ -15,30 +15,20 @@ // specific language governing permissions and limitations // under the License. -use crate::data_type::{ByteArray, DataType, FixedLenByteArray, Int96}; -// TODO: clean up imports (best done when there are few moving parts) +use crate::data_type::{ByteArray, FixedLenByteArray, Int96}; use arrow::array::{ - Array, ArrayRef, BinaryBuilder, FixedSizeBinaryBuilder, - IntervalDayTimeArray, IntervalDayTimeBuilder, IntervalYearMonthArray, - IntervalYearMonthBuilder, LargeBinaryBuilder, LargeStringBuilder, PrimitiveBuilder, - PrimitiveDictionaryBuilder, StringBuilder, StringDictionaryBuilder, + Array, ArrayRef, BinaryArray, BinaryBuilder, DecimalArray, FixedSizeBinaryArray, + FixedSizeBinaryBuilder, IntervalDayTimeArray, IntervalDayTimeBuilder, + IntervalYearMonthArray, IntervalYearMonthBuilder, LargeBinaryArray, + LargeBinaryBuilder, LargeStringArray, LargeStringBuilder, StringArray, StringBuilder, + TimestampNanosecondArray, }; -use arrow::compute::cast; use std::convert::{From, TryInto}; use std::sync::Arc; use crate::errors::Result; -use arrow::datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType}; - -use arrow::array::{ - BinaryArray, DecimalArray, DictionaryArray, FixedSizeBinaryArray, LargeBinaryArray, - LargeStringArray, PrimitiveArray, StringArray, TimestampNanosecondArray, -}; use std::marker::PhantomData; -use crate::data_type::Int32Type as ParquetInt32Type; -use arrow::datatypes::Int32Type; - /// A converter is used to consume record reader's content and convert it to arrow /// primitive array. pub trait Converter { @@ -100,13 +90,11 @@ impl DecimalArrayConverter { impl Converter>, DecimalArray> for DecimalArrayConverter { fn convert(&self, source: Vec>) -> Result { - let array = source.into_iter() + let array = source + .into_iter() .map(|array| array.map(|array| Self::from_bytes_to_i128(array.data()))) .collect::() - .with_precision_and_scale( - self.precision as usize, - self.scale as usize - )?; + .with_precision_and_scale(self.precision as usize, self.scale as usize)?; Ok(array) } @@ -251,92 +239,6 @@ impl Converter>, LargeBinaryArray> for LargeBinaryArrayCon } } -pub struct StringDictionaryArrayConverter {} - -impl Converter>, DictionaryArray> - for StringDictionaryArrayConverter -{ - fn convert(&self, source: Vec>) -> Result> { - let data_size = source - .iter() - .map(|x| x.as_ref().map(|b| b.len()).unwrap_or(0)) - .sum(); - - let keys_builder = PrimitiveBuilder::::new(source.len()); - let values_builder = StringBuilder::with_capacity(source.len(), data_size); - - let mut builder = StringDictionaryBuilder::new(keys_builder, values_builder); - for v in source { - match v { - Some(array) => { - let _ = builder.append(array.as_utf8()?)?; - } - None => builder.append_null()?, - } - } - - Ok(builder.finish()) - } -} - -pub struct DictionaryArrayConverter -{ - _dict_value_source_marker: PhantomData, - _dict_value_target_marker: PhantomData, - _parquet_marker: PhantomData, -} - -impl - DictionaryArrayConverter -{ - pub fn new() -> Self { - Self { - _dict_value_source_marker: PhantomData, - _dict_value_target_marker: PhantomData, - _parquet_marker: PhantomData, - } - } -} - -impl - Converter::T>>, DictionaryArray> - for DictionaryArrayConverter -where - K: ArrowPrimitiveType, - DictValueSourceType: ArrowPrimitiveType, - DictValueTargetType: ArrowPrimitiveType, - ParquetType: DataType, - PrimitiveArray: From::T>>>, -{ - fn convert( - &self, - source: Vec::T>>, - ) -> Result> { - let keys_builder = PrimitiveBuilder::::new(source.len()); - let values_builder = PrimitiveBuilder::::new(source.len()); - - let mut builder = PrimitiveDictionaryBuilder::new(keys_builder, values_builder); - - let source_array: Arc = - Arc::new(PrimitiveArray::::from(source)); - let target_array = cast(&source_array, &DictValueTargetType::DATA_TYPE)?; - let target = target_array - .as_any() - .downcast_ref::>() - .unwrap(); - - for i in 0..target.len() { - if target.is_null(i) { - builder.append_null()?; - } else { - let _ = builder.append(target.value(i))?; - } - } - - Ok(builder.finish()) - } -} - pub type Utf8Converter = ArrayRefConverter>, StringArray, Utf8ArrayConverter>; pub type LargeUtf8Converter = @@ -348,21 +250,6 @@ pub type LargeBinaryConverter = ArrayRefConverter< LargeBinaryArray, LargeBinaryArrayConverter, >; -pub type StringDictionaryConverter = ArrayRefConverter< - Vec>, - DictionaryArray, - StringDictionaryArrayConverter, ->; -pub type DictionaryConverter = ArrayRefConverter< - Vec::T>>, - DictionaryArray, - DictionaryArrayConverter, ->; -pub type PrimitiveDictionaryConverter = ArrayRefConverter< - Vec::T>>, - DictionaryArray, - DictionaryArrayConverter, ->; pub type Int96Converter = ArrayRefConverter>, TimestampNanosecondArray, Int96ArrayConverter>;