diff --git a/arrow/benches/csv_writer.rs b/arrow/benches/csv_writer.rs index 50b94d6836d9..62c5da980312 100644 --- a/arrow/benches/csv_writer.rs +++ b/arrow/benches/csv_writer.rs @@ -28,14 +28,14 @@ use arrow::record_batch::RecordBatch; use std::fs::File; use std::sync::Arc; -fn record_batches_to_csv() { +fn criterion_benchmark(c: &mut Criterion) { #[cfg(feature = "csv")] { let schema = Schema::new(vec![ Field::new("c1", DataType::Utf8, false), Field::new("c2", DataType::Float64, true), Field::new("c3", DataType::UInt32, false), - Field::new("c3", DataType::Boolean, true), + Field::new("c4", DataType::Boolean, true), ]); let c1 = StringArray::from(vec![ @@ -59,16 +59,17 @@ fn record_batches_to_csv() { let file = File::create("target/bench_write_csv.csv").unwrap(); let mut writer = csv::Writer::new(file); let batches = vec![&b, &b, &b, &b, &b, &b, &b, &b, &b, &b, &b]; - #[allow(clippy::unit_arg)] - criterion::black_box(for batch in batches { - writer.write(batch).unwrap() + + c.bench_function("record_batches_to_csv", |b| { + b.iter(|| { + #[allow(clippy::unit_arg)] + criterion::black_box(for batch in &batches { + writer.write(batch).unwrap() + }); + }); }); } } -fn criterion_benchmark(c: &mut Criterion) { - c.bench_function("record_batches_to_csv", |b| b.iter(record_batches_to_csv)); -} - criterion_group!(benches, criterion_benchmark); criterion_main!(benches); diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs index 0cb4db485239..0b374dba7397 100644 --- a/arrow/src/array/array_binary.rs +++ b/arrow/src/array/array_binary.rs @@ -666,6 +666,17 @@ impl DecimalArray { self.length * i as i32 } + #[inline] + pub fn value_as_string(&self, row: usize) -> String { + let decimal_string = self.value(row).to_string(); + if self.scale == 0 { + decimal_string + } else { + let splits = decimal_string.split_at(decimal_string.len() - self.scale); + format!("{}.{}", splits.0, splits.1) + } + } + pub fn from_fixed_size_list_array( v: FixedSizeListArray, precision: usize, @@ -729,7 +740,9 @@ impl fmt::Debug for DecimalArray { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "DecimalArray<{}, {}>\n[\n", self.precision, self.scale)?; print_long_array(self, f, |array, index, f| { - fmt::Debug::fmt(&array.value(index), f) + let formatted_decimal = array.value_as_string(index); + + write!(f, "{}", formatted_decimal) })?; write!(f, "]") } @@ -758,7 +771,7 @@ impl Array for DecimalArray { #[cfg(test)] mod tests { use crate::{ - array::{LargeListArray, ListArray}, + array::{DecimalBuilder, LargeListArray, ListArray}, datatypes::Field, }; @@ -1163,17 +1176,16 @@ mod tests { #[test] fn test_decimal_array_fmt_debug() { - let values: [u8; 32] = [ - 192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]; - let array_data = ArrayData::builder(DataType::Decimal(23, 6)) - .len(2) - .add_buffer(Buffer::from(&values[..])) - .build(); - let arr = DecimalArray::from(array_data); + let values: Vec = vec![8887000000, -8887000000]; + let mut decimal_builder = DecimalBuilder::new(3, 23, 6); + + values.iter().for_each(|&value| { + decimal_builder.append_value(value).unwrap(); + }); + decimal_builder.append_null().unwrap(); + let arr = decimal_builder.finish(); assert_eq!( - "DecimalArray<23, 6>\n[\n 8887000000,\n -8887000000,\n]", + "DecimalArray<23, 6>\n[\n 8887.000000,\n -8887.000000,\n null,\n]", format!("{:?}", arr) ); } diff --git a/arrow/src/csv/writer.rs b/arrow/src/csv/writer.rs index aa0ed675221f..b3b883810232 100644 --- a/arrow/src/csv/writer.rs +++ b/arrow/src/csv/writer.rs @@ -70,6 +70,7 @@ use std::io::Write; use crate::datatypes::*; use crate::error::{ArrowError, Result}; use crate::record_batch::RecordBatch; +use crate::util::display::make_string_from_decimal; use crate::{array::*, util::serialization::lexical_to_string}; const DEFAULT_DATE_FORMAT: &str = "%F"; const DEFAULT_TIME_FORMAT: &str = "%T"; @@ -242,6 +243,7 @@ impl Writer { }; format!("{}", datetime.format(&self.timestamp_format)) } + DataType::Decimal(..) => make_string_from_decimal(col, row_index)?, t => { // List and Struct arrays not supported by the writer, any // other type needs to be implemented @@ -566,6 +568,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo Field::new("c4", DataType::Boolean, true), Field::new("c5", DataType::Timestamp(TimeUnit::Millisecond, None), true), Field::new("c6", DataType::Time32(TimeUnit::Second), false), + Field::new("c7", DataType::Decimal(6, 2), false), ]); let c1 = StringArray::from(vec![ @@ -585,6 +588,11 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo None, ); let c6 = Time32SecondArray::from(vec![1234, 24680, 85563]); + let mut c7_builder = DecimalBuilder::new(5, 6, 2); + c7_builder.append_value(12345_i128).unwrap(); + c7_builder.append_value(-12345_i128).unwrap(); + c7_builder.append_null().unwrap(); + let c7 = c7_builder.finish(); let batch = RecordBatch::try_new( Arc::new(schema), @@ -595,6 +603,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo Arc::new(c4), Arc::new(c5), Arc::new(c6), + Arc::new(c7), ], ) .unwrap(); @@ -606,13 +615,13 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo writer.write(batch).unwrap(); } - let left = "c1,c2,c3,c4,c5,c6 -Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34 -consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20 -sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03 -Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34 -consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20 -sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03\n"; + let left = "c1,c2,c3,c4,c5,c6,c7 +Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,123.45 +consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20,-123.45 +sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03, +Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,123.45 +consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20,-123.45 +sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,\n"; let right = writer.writer.into_inner().map(|s| s.to_string()); assert_eq!(Some(left.to_string()), right.ok()); } diff --git a/arrow/src/util/display.rs b/arrow/src/util/display.rs index 13d9f1959f6e..bb75a3a66081 100644 --- a/arrow/src/util/display.rs +++ b/arrow/src/util/display.rs @@ -19,6 +19,8 @@ //! purposes. See the `pretty` crate for additional functions for //! record batch pretty printing. +use std::sync::Arc; + use crate::array::Array; use crate::datatypes::{ ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type, Int64Type, @@ -192,18 +194,15 @@ macro_rules! make_string_from_list { }}; } -macro_rules! make_string_from_decimal { - ($array_type: ty, $column: ident, $row: ident, $scale: ident) => {{ - let array = $column.as_any().downcast_ref::<$array_type>().unwrap(); - let decimal_string = array.value($row).to_string(); - let formatted_decimal = if *$scale == 0 { - decimal_string - } else { - let splits = decimal_string.split_at(decimal_string.len() - *$scale); - format!("{}.{}", splits.0, splits.1) - }; - Ok(formatted_decimal) - }}; +#[inline(always)] +pub fn make_string_from_decimal(column: &Arc, row: usize) -> Result { + let array = column + .as_any() + .downcast_ref::() + .unwrap(); + + let formatted_decimal = array.value_as_string(row); + Ok(formatted_decimal) } /// Get the value at the given row in an array as a String. @@ -231,9 +230,7 @@ pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result make_string!(array::Float32Array, column, row), DataType::Float32 => make_string!(array::Float32Array, column, row), DataType::Float64 => make_string!(array::Float64Array, column, row), - DataType::Decimal(_, scale) => { - make_string_from_decimal!(array::DecimalArray, column, row, scale) - } + DataType::Decimal(..) => make_string_from_decimal(column, row), DataType::Timestamp(unit, _) if *unit == TimeUnit::Second => { make_string_datetime!(array::TimestampSecondArray, column, row) }