Skip to content

Commit

Permalink
Add Decimal to CsvWriter and improve debug display (#406) (#465)
Browse files Browse the repository at this point in the history
* Add Decimal to CsvWriter and improve debug display

* Measure CSV writer instead of file and data creation

* Re-use decimal formatting

Co-authored-by: Ádám Lippai <[email protected]>
  • Loading branch information
alamb and alippai authored Jun 21, 2021
1 parent 153085f commit 93b5171
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 34 deletions.
36 changes: 24 additions & 12 deletions arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,17 @@ impl DecimalArray {
self.length * i as i32
}

/// Returns the value at `row` rendered as a decimal string, with the decimal
/// point placed `self.scale` digits from the right.
///
/// When `self.scale` is zero the integer representation is returned as is.
/// Otherwise the digits are left-padded with zeros so that at least one
/// integer digit precedes the decimal point (an unscaled `5` at scale 2
/// becomes `0.05`), and the sign of a negative value is kept in front of the
/// integer part (`-5` at scale 2 becomes `-0.05`).
#[inline]
pub fn value_as_string(&self, row: usize) -> String {
    let decimal_string = self.value(row).to_string();
    if self.scale == 0 {
        return decimal_string;
    }

    // Detach the sign so it is neither counted as a digit nor ends up on the
    // wrong side of the decimal point.
    let (sign, digits) = match decimal_string.strip_prefix('-') {
        Some(unsigned) => ("-", unsigned),
        None => ("", decimal_string.as_str()),
    };

    // Pad to `scale + 1` digits: this guarantees `padded.len() > self.scale`,
    // so the subtraction below cannot underflow and `split_at` cannot panic.
    let padded = format!("{:0>width$}", digits, width = self.scale + 1);
    let (integer_part, fraction_part) = padded.split_at(padded.len() - self.scale);
    format!("{}{}.{}", sign, integer_part, fraction_part)
}

pub fn from_fixed_size_list_array(
v: FixedSizeListArray,
precision: usize,
Expand Down Expand Up @@ -729,7 +740,9 @@ impl fmt::Debug for DecimalArray {
// Debug rendering of the array: a `DecimalArray<precision, scale>` header
// followed by `[`, the per-element output produced through
// `print_long_array`, and a closing `]`.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "DecimalArray<{}, {}>\n[\n", self.precision, self.scale)?;
print_long_array(self, f, |array, index, f| {
// NOTE(review): this is a diff view. The next line looks like the
// pre-commit element formatter (Debug output of the raw value) that the
// lines after it replace -- confirm against the merged file.
fmt::Debug::fmt(&array.value(index), f)
// Post-commit formatter: reuse `value_as_string` so the element is written
// with its decimal point applied.
let formatted_decimal = array.value_as_string(index);

write!(f, "{}", formatted_decimal)
})?;
write!(f, "]")
}
Expand Down Expand Up @@ -758,7 +771,7 @@ impl Array for DecimalArray {
#[cfg(test)]
mod tests {
use crate::{
array::{LargeListArray, ListArray},
array::{DecimalBuilder, LargeListArray, ListArray},
datatypes::Field,
};

Expand Down Expand Up @@ -1163,17 +1176,16 @@ mod tests {

// Checks the `Debug` output of a `DecimalArray` declared as Decimal(23, 6).
// NOTE(review): this is a diff view, so the old and the new test body are
// interleaved below -- confirm against the merged file which lines survive.
#[test]
fn test_decimal_array_fmt_debug() {
// Old setup: two 16-byte values written out by hand. Read as little-endian
// two's-complement integers they are 8887000000 and -8887000000.
let values: [u8; 32] = [
192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
];
let array_data = ArrayData::builder(DataType::Decimal(23, 6))
.len(2)
.add_buffer(Buffer::from(&values[..]))
.build();
let arr = DecimalArray::from(array_data);
// New setup: the same two values appended through `DecimalBuilder`, plus a
// trailing null. Presumably the arguments of `new` are
// (capacity, precision, scale) -- TODO confirm against `DecimalBuilder`.
let values: Vec<i128> = vec![8887000000, -8887000000];
let mut decimal_builder = DecimalBuilder::new(3, 23, 6);

values.iter().for_each(|&value| {
decimal_builder.append_value(value).unwrap();
});
decimal_builder.append_null().unwrap();
let arr = decimal_builder.finish();
assert_eq!(
// First literal: old expectation (unscaled integers). Second literal: new
// expectation (decimal point applied, and the null entry listed).
"DecimalArray<23, 6>\n[\n 8887000000,\n -8887000000,\n]",
"DecimalArray<23, 6>\n[\n 8887.000000,\n -8887.000000,\n null,\n]",
format!("{:?}", arr)
);
}
Expand Down
23 changes: 16 additions & 7 deletions arrow/src/csv/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ use std::io::Write;
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::record_batch::RecordBatch;
use crate::util::display::make_string_from_decimal;
use crate::{array::*, util::serialization::lexical_to_string};
const DEFAULT_DATE_FORMAT: &str = "%F";
const DEFAULT_TIME_FORMAT: &str = "%T";
Expand Down Expand Up @@ -244,6 +245,7 @@ impl<W: Write> Writer<W> {
};
format!("{}", datetime.format(&self.timestamp_format))
}
DataType::Decimal(..) => make_string_from_decimal(col, row_index)?,
t => {
// List and Struct arrays not supported by the writer, any
// other type needs to be implemented
Expand Down Expand Up @@ -568,6 +570,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
Field::new("c4", DataType::Boolean, true),
Field::new("c5", DataType::Timestamp(TimeUnit::Millisecond, None), true),
Field::new("c6", DataType::Time32(TimeUnit::Second), false),
Field::new("c7", DataType::Decimal(6, 2), false),
]);

let c1 = StringArray::from(vec![
Expand All @@ -587,6 +590,11 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
None,
);
let c6 = Time32SecondArray::from(vec![1234, 24680, 85563]);
let mut c7_builder = DecimalBuilder::new(5, 6, 2);
c7_builder.append_value(12345_i128).unwrap();
c7_builder.append_value(-12345_i128).unwrap();
c7_builder.append_null().unwrap();
let c7 = c7_builder.finish();

let batch = RecordBatch::try_new(
Arc::new(schema),
Expand All @@ -597,6 +605,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
Arc::new(c4),
Arc::new(c5),
Arc::new(c6),
Arc::new(c7),
],
)
.unwrap();
Expand All @@ -608,13 +617,13 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
writer.write(batch).unwrap();
}

let left = "c1,c2,c3,c4,c5,c6
Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34
consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20
sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03
Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34
consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20
sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03\n";
let left = "c1,c2,c3,c4,c5,c6,c7
Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,123.45
consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20,-123.45
sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,
Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,123.45
consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20,-123.45
sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,\n";
let right = writer.writer.into_inner().map(|s| s.to_string());
assert_eq!(Some(left.to_string()), right.ok());
}
Expand Down
27 changes: 12 additions & 15 deletions arrow/src/util/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
//! purposes. See the `pretty` crate for additional functions for
//! record batch pretty printing.
use std::sync::Arc;

use crate::array::Array;
use crate::datatypes::{
ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type, Int64Type,
Expand Down Expand Up @@ -192,18 +194,15 @@ macro_rules! make_string_from_list {
}};
}

macro_rules! make_string_from_decimal {
($array_type: ty, $column: ident, $row: ident, $scale: ident) => {{
let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
let decimal_string = array.value($row).to_string();
let formatted_decimal = if *$scale == 0 {
decimal_string
} else {
let splits = decimal_string.split_at(decimal_string.len() - *$scale);
format!("{}.{}", splits.0, splits.1)
};
Ok(formatted_decimal)
}};
/// Formats the decimal stored at `row` of `column` as a `String`.
///
/// The formatting itself is delegated to `DecimalArray::value_as_string`.
///
/// # Panics
///
/// Panics if `column` cannot be downcast to `array::DecimalArray`.
#[inline(always)]
pub fn make_string_from_decimal(column: &Arc<dyn Array>, row: usize) -> Result<String> {
    let decimals = column.as_any().downcast_ref::<array::DecimalArray>().unwrap();
    Ok(decimals.value_as_string(row))
}

/// Get the value at the given row in an array as a String.
Expand Down Expand Up @@ -231,9 +230,7 @@ pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<Str
DataType::Float16 => make_string!(array::Float32Array, column, row),
DataType::Float32 => make_string!(array::Float32Array, column, row),
DataType::Float64 => make_string!(array::Float64Array, column, row),
DataType::Decimal(_, scale) => {
make_string_from_decimal!(array::DecimalArray, column, row, scale)
}
DataType::Decimal(..) => make_string_from_decimal(column, row),
DataType::Timestamp(unit, _) if *unit == TimeUnit::Second => {
make_string_datetime!(array::TimestampSecondArray, column, row)
}
Expand Down

0 comments on commit 93b5171

Please sign in to comment.