From ea68a88cd2a43fa492fff664a271925ff8a1c758 Mon Sep 17 00:00:00 2001
From: Christophe Petitjean
Date: Fri, 10 Mar 2023 14:42:02 +0100
Subject: [PATCH] Fixed issue with Time32/Time64 datatype in csv reader (#1425)

---
 src/io/csv/read_utils.rs    | 39 +++++++++++++++++++++++++++++-
 src/temporal_conversions.rs |  6 ++---
 tests/it/io/csv/read.rs     | 48 +++++++++++++++++++++++++++++++++++++
 3 files changed, 89 insertions(+), 4 deletions(-)

diff --git a/src/io/csv/read_utils.rs b/src/io/csv/read_utils.rs
index 2212ac45acc..d23f6c1197f 100644
--- a/src/io/csv/read_utils.rs
+++ b/src/io/csv/read_utils.rs
@@ -1,4 +1,4 @@
-use chrono::Datelike;
+use chrono::{Datelike, Timelike};
 
 use crate::{
     array::*,
@@ -202,6 +202,33 @@ pub(crate) fn deserialize_column(
                 .and_then(|x| x.parse::<chrono::NaiveDateTime>().ok())
                 .map(|x| x.timestamp_millis())
         }),
+        Time32(time_unit) => deserialize_primitive(rows, column, datatype, |bytes| {
+            // Widen to `u64`: `hour * 3_600 * factor` overflows `u32` for the
+            // microsecond and nanosecond factors.
+            let factor: u64 = get_factor_from_timeunit(time_unit).into();
+            to_utf8(bytes)
+                .and_then(|x| x.parse::<chrono::NaiveTime>().ok())
+                .map(|x| {
+                    x.hour() as u64 * 3_600 * factor
+                        + x.minute() as u64 * 60 * factor
+                        + x.second() as u64 * factor
+                        + x.nanosecond() as u64 / (1_000_000_000 / factor)
+                })
+                // A value that does not fit `i32` becomes null instead of wrapping.
+                .and_then(|ticks| i32::try_from(ticks).ok())
+        }),
+        Time64(time_unit) => deserialize_primitive(rows, column, datatype, |bytes| {
+            let factor: u64 = get_factor_from_timeunit(time_unit).into();
+            to_utf8(bytes)
+                .and_then(|x| x.parse::<chrono::NaiveTime>().ok())
+                .map(|x| {
+                    (x.hour() as u64 * 3_600 * factor
+                        + x.minute() as u64 * 60 * factor
+                        + x.second() as u64 * factor
+                        + x.nanosecond() as u64 / (1_000_000_000 / factor))
+                        as i64
+                })
+        }),
         Timestamp(time_unit, None) => deserialize_primitive(rows, column, datatype, |bytes| {
             to_utf8(bytes)
                 .and_then(|x| x.parse::<chrono::NaiveDateTime>().ok())
@@ -274,3 +301,13 @@ where
         .collect::<Result<Vec<_>>>()
         .and_then(Chunk::try_new)
 }
+
+/// Returns how many ticks of `time_unit` make up one second (e.g. `1_000` for milliseconds).
+fn get_factor_from_timeunit(time_unit: TimeUnit) -> u32 {
+    match time_unit {
+        TimeUnit::Second => 1,
+        TimeUnit::Millisecond => 1_000,
+        TimeUnit::Microsecond => 1_000_000,
+        TimeUnit::Nanosecond => 1_000_000_000,
+    }
+}
diff --git a/src/temporal_conversions.rs b/src/temporal_conversions.rs
index 67f896c1154..e26d3d8d1b3 100644
--- a/src/temporal_conversions.rs
+++ b/src/temporal_conversions.rs
@@ -60,7 +60,7 @@ pub fn date64_to_date(milliseconds: i64) -> NaiveDate {
     date64_to_datetime(milliseconds).date()
 }
 
-/// converts a `i32` representing a `time32(s)` to [`NaiveDateTime`]
+/// converts a `i32` representing a `time32(s)` to [`NaiveTime`]
 #[inline]
 pub fn time32s_to_time(v: i32) -> NaiveTime {
     NaiveTime::from_num_seconds_from_midnight_opt(v as u32, 0).expect("invalid time")
@@ -78,7 +78,7 @@ pub fn time32ms_to_time(v: i32) -> NaiveTime {
         .expect("invalid time")
 }
 
-/// converts a `i64` representing a `time64(us)` to [`NaiveDateTime`]
+/// converts a `i64` representing a `time64(us)` to [`NaiveTime`]
 #[inline]
 pub fn time64us_to_time(v: i64) -> NaiveTime {
     NaiveTime::from_num_seconds_from_midnight_opt(
@@ -91,7 +91,7 @@ pub fn time64us_to_time(v: i64) -> NaiveTime {
     .expect("invalid time")
 }
 
-/// converts a `i64` representing a `time64(ns)` to [`NaiveDateTime`]
+/// converts a `i64` representing a `time64(ns)` to [`NaiveTime`]
 #[inline]
 pub fn time64ns_to_time(v: i64) -> NaiveTime {
     NaiveTime::from_num_seconds_from_midnight_opt(
diff --git a/tests/it/io/csv/read.rs b/tests/it/io/csv/read.rs
index c1756a71cf8..ca9d56b1912 100644
--- a/tests/it/io/csv/read.rs
+++ b/tests/it/io/csv/read.rs
@@ -302,6 +302,54 @@ fn date64() -> Result<()> {
     Ok(())
 }
 
+#[test]
+fn time32_s() -> Result<()> {
+    let result = test_deserialize(
+        "00:00:00,\n23:59:59,\n11:00:11,\n",
+        DataType::Time32(TimeUnit::Second),
+    )?;
+    let expected = Int32Array::from(&[Some(0), Some(86399), Some(39611)])
+        .to(DataType::Time32(TimeUnit::Second));
+    assert_eq!(expected, result.as_ref());
+    Ok(())
+}
+
+#[test]
+fn time32_ms() -> Result<()> {
+    let result = test_deserialize(
+        "00:00:00.000,\n23:59:59.999,\n00:00:00.999,\n",
+        DataType::Time32(TimeUnit::Millisecond),
+    )?;
+    let expected = Int32Array::from(&[Some(0), Some(86_399_999), Some(999)])
+        .to(DataType::Time32(TimeUnit::Millisecond));
+    assert_eq!(expected, result.as_ref());
+    Ok(())
+}
+
+#[test]
+fn time64_us() -> Result<()> {
+    let result = test_deserialize(
+        "00:00:00.000000,\n23:59:59.999999,\n00:00:00.000001,\n",
+        DataType::Time64(TimeUnit::Microsecond),
+    )?;
+    let expected = Int64Array::from(&[Some(0), Some(86_399_999_999), Some(1)])
+        .to(DataType::Time64(TimeUnit::Microsecond));
+    assert_eq!(expected, result.as_ref());
+    Ok(())
+}
+
+#[test]
+fn time64_ns() -> Result<()> {
+    let result = test_deserialize(
+        "00:00:00.000000000,\n23:59:59.999999999,\n00:00:00.000000001,\n",
+        DataType::Time64(TimeUnit::Nanosecond),
+    )?;
+    let expected = Int64Array::from(&[Some(0), Some(86_399_999_999_999), Some(1)])
+        .to(DataType::Time64(TimeUnit::Nanosecond));
+    assert_eq!(expected, result.as_ref());
+    Ok(())
+}
+
 #[test]
 fn decimal() -> Result<()> {
     let result = test_deserialize("1.1,\n1.2,\n1.22,\n1.3,\n", DataType::Decimal(2, 1))?;