Skip to content

Commit

Permalink
Fixed issue with Time32/Time64 datatype in csv reader (jorgecarleitao…
Browse files Browse the repository at this point in the history
  • Loading branch information
christophe-petitjean authored and ritchie46 committed Apr 5, 2023
1 parent 5ba1a3a commit ea68a88
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 4 deletions.
35 changes: 34 additions & 1 deletion src/io/csv/read_utils.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use chrono::Datelike;
use chrono::{Datelike, Timelike};

use crate::{
array::*,
Expand Down Expand Up @@ -202,6 +202,29 @@ pub(crate) fn deserialize_column<B: ByteRecordGeneric>(
.and_then(|x| x.parse::<chrono::NaiveDateTime>().ok())
.map(|x| x.timestamp_millis())
}),
Time32(time_unit) => deserialize_primitive(rows, column, datatype, |bytes| {
let factor = get_factor_from_timeunit(time_unit);
to_utf8(bytes)
.and_then(|x| x.parse::<chrono::NaiveTime>().ok())
.map(|x| {
(x.hour() * 3_600 * factor
+ x.minute() * 60 * factor
+ x.second() * factor
+ x.nanosecond() / (1_000_000_000 / factor)) as i32
})
}),
Time64(time_unit) => deserialize_primitive(rows, column, datatype, |bytes| {
let factor: u64 = get_factor_from_timeunit(time_unit).into();
to_utf8(bytes)
.and_then(|x| x.parse::<chrono::NaiveTime>().ok())
.map(|x| {
(x.hour() as u64 * 3_600 * factor
+ x.minute() as u64 * 60 * factor
+ x.second() as u64 * factor
+ x.nanosecond() as u64 / (1_000_000_000 / factor))
as i64
})
}),
Timestamp(time_unit, None) => deserialize_primitive(rows, column, datatype, |bytes| {
to_utf8(bytes)
.and_then(|x| x.parse::<chrono::NaiveDateTime>().ok())
Expand Down Expand Up @@ -274,3 +297,13 @@ where
.collect::<Result<Vec<_>>>()
.and_then(Chunk::try_new)
}

/// Returns how many ticks of `time_unit` make up one second
/// (1 for seconds, 1_000 for milliseconds, and so on).
fn get_factor_from_timeunit(time_unit: TimeUnit) -> u32 {
    // Every supported unit is a power of ten relative to a second, so only
    // the exponent needs to be selected here.
    let exponent = match time_unit {
        TimeUnit::Second => 0,
        TimeUnit::Millisecond => 3,
        TimeUnit::Microsecond => 6,
        TimeUnit::Nanosecond => 9,
    };
    // 10^9 is the largest result and still fits in a `u32`.
    10u32.pow(exponent)
}
6 changes: 3 additions & 3 deletions src/temporal_conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ pub fn date64_to_date(milliseconds: i64) -> NaiveDate {
date64_to_datetime(milliseconds).date()
}

/// converts a `i32` representing a `time32(s)` to [`NaiveDateTime`]
/// converts a `i32` representing a `time32(s)` to [`NaiveTime`]
#[inline]
pub fn time32s_to_time(v: i32) -> NaiveTime {
NaiveTime::from_num_seconds_from_midnight_opt(v as u32, 0).expect("invalid time")
Expand All @@ -78,7 +78,7 @@ pub fn time32ms_to_time(v: i32) -> NaiveTime {
.expect("invalid time")
}

/// converts a `i64` representing a `time64(us)` to [`NaiveDateTime`]
/// converts a `i64` representing a `time64(us)` to [`NaiveTime`]
#[inline]
pub fn time64us_to_time(v: i64) -> NaiveTime {
NaiveTime::from_num_seconds_from_midnight_opt(
Expand All @@ -91,7 +91,7 @@ pub fn time64us_to_time(v: i64) -> NaiveTime {
.expect("invalid time")
}

/// converts a `i64` representing a `time64(ns)` to [`NaiveDateTime`]
/// converts a `i64` representing a `time64(ns)` to [`NaiveTime`]
#[inline]
pub fn time64ns_to_time(v: i64) -> NaiveTime {
NaiveTime::from_num_seconds_from_midnight_opt(
Expand Down
48 changes: 48 additions & 0 deletions tests/it/io/csv/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,54 @@ fn date64() -> Result<()> {
Ok(())
}

/// A `Time32(Second)` column is read as whole seconds elapsed since midnight.
#[test]
fn time32_s() -> Result<()> {
    let csv = "00:00:00,\n23:59:59,\n11:00:11,\n";
    let parsed = test_deserialize(csv, DataType::Time32(TimeUnit::Second))?;

    // 23:59:59 -> 86399 s, 11:00:11 -> 39611 s.
    let seconds: [Option<i32>; 3] = [Some(0), Some(86399), Some(39611)];
    let expected = Int32Array::from(&seconds).to(DataType::Time32(TimeUnit::Second));

    assert_eq!(expected, parsed.as_ref());
    Ok(())
}

/// A `Time32(Millisecond)` column is read as milliseconds elapsed since midnight.
#[test]
fn time32_ms() -> Result<()> {
    let csv = "00:00:00.000,\n23:59:59.999,\n00:00:00.999,\n";
    let parsed = test_deserialize(csv, DataType::Time32(TimeUnit::Millisecond))?;

    // 23:59:59.999 is the last representable millisecond of the day.
    let millis: [Option<i32>; 3] = [Some(0), Some(86_399_999), Some(999)];
    let expected = Int32Array::from(&millis).to(DataType::Time32(TimeUnit::Millisecond));

    assert_eq!(expected, parsed.as_ref());
    Ok(())
}

/// A `Time64(Microsecond)` column is read as microseconds elapsed since midnight.
#[test]
fn time64_us() -> Result<()> {
    let csv = "00:00:00.000000,\n23:59:59.999999,\n00:00:00.000001,\n";
    let parsed = test_deserialize(csv, DataType::Time64(TimeUnit::Microsecond))?;

    // Covers midnight, the last microsecond of the day and the first one after midnight.
    let micros: [Option<i64>; 3] = [Some(0), Some(86_399_999_999), Some(1)];
    let expected = Int64Array::from(&micros).to(DataType::Time64(TimeUnit::Microsecond));

    assert_eq!(expected, parsed.as_ref());
    Ok(())
}

/// A `Time64(Nanosecond)` column is read as nanoseconds elapsed since midnight.
#[test]
fn time64_ns() -> Result<()> {
    let csv = "00:00:00.000000000,\n23:59:59.999999999,\n00:00:00.000000001,\n";
    let parsed = test_deserialize(csv, DataType::Time64(TimeUnit::Nanosecond))?;

    // Covers midnight, the last nanosecond of the day and the first one after midnight.
    let nanos: [Option<i64>; 3] = [Some(0), Some(86_399_999_999_999), Some(1)];
    let expected = Int64Array::from(&nanos).to(DataType::Time64(TimeUnit::Nanosecond));

    assert_eq!(expected, parsed.as_ref());
    Ok(())
}

#[test]
fn decimal() -> Result<()> {
let result = test_deserialize("1.1,\n1.2,\n1.22,\n1.3,\n", DataType::Decimal(2, 1))?;
Expand Down

0 comments on commit ea68a88

Please sign in to comment.