From 97160a3ae50d7288a04ba26b11b91b13b1d48a28 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 27 Jun 2023 17:08:10 -0400 Subject: [PATCH] Minor: Simplify date_trunc code and add comments --- .../physical-expr/src/datetime_expressions.rs | 47 +++++++------------ 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs index 3a36e8a4899a..e429e7bc45af 100644 --- a/datafusion/physical-expr/src/datetime_expressions.rs +++ b/datafusion/physical-expr/src/datetime_expressions.rs @@ -214,7 +214,11 @@ fn quarter_month(date: &NaiveDateTime) -> u32 { 1 + 3 * ((date.month() - 1) / 3) } -fn date_trunc_single(granularity: &str, value: i64) -> Result { +/// Tuncates the single `value`, expressed in nanoseconds since the +/// epoch, for granularities greater than 1 second, in taking into +/// account that some granularities are not uniform durations of time +/// (e.g. months are not always the same lengths, leap seconds, etc) +fn date_trunc_coarse(granularity: &str, value: i64) -> Result { if granularity == "millisecond" || granularity == "microsecond" { return Ok(value); } @@ -266,11 +270,11 @@ fn date_trunc_single(granularity: &str, value: i64) -> Result { Ok(value.unwrap().timestamp_nanos()) } +// truncates a single value with the given timeunit to the specified granularity fn _date_trunc( tu: TimeUnit, value: &Option, granularity: &str, - f: impl Fn(Option) -> Result>, ) -> Result, DataFusionError> { let scale = match tu { TimeUnit::Second => 1_000_000_000, @@ -284,9 +288,7 @@ fn _date_trunc( }; // convert to nanoseconds - let Some(nano) = (f)(Some(value * scale))? else { - return Ok(None); - }; + let nano = date_trunc_coarse(granularity, scale * value)?; let result = match tu { TimeUnit::Second => match granularity { @@ -328,29 +330,24 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result { )); }; - let f = |x: Option| { - x.map(|x| date_trunc_single(granularity.as_str(), x)) - .transpose() - }; - Ok(match array { ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => { - let value = _date_trunc(TimeUnit::Nanosecond, v, granularity.as_str(), f)?; + let value = _date_trunc(TimeUnit::Nanosecond, v, granularity.as_str())?; let value = ScalarValue::TimestampNanosecond(value, tz_opt.clone()); ColumnarValue::Scalar(value) } ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => { - let value = _date_trunc(TimeUnit::Microsecond, v, granularity.as_str(), f)?; + let value = _date_trunc(TimeUnit::Microsecond, v, granularity.as_str())?; let value = ScalarValue::TimestampMicrosecond(value, tz_opt.clone()); ColumnarValue::Scalar(value) } ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => { - let value = _date_trunc(TimeUnit::Millisecond, v, granularity.as_str(), f)?; + let value = _date_trunc(TimeUnit::Millisecond, v, granularity.as_str())?; let value = ScalarValue::TimestampMillisecond(value, tz_opt.clone()); ColumnarValue::Scalar(value) } ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => { - let value = _date_trunc(TimeUnit::Second, v, granularity.as_str(), f)?; + let value = _date_trunc(TimeUnit::Second, v, granularity.as_str())?; let value = ScalarValue::TimestampSecond(value, tz_opt.clone()); ColumnarValue::Scalar(value) } @@ -361,9 +358,7 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result { let array = as_timestamp_second_array(array)?; let array = array .iter() - .map(|x| { - _date_trunc(TimeUnit::Second, &x, granularity.as_str(), f) - }) + .map(|x| _date_trunc(TimeUnit::Second, &x, granularity.as_str())) .collect::>()?; ColumnarValue::Array(Arc::new(array)) } @@ -372,12 +367,7 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result { let array = array .iter() .map(|x| { - _date_trunc( - TimeUnit::Millisecond, - &x, - granularity.as_str(), - f, - ) + _date_trunc(TimeUnit::Millisecond, &x, granularity.as_str()) }) .collect::>()?; ColumnarValue::Array(Arc::new(array)) @@ -387,12 +377,7 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result { let array = array .iter() .map(|x| { - _date_trunc( - TimeUnit::Microsecond, - &x, - granularity.as_str(), - f, - ) + _date_trunc(TimeUnit::Microsecond, &x, granularity.as_str()) }) .collect::>()?; ColumnarValue::Array(Arc::new(array)) @@ -402,7 +387,7 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result { let array = array .iter() .map(|x| { - _date_trunc(TimeUnit::Nanosecond, &x, granularity.as_str(), f) + _date_trunc(TimeUnit::Nanosecond, &x, granularity.as_str()) }) .collect::>()?; @@ -996,7 +981,7 @@ mod tests { cases.iter().for_each(|(original, granularity, expected)| { let left = string_to_timestamp_nanos(original).unwrap(); let right = string_to_timestamp_nanos(expected).unwrap(); - let result = date_trunc_single(granularity, left).unwrap(); + let result = date_trunc_coarse(granularity, left).unwrap(); assert_eq!(result, right, "{original} = {expected}"); }); }