From 4b1ce74c9be1e556ecb582f3dc1d983d8f3daa25 Mon Sep 17 00:00:00 2001 From: Evance Soumaoro Date: Fri, 24 Jun 2022 11:31:46 +0000 Subject: [PATCH] refactored and fixed exported types --- Cargo.toml | 1 - examples/date_time_field.rs | 2 +- src/lib.rs | 2 +- src/schema/date_time.rs | 50 +++----- src/schema/date_time_options.rs | 207 +++++++------------------------- src/schema/mod.rs | 2 +- 6 files changed, 63 insertions(+), 201 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 09218a8be1..f4a3080796 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -62,7 +62,6 @@ async-trait = "0.1.53" derivative = "2.2.0" time = { version = "0.3.10", features = ["std", "macros", "serde-well-known"] } chrono = "0.4.19" -chrono-tz = "0.6" [target.'cfg(windows)'.dependencies] winapi = "0.3.9" diff --git a/examples/date_time_field.rs b/examples/date_time_field.rs index 43ec5e4ab2..f33e04d765 100644 --- a/examples/date_time_field.rs +++ b/examples/date_time_field.rs @@ -54,7 +54,7 @@ fn main() -> tantivy::Result<()> { for (_score, doc_address) in count_docs { let retrieved_doc = searcher.doc(doc_address)?; assert!(matches!(retrieved_doc.get_first(occurred_at), - Some(Value::DateTime(dt)) if dt.precision == DateTimePrecision::Seconds)); + Some(Value::DateTime(dt)) if dt.get_precision() == DateTimePrecision::Seconds)); assert_eq!( schema.to_json(&retrieved_doc), r#"{"event_type":["double-click"],"occurred_at":["2022-06-22T13:00:00Z"]}"# diff --git a/src/lib.rs b/src/lib.rs index 35fefe1494..ddae867529 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -135,7 +135,7 @@ pub use time; pub use crate::error::TantivyError; pub use crate::future_result::FutureResult; -pub use crate::schema::{DateTime, DateTimePrecision}; +pub use crate::schema::{DateTime, DateTimeFormat, DateTimeOptions, DateTimePrecision}; /// Tantivy result. /// diff --git a/src/schema/date_time.rs b/src/schema/date_time.rs index 8885a7faeb..aa9613822c 100644 --- a/src/schema/date_time.rs +++ b/src/schema/date_time.rs @@ -4,8 +4,7 @@ use std::{fmt, io}; use byteorder::{ReadBytesExt, WriteBytesExt}; use chrono::NaiveDate; use common::BinarySerializable; -use serde::de::Error; -use serde::{Deserialize, Deserializer, Serialize}; +use serde::{Deserialize, Serialize}; use super::date_time_options::DateTimeFormat; use crate::time::format_description::well_known::{Iso8601, Rfc2822, Rfc3339}; @@ -102,12 +101,22 @@ impl DateTime { } /// Converts to the underlying precise timestamp. - /// Seconds, Milliseconds, Microseconds, Nanoseconds. + /// TODO: deprecate or remove pub const fn into_timestamp(self) -> i64 { let Self { timestamp, .. } = self; timestamp } + /// Returns to the underlying timestamp. + pub fn get_timestamp(&self) -> i64 { + self.timestamp + } + + /// Returns the underlying timestamp precision. + pub fn get_precision(&self) -> DateTimePrecision { + self.precision + } + /// Convert to UTC `OffsetDateTime` pub fn into_utc(self) -> OffsetDateTime { let Self { @@ -157,7 +166,7 @@ impl DateTime { .ok_or_else(|| "Couldn't create NaiveDate from OffsetDateTime".to_string()) .map(|datetime| datetime.format(&str_fmt).to_string()) } - DateTimeFormat::UnixTimestamp(_) => Ok(self.timestamp.to_string()), + DateTimeFormat::Timestamp(_) => Ok(self.timestamp.to_string()), } } } @@ -172,7 +181,7 @@ impl fmt::Debug for DateTime { } /// DateTime Precision -#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] #[repr(u8)] pub enum DateTimePrecision { /// Seconds precision @@ -215,35 +224,6 @@ impl BinarySerializable for DateTimePrecision { } } -impl<'de> Deserialize<'de> for DateTimePrecision { - fn deserialize(deserializer: D) -> Result - where D: Deserializer<'de> { - let time_precision: String = Deserialize::deserialize(deserializer)?; - match time_precision.as_str() { - "secs" => Ok(DateTimePrecision::Seconds), - "millis" => Ok(DateTimePrecision::Milliseconds), - "micros" => Ok(DateTimePrecision::Microseconds), - "nanos" => Ok(DateTimePrecision::Nanoseconds), - unknown => Err(D::Error::custom(format!( - "Unknown precision value `{}` specified.", - unknown - ))), - } - } -} - -impl Serialize for DateTimePrecision { - fn serialize(&self, serializer: S) -> Result - where S: serde::Serializer { - match self { - DateTimePrecision::Seconds => serializer.serialize_str("secs"), - DateTimePrecision::Milliseconds => serializer.serialize_str("millis"), - DateTimePrecision::Microseconds => serializer.serialize_str("micros"), - DateTimePrecision::Nanoseconds => serializer.serialize_str("nanos"), - } - } -} - /// Convert a timestamp with precision to OffsetDateTime. fn precise_timestamp_to_datetime( timestamp: i64, @@ -294,7 +274,7 @@ mod tests { "2020-01-02 00:30:00" ); assert_eq!( - dt.format(DateTimeFormat::UnixTimestamp(DateTimePrecision::Seconds)) + dt.format(DateTimeFormat::Timestamp(DateTimePrecision::Seconds)) .unwrap(), "1577925000" ); diff --git a/src/schema/date_time_options.rs b/src/schema/date_time_options.rs index 90964c26ff..dde10807e3 100644 --- a/src/schema/date_time_options.rs +++ b/src/schema/date_time_options.rs @@ -1,12 +1,8 @@ use std::collections::{HashSet, VecDeque}; use std::ops::BitOr; -use std::str::FromStr; use std::sync::{Arc, Mutex}; -use chrono::TimeZone; -use chrono_tz::Tz; -use serde::de::Error; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use serde::{Deserialize, Serialize}; use time::format_description::well_known::{Iso8601, Rfc2822, Rfc3339}; use time::OffsetDateTime; @@ -14,10 +10,7 @@ use super::Cardinality; use crate::schema::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag}; use crate::DateTimePrecision; -/// Define how an u64, i64, of f64 field should be handled by tantivy. -// #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)] -// #[serde(from = "DateTimeOptionsDeser")] - +/// Defines how DateTime field should be handled by tantivy. #[derive(Clone, Serialize, Deserialize, Derivative)] #[derivative(Debug, Eq, PartialEq)] #[serde(deny_unknown_fields)] @@ -33,13 +26,6 @@ pub struct DateTimeOptions { #[serde(default = "default_input_formats")] input_formats: HashSet, - // Default timezone used when the timezone cannot be - // extracted or implied from the input. - #[serde(default = "default_input_timezone")] - #[serde(serialize_with = "serialize_time_zone")] - #[serde(deserialize_with = "deserialize_time_zone")] - input_timezone: Tz, - // Internal storage precision, used to avoid storing // very large numbers when not needed. This optimizes compression. #[serde(default)] @@ -61,7 +47,6 @@ impl Default for DateTimeOptions { fast: None, stored: true, input_formats: default_input_formats(), - input_timezone: default_input_timezone(), precision: DateTimePrecision::default(), parsers: Arc::new(Mutex::new(None)), } @@ -161,22 +146,6 @@ impl DateTimeOptions { &self.input_formats } - /// Sets the default timezone. - /// - /// This is the timezone to fallback to when a timezone cannot - /// be extracted of implied from the input. - pub fn set_default_input_timezone(mut self, timezone: Tz) -> DateTimeOptions { - // TODO-evan: ASK: I think it's better we use the name of the timezone - // Tz::from_str(&time_zone_name) wonder how will be the good builder pattern - self.input_timezone = timezone; - self - } - - /// Returns the default timezone. - pub fn get_default_input_timezone(&self) -> Tz { - self.input_timezone - } - /// Sets the precision for this DateTime field. /// /// Internal storage precision: Used to avoid storing @@ -290,7 +259,7 @@ impl Default for DateTimeParsersHolder { fn default() -> Self { Self { string_parsers: VecDeque::new(), - number_parser: make_unix_timestamp_parser(DateTimePrecision::Milliseconds), + number_parser: make_timestamp_parser(DateTimePrecision::Milliseconds), } } } @@ -319,17 +288,17 @@ impl From for DateTimeParsersHolder { fn from(opts: DateTimeOptions) -> Self { let mut string_parsers: VecDeque = VecDeque::new(); let mut number_parser: NumberDateTimeParser = - make_unix_timestamp_parser(DateTimePrecision::Milliseconds); + make_timestamp_parser(DateTimePrecision::Milliseconds); for input_format in opts.input_formats { match input_format { DateTimeFormat::RCF3339 => string_parsers.push_back(Arc::new(rfc3339_parser)), DateTimeFormat::RFC2822 => string_parsers.push_back(Arc::new(rfc2822_parser)), DateTimeFormat::ISO8601 => string_parsers.push_back(Arc::new(iso8601_parser)), DateTimeFormat::Strftime(str_format) => { - string_parsers.push_back(make_strftime_parser(str_format, opts.input_timezone)) + string_parsers.push_back(make_strftime_parser(str_format)) } - DateTimeFormat::UnixTimestamp(precision) => { - number_parser = make_unix_timestamp_parser(precision) + DateTimeFormat::Timestamp(precision) => { + number_parser = make_timestamp_parser(precision) } } } @@ -356,9 +325,9 @@ fn iso8601_parser(value: &str) -> Result { OffsetDateTime::parse(value, &Iso8601::DEFAULT).map_err(|error| error.to_string()) } -/// Configures and returns a function for parsing datetime strings +/// Configures and returns a function for parsing DateTime strings /// using strftime formatting. -fn make_strftime_parser(format: String, default_timezone: Tz) -> StringDateTimeParser { +fn make_strftime_parser(format: String) -> StringDateTimeParser { Arc::new(move |value: &str| { // expect timezone let date_time = if format.contains("%z") { @@ -367,13 +336,7 @@ fn make_strftime_parser(format: String, default_timezone: Tz) -> StringDateTimeP .map(|date_time| date_time.naive_utc())? } else { chrono::NaiveDateTime::parse_from_str(value, &format) - .map_err(|error| error.to_string()) - .map(|date_time| { - default_timezone - .from_local_datetime(&date_time) - .unwrap() - .naive_utc() - })? + .map_err(|error| error.to_string())? }; OffsetDateTime::from_unix_timestamp_nanos(date_time.timestamp_nanos() as i128) @@ -383,7 +346,7 @@ fn make_strftime_parser(format: String, default_timezone: Tz) -> StringDateTimeP /// Configures and returns a function for interpreting numbers /// as timestamp with a precision. -fn make_unix_timestamp_parser(precision: DateTimePrecision) -> NumberDateTimeParser { +fn make_timestamp_parser(precision: DateTimePrecision) -> NumberDateTimeParser { Arc::new(move |value: i64| { let date_time = match precision { DateTimePrecision::Seconds => OffsetDateTime::from_unix_timestamp(value), @@ -402,14 +365,19 @@ fn make_unix_timestamp_parser(precision: DateTimePrecision) -> NumberDateTimePar } /// An enum specifying all supported DateTime parsing format. -#[derive(Clone, Debug, Eq, Derivative)] +#[derive(Clone, Debug, Eq, Derivative, Serialize, Deserialize)] #[derivative(Hash, PartialEq)] pub enum DateTimeFormat { + /// The rcf3339 input format. RCF3339, + /// The rcf2822 input format. RFC2822, + /// The iso8601 input format. ISO8601, + /// The srtftime input format. Strftime(String), - UnixTimestamp( + /// The timestamp input format with precision. + Timestamp( #[derivative(PartialEq = "ignore")] #[derivative(Hash = "ignore")] DateTimePrecision, @@ -422,69 +390,13 @@ impl Default for DateTimeFormat { } } -impl<'de> Deserialize<'de> for DateTimeFormat { - fn deserialize(deserializer: D) -> Result - where D: Deserializer<'de> { - let value = String::deserialize(deserializer)?; - match value.to_lowercase().as_str() { - "rfc3339" => Ok(DateTimeFormat::RCF3339), - "rfc2822" => Ok(DateTimeFormat::RFC2822), - "iso8601" => Ok(DateTimeFormat::ISO8601), - "unix_ts_secs" => Ok(DateTimeFormat::UnixTimestamp(DateTimePrecision::Seconds)), - "unix_ts_millis" => Ok(DateTimeFormat::UnixTimestamp( - DateTimePrecision::Milliseconds, - )), - "unix_ts_micros" => Ok(DateTimeFormat::UnixTimestamp( - DateTimePrecision::Microseconds, - )), - "unix_ts_nanos" => Ok(DateTimeFormat::UnixTimestamp( - DateTimePrecision::Nanoseconds, - )), - _ => Ok(DateTimeFormat::Strftime(value)), - } - } -} - -impl Serialize for DateTimeFormat { - fn serialize(&self, serializer: S) -> Result - where S: serde::Serializer { - match self { - DateTimeFormat::RCF3339 => serializer.serialize_str("rfc3339"), - DateTimeFormat::RFC2822 => serializer.serialize_str("rfc2822"), - DateTimeFormat::ISO8601 => serializer.serialize_str("iso8601"), - DateTimeFormat::Strftime(format) => serializer.serialize_str(format), - DateTimeFormat::UnixTimestamp(precision) => match precision { - DateTimePrecision::Seconds => serializer.serialize_str("unix_ts_secs"), - DateTimePrecision::Milliseconds => serializer.serialize_str("unix_ts_millis"), - DateTimePrecision::Microseconds => serializer.serialize_str("unix_ts_micros"), - DateTimePrecision::Nanoseconds => serializer.serialize_str("unix_ts_nanos"), - }, - } - } -} - fn default_input_formats() -> HashSet { let mut input_formats = HashSet::new(); input_formats.insert(DateTimeFormat::ISO8601); - input_formats.insert(DateTimeFormat::UnixTimestamp(DateTimePrecision::default())); + input_formats.insert(DateTimeFormat::Timestamp(DateTimePrecision::default())); input_formats } -pub(super) fn deserialize_time_zone<'de, D>(deserializer: D) -> Result -where D: Deserializer<'de> { - let time_zone_name: String = Deserialize::deserialize(deserializer)?; - Tz::from_str(&time_zone_name).map_err(D::Error::custom) -} - -pub(super) fn serialize_time_zone(time_zone: &Tz, s: S) -> Result -where S: Serializer { - s.serialize_str(&time_zone.to_string()) -} - -fn default_input_timezone() -> Tz { - Tz::UTC -} - #[cfg(test)] mod tests { use std::collections::HashSet; @@ -504,25 +416,17 @@ mod tests { formats.insert(DateTimeFormat::Strftime( "%a %b %d %H:%M:%S %z %Y".to_string(), )); - formats.insert(DateTimeFormat::UnixTimestamp( - DateTimePrecision::Microseconds, - )); + formats.insert(DateTimeFormat::Timestamp(DateTimePrecision::Microseconds)); assert_eq!(formats.len(), 3); } #[test] - fn test_only_one_unix_ts_format_can_be_added() { + fn test_only_one_timestamp_format_can_be_added() { let mut formats = HashSet::new(); - formats.insert(DateTimeFormat::UnixTimestamp(DateTimePrecision::Seconds)); - formats.insert(DateTimeFormat::UnixTimestamp( - DateTimePrecision::Microseconds, - )); - formats.insert(DateTimeFormat::UnixTimestamp( - DateTimePrecision::Milliseconds, - )); - formats.insert(DateTimeFormat::UnixTimestamp( - DateTimePrecision::Nanoseconds, - )); + formats.insert(DateTimeFormat::Timestamp(DateTimePrecision::Seconds)); + formats.insert(DateTimeFormat::Timestamp(DateTimePrecision::Microseconds)); + formats.insert(DateTimeFormat::Timestamp(DateTimePrecision::Milliseconds)); + formats.insert(DateTimeFormat::Timestamp(DateTimePrecision::Nanoseconds)); assert_eq!(formats.len(), 1) } @@ -544,10 +448,9 @@ mod tests { let date_time_options = serde_json::from_str::( r#"{ "input_formats": [ - "rfc3339", "rfc2822", "unix_ts_millis", "%Y %m %d %H:%M:%S %z" + "RCF3339", "RFC2822", {"Timestamp": "Milliseconds"}, {"Strftime": "%Y %m %d %H:%M:%S %z"} ], - "input_timezone": "Africa/Lagos", - "precision": "millis", + "precision": "Milliseconds", "indexed": true, "fieldnorms": false, "stored": false @@ -558,14 +461,11 @@ mod tests { let mut input_formats = HashSet::new(); input_formats.insert(DateTimeFormat::RCF3339); input_formats.insert(DateTimeFormat::RFC2822); - input_formats.insert(DateTimeFormat::UnixTimestamp( - DateTimePrecision::Milliseconds, - )); + input_formats.insert(DateTimeFormat::Timestamp(DateTimePrecision::Milliseconds)); input_formats.insert(DateTimeFormat::Strftime("%Y %m %d %H:%M:%S %z".to_string())); let expected_dt_opts = DateTimeOptions { input_formats, - input_timezone: Tz::Africa__Lagos, precision: DateTimePrecision::Milliseconds, indexed: true, fieldnorms: false, @@ -584,31 +484,19 @@ mod tests { { "indexed": true, "fieldnorms": false, - "stored": false + "stored": false, + "input_formats": [{"Timestamp": "Milliseconds"}] }"#, ) .unwrap(); // re-order the input-formats array - let mut date_time_options_json = serde_json::to_value(&date_time_options).unwrap(); - let mut formats = date_time_options_json - .get("input_formats") - .unwrap() - .as_array() - .unwrap() - .iter() - .map(|val| val.as_str().unwrap().to_string()) - .collect::>(); - formats.sort(); - let input_formats = date_time_options_json.get_mut("input_formats").unwrap(); - *input_formats = serde_json::to_value(formats).unwrap(); - + let date_time_options_json = serde_json::to_value(&date_time_options).unwrap(); assert_eq!( date_time_options_json, serde_json::json!({ - "input_formats": ["iso8601", "unix_ts_millis"], - "input_timezone": "UTC", - "precision": "millis", + "input_formats": [{"Timestamp": "Milliseconds"}], + "precision": "Milliseconds", "indexed": true, "fieldnorms": false, "stored": false @@ -616,7 +504,6 @@ mod tests { ); } - // test config errors #[test] fn test_deserialize_date_time_options_with_wrong_options() { assert!(serde_json::from_str::( @@ -631,20 +518,19 @@ mod tests { .to_string() .contains( "unknown field `name`, expected one of `indexed`, `fieldnorms`, `fast`, `stored`, \ - `input_formats`, `input_timezone`, `precision`" + `input_formats`, `precision`" )); assert!(serde_json::from_str::( r#"{ "indexed": true, "fieldnorms": false, - "stored": false, - "input_timezone": "Africa/Paris" + "stored": "wrong_value", }"# ) .unwrap_err() .to_string() - .contains("Africa/Paris' is not a valid timezone")); + .contains("expected a boolean")); assert!(serde_json::from_str::( r#"{ @@ -656,47 +542,44 @@ mod tests { ) .unwrap_err() .to_string() - .contains("Unknown precision value `hours` specified.")); + .contains("unknown variant `hours`")); } #[test] fn test_strftime_parser() { - let parse_without_timezone = - make_strftime_parser("%Y-%m-%d %H:%M:%S".to_string(), Tz::Africa__Lagos); - + let parse_without_timezone = make_strftime_parser("%Y-%m-%d %H:%M:%S".to_string()); let date_time = parse_without_timezone("2012-05-21 12:09:14").unwrap(); assert_eq!(date_time.date(), date!(2012 - 05 - 21)); - assert_eq!(date_time.time(), time!(11:09:14)); + assert_eq!(date_time.time(), time!(12:09:14)); - let parse_with_timezone = - make_strftime_parser("%Y-%m-%d %H:%M:%S %z".to_string(), Tz::Africa__Lagos); + let parse_with_timezone = make_strftime_parser("%Y-%m-%d %H:%M:%S %z".to_string()); let date_time = parse_with_timezone("2012-05-21 12:09:14 -02:00").unwrap(); assert_eq!(date_time.date(), date!(2012 - 05 - 21)); assert_eq!(date_time.time(), time!(14:09:14)); } #[test] - fn test_unix_timestamp_parser() { + fn test_timestamp_parser() { let now = time::OffsetDateTime::now_utc(); - let parse_with_secs = make_unix_timestamp_parser(DateTimePrecision::Seconds); + let parse_with_secs = make_timestamp_parser(DateTimePrecision::Seconds); let date_time = parse_with_secs(now.unix_timestamp()).unwrap(); assert_eq!(date_time.date(), now.date()); assert_eq!(date_time.time().as_hms(), now.time().as_hms()); - let parse_with_millis = make_unix_timestamp_parser(DateTimePrecision::Milliseconds); + let parse_with_millis = make_timestamp_parser(DateTimePrecision::Milliseconds); let ts_millis = now.unix_timestamp_nanos() / 1_000_000; let date_time = parse_with_millis(ts_millis as i64).unwrap(); assert_eq!(date_time.date(), now.date()); assert_eq!(date_time.time().as_hms_milli(), now.time().as_hms_milli()); - let parse_with_micros = make_unix_timestamp_parser(DateTimePrecision::Microseconds); + let parse_with_micros = make_timestamp_parser(DateTimePrecision::Microseconds); let ts_micros = now.unix_timestamp_nanos() / 1000; let date_time = parse_with_micros(ts_micros as i64).unwrap(); assert_eq!(date_time.date(), now.date()); assert_eq!(date_time.time().as_hms_micro(), now.time().as_hms_micro()); - let parse_with_nanos = make_unix_timestamp_parser(DateTimePrecision::Nanoseconds); + let parse_with_nanos = make_timestamp_parser(DateTimePrecision::Nanoseconds); let date_time = parse_with_nanos(now.unix_timestamp_nanos() as i64).unwrap(); assert_eq!(date_time.date(), now.date()); assert_eq!(date_time.time(), now.time()); diff --git a/src/schema/mod.rs b/src/schema/mod.rs index 463c04047f..b9997f79dc 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -130,7 +130,7 @@ mod value; pub use self::bytes_options::BytesOptions; pub use self::date_time::{DateTime, DateTimePrecision}; -pub use self::date_time_options::DateTimeOptions; +pub use self::date_time_options::{DateTimeFormat, DateTimeOptions}; pub use self::document::Document; pub(crate) use self::facet::FACET_SEP_BYTE; pub use self::facet::{Facet, FacetParseError};