Skip to content

Commit

Permalink
refactored and removed non necessary stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
evanxg852000 committed Jun 24, 2022
1 parent 1f7b8e4 commit 8a07c12
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 166 deletions.
1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ async-trait = "0.1.53"
derivative = "2.2.0"
time = { version = "0.3.10", features = ["std", "macros", "serde-well-known"] }
chrono = "0.4.19"
chrono-tz = "0.6"

[target.'cfg(windows)'.dependencies]
winapi = "0.3.9"
Expand Down
34 changes: 2 additions & 32 deletions src/schema/date_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ use std::{fmt, io};
use byteorder::{ReadBytesExt, WriteBytesExt};
use chrono::NaiveDate;
use common::BinarySerializable;
use serde::de::Error;
use serde::{Deserialize, Deserializer, Serialize};
use serde::{Deserialize, Serialize};

use super::date_time_options::DateTimeFormat;
use crate::time::format_description::well_known::{Iso8601, Rfc2822, Rfc3339};
Expand Down Expand Up @@ -182,7 +181,7 @@ impl fmt::Debug for DateTime {
}

/// DateTime Precision
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[repr(u8)]
pub enum DateTimePrecision {
/// Seconds precision
Expand Down Expand Up @@ -225,35 +224,6 @@ impl BinarySerializable for DateTimePrecision {
}
}

/// Reads a `DateTimePrecision` from its short textual name:
/// `"secs"`, `"millis"`, `"micros"` or `"nanos"`.
///
/// Any other string is rejected with a custom deserialization error.
impl<'de> Deserialize<'de> for DateTimePrecision {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where D: Deserializer<'de> {
        let raw_precision = String::deserialize(deserializer)?;
        let precision = match raw_precision.as_str() {
            "secs" => DateTimePrecision::Seconds,
            "millis" => DateTimePrecision::Milliseconds,
            "micros" => DateTimePrecision::Microseconds,
            "nanos" => DateTimePrecision::Nanoseconds,
            other => {
                // Report the offending value back to the user verbatim.
                return Err(D::Error::custom(format!(
                    "Unknown precision value `{}` specified.",
                    other
                )));
            }
        };
        Ok(precision)
    }
}

/// Writes a `DateTimePrecision` as its short textual name:
/// `"secs"`, `"millis"`, `"micros"` or `"nanos"`.
impl Serialize for DateTimePrecision {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where S: serde::Serializer {
        // Pick the label first so the serializer is invoked in exactly one place.
        let label = match self {
            DateTimePrecision::Seconds => "secs",
            DateTimePrecision::Milliseconds => "millis",
            DateTimePrecision::Microseconds => "micros",
            DateTimePrecision::Nanoseconds => "nanos",
        };
        serializer.serialize_str(label)
    }
}

/// Convert a timestamp with precision to OffsetDateTime.
fn precise_timestamp_to_datetime(
timestamp: i64,
Expand Down
154 changes: 21 additions & 133 deletions src/schema/date_time_options.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,16 @@
use std::collections::{HashSet, VecDeque};
use std::ops::BitOr;
use std::str::FromStr;
use std::sync::{Arc, Mutex};

use chrono::TimeZone;
use chrono_tz::Tz;
use serde::de::Error;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde::{Deserialize, Serialize};
use time::format_description::well_known::{Iso8601, Rfc2822, Rfc3339};
use time::OffsetDateTime;

use super::Cardinality;
use crate::schema::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag};
use crate::DateTimePrecision;

/// Define how a u64, i64, or f64 field should be handled by tantivy.
// #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
// #[serde(from = "DateTimeOptionsDeser")]

/// Defines how a DateTime field should be handled by tantivy.
#[derive(Clone, Serialize, Deserialize, Derivative)]
#[derivative(Debug, Eq, PartialEq)]
#[serde(deny_unknown_fields)]
Expand All @@ -33,13 +26,6 @@ pub struct DateTimeOptions {
#[serde(default = "default_input_formats")]
input_formats: HashSet<DateTimeFormat>,

// Default timezone used when the timezone cannot be
// extracted or implied from the input.
#[serde(default = "default_input_timezone")]
#[serde(serialize_with = "serialize_time_zone")]
#[serde(deserialize_with = "deserialize_time_zone")]
input_timezone: Tz,

// Internal storage precision, used to avoid storing
// very large numbers when not needed. This optimizes compression.
#[serde(default)]
Expand All @@ -61,7 +47,6 @@ impl Default for DateTimeOptions {
fast: None,
stored: true,
input_formats: default_input_formats(),
input_timezone: default_input_timezone(),
precision: DateTimePrecision::default(),
parsers: Arc::new(Mutex::new(None)),
}
Expand Down Expand Up @@ -161,22 +146,6 @@ impl DateTimeOptions {
&self.input_formats
}

/// Sets the default timezone.
///
/// This is the timezone to fallback to when a timezone cannot
/// be extracted of implied from the input.
pub fn set_default_input_timezone(mut self, timezone: Tz) -> DateTimeOptions {
// TODO-evan: ASK: I think it's better we use the name of the timezone
// Tz::from_str(&time_zone_name) wonder how will be the good builder pattern
self.input_timezone = timezone;
self
}

/// Returns the default timezone.
///
/// This is the value held in the `input_timezone` field, i.e. the timezone
/// used when one cannot be extracted or implied from the input. It is
/// returned by value.
pub fn get_default_input_timezone(&self) -> Tz {
self.input_timezone
}

/// Sets the precision for this DateTime field.
///
/// Internal storage precision: Used to avoid storing
Expand Down Expand Up @@ -326,7 +295,7 @@ impl From<DateTimeOptions> for DateTimeParsersHolder {
DateTimeFormat::RFC2822 => string_parsers.push_back(Arc::new(rfc2822_parser)),
DateTimeFormat::ISO8601 => string_parsers.push_back(Arc::new(iso8601_parser)),
DateTimeFormat::Strftime(str_format) => {
string_parsers.push_back(make_strftime_parser(str_format, opts.input_timezone))
string_parsers.push_back(make_strftime_parser(str_format))
}
DateTimeFormat::UnixTimestamp(precision) => {
number_parser = make_unix_timestamp_parser(precision)
Expand Down Expand Up @@ -356,9 +325,9 @@ fn iso8601_parser(value: &str) -> Result<OffsetDateTime, String> {
OffsetDateTime::parse(value, &Iso8601::DEFAULT).map_err(|error| error.to_string())
}

/// Configures and returns a function for parsing datetime strings
/// Configures and returns a function for parsing DateTime strings
/// using strftime formatting.
fn make_strftime_parser(format: String, default_timezone: Tz) -> StringDateTimeParser {
fn make_strftime_parser(format: String) -> StringDateTimeParser {
Arc::new(move |value: &str| {
// expect timezone
let date_time = if format.contains("%z") {
Expand All @@ -367,13 +336,7 @@ fn make_strftime_parser(format: String, default_timezone: Tz) -> StringDateTimeP
.map(|date_time| date_time.naive_utc())?
} else {
chrono::NaiveDateTime::parse_from_str(value, &format)
.map_err(|error| error.to_string())
.map(|date_time| {
default_timezone
.from_local_datetime(&date_time)
.unwrap()
.naive_utc()
})?
.map_err(|error| error.to_string())?
};

OffsetDateTime::from_unix_timestamp_nanos(date_time.timestamp_nanos() as i128)
Expand Down Expand Up @@ -402,7 +365,7 @@ fn make_unix_timestamp_parser(precision: DateTimePrecision) -> NumberDateTimePar
}

/// An enum specifying all supported DateTime parsing formats.
#[derive(Clone, Debug, Eq, Derivative)]
#[derive(Clone, Debug, Eq, Derivative, Serialize, Deserialize)]
#[derivative(Hash, PartialEq)]
pub enum DateTimeFormat {
RCF3339,
Expand All @@ -422,69 +385,13 @@ impl Default for DateTimeFormat {
}
}

/// Reads a `DateTimeFormat` from a plain string.
///
/// The well-known names (`rfc3339`, `rfc2822`, `iso8601` and the
/// `unix_ts_*` family) are matched case-insensitively. Any other string is
/// kept untouched, with its original casing, as a strftime format.
impl<'de> Deserialize<'de> for DateTimeFormat {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where D: Deserializer<'de> {
        let raw_format = String::deserialize(deserializer)?;
        // Only the lookup key is lower-cased; `raw_format` is preserved for
        // the strftime fallback.
        let lookup_key = raw_format.to_lowercase();
        let format = match lookup_key.as_str() {
            "rfc3339" => DateTimeFormat::RCF3339,
            "rfc2822" => DateTimeFormat::RFC2822,
            "iso8601" => DateTimeFormat::ISO8601,
            "unix_ts_secs" => DateTimeFormat::UnixTimestamp(DateTimePrecision::Seconds),
            "unix_ts_millis" => DateTimeFormat::UnixTimestamp(DateTimePrecision::Milliseconds),
            "unix_ts_micros" => DateTimeFormat::UnixTimestamp(DateTimePrecision::Microseconds),
            "unix_ts_nanos" => DateTimeFormat::UnixTimestamp(DateTimePrecision::Nanoseconds),
            _ => DateTimeFormat::Strftime(raw_format),
        };
        Ok(format)
    }
}

/// Writes a `DateTimeFormat` as a plain string.
///
/// Well-known formats use their fixed names, unix timestamps use
/// `unix_ts_<precision>`, and strftime formats are emitted verbatim.
impl Serialize for DateTimeFormat {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where S: serde::Serializer {
        // Resolve the textual form first, then serialize it in one place.
        let text: &str = match self {
            DateTimeFormat::RCF3339 => "rfc3339",
            DateTimeFormat::RFC2822 => "rfc2822",
            DateTimeFormat::ISO8601 => "iso8601",
            DateTimeFormat::Strftime(format) => format,
            DateTimeFormat::UnixTimestamp(DateTimePrecision::Seconds) => "unix_ts_secs",
            DateTimeFormat::UnixTimestamp(DateTimePrecision::Milliseconds) => "unix_ts_millis",
            DateTimeFormat::UnixTimestamp(DateTimePrecision::Microseconds) => "unix_ts_micros",
            DateTimeFormat::UnixTimestamp(DateTimePrecision::Nanoseconds) => "unix_ts_nanos",
        };
        serializer.serialize_str(text)
    }
}

/// Returns the input formats used when none are configured: ISO 8601
/// strings and unix timestamps at the default precision.
fn default_input_formats() -> HashSet<DateTimeFormat> {
    HashSet::from([
        DateTimeFormat::ISO8601,
        DateTimeFormat::UnixTimestamp(DateTimePrecision::default()),
    ])
}

pub(super) fn deserialize_time_zone<'de, D>(deserializer: D) -> Result<Tz, D::Error>
where D: Deserializer<'de> {
let time_zone_name: String = Deserialize::deserialize(deserializer)?;
Tz::from_str(&time_zone_name).map_err(D::Error::custom)
}

/// Serde helper that writes a `Tz` as the string produced by its
/// `to_string()` representation.
pub(super) fn serialize_time_zone<S>(time_zone: &Tz, s: S) -> Result<S::Ok, S::Error>
where S: Serializer {
    let zone_name = time_zone.to_string();
    s.serialize_str(&zone_name)
}

/// Default value for the `input_timezone` option: UTC.
///
/// Referenced by the `#[serde(default = "default_input_timezone")]` attribute
/// and by `DateTimeOptions::default()`.
fn default_input_timezone() -> Tz {
Tz::UTC
}

#[cfg(test)]
mod tests {
use std::collections::HashSet;
Expand Down Expand Up @@ -544,10 +451,9 @@ mod tests {
let date_time_options = serde_json::from_str::<DateTimeOptions>(
r#"{
"input_formats": [
"rfc3339", "rfc2822", "unix_ts_millis", "%Y %m %d %H:%M:%S %z"
"RCF3339", "RFC2822", {"UnixTimestamp": "Milliseconds"}, {"Strftime": "%Y %m %d %H:%M:%S %z"}
],
"input_timezone": "Africa/Lagos",
"precision": "millis",
"precision": "Milliseconds",
"indexed": true,
"fieldnorms": false,
"stored": false
Expand All @@ -565,7 +471,6 @@ mod tests {

let expected_dt_opts = DateTimeOptions {
input_formats,
input_timezone: Tz::Africa__Lagos,
precision: DateTimePrecision::Milliseconds,
indexed: true,
fieldnorms: false,
Expand All @@ -584,39 +489,26 @@ mod tests {
{
"indexed": true,
"fieldnorms": false,
"stored": false
"stored": false,
"input_formats": [{"UnixTimestamp": "Milliseconds"}]
}"#,
)
.unwrap();

// re-order the input-formats array
let mut date_time_options_json = serde_json::to_value(&date_time_options).unwrap();
let mut formats = date_time_options_json
.get("input_formats")
.unwrap()
.as_array()
.unwrap()
.iter()
.map(|val| val.as_str().unwrap().to_string())
.collect::<Vec<_>>();
formats.sort();
let input_formats = date_time_options_json.get_mut("input_formats").unwrap();
*input_formats = serde_json::to_value(formats).unwrap();

let date_time_options_json = serde_json::to_value(&date_time_options).unwrap();
assert_eq!(
date_time_options_json,
serde_json::json!({
"input_formats": ["iso8601", "unix_ts_millis"],
"input_timezone": "UTC",
"precision": "millis",
"input_formats": [{"UnixTimestamp": "Milliseconds"}],
"precision": "Milliseconds",
"indexed": true,
"fieldnorms": false,
"stored": false
})
);
}

// test config errors
#[test]
fn test_deserialize_date_time_options_with_wrong_options() {
assert!(serde_json::from_str::<DateTimeOptions>(
Expand All @@ -631,20 +523,19 @@ mod tests {
.to_string()
.contains(
"unknown field `name`, expected one of `indexed`, `fieldnorms`, `fast`, `stored`, \
`input_formats`, `input_timezone`, `precision`"
`input_formats`, `precision`"
));

assert!(serde_json::from_str::<DateTimeOptions>(
r#"{
"indexed": true,
"fieldnorms": false,
"stored": false,
"input_timezone": "Africa/Paris"
"stored": "wrong_value",
}"#
)
.unwrap_err()
.to_string()
.contains("Africa/Paris' is not a valid timezone"));
.contains("expected a boolean"));

assert!(serde_json::from_str::<DateTimeOptions>(
r#"{
Expand All @@ -656,20 +547,17 @@ mod tests {
)
.unwrap_err()
.to_string()
.contains("Unknown precision value `hours` specified."));
.contains("unknown variant `hours`"));
}

#[test]
fn test_strftime_parser() {
let parse_without_timezone =
make_strftime_parser("%Y-%m-%d %H:%M:%S".to_string(), Tz::Africa__Lagos);

let parse_without_timezone = make_strftime_parser("%Y-%m-%d %H:%M:%S".to_string());
let date_time = parse_without_timezone("2012-05-21 12:09:14").unwrap();
assert_eq!(date_time.date(), date!(2012 - 05 - 21));
assert_eq!(date_time.time(), time!(11:09:14));
assert_eq!(date_time.time(), time!(12:09:14));

let parse_with_timezone =
make_strftime_parser("%Y-%m-%d %H:%M:%S %z".to_string(), Tz::Africa__Lagos);
let parse_with_timezone = make_strftime_parser("%Y-%m-%d %H:%M:%S %z".to_string());
let date_time = parse_with_timezone("2012-05-21 12:09:14 -02:00").unwrap();
assert_eq!(date_time.date(), date!(2012 - 05 - 21));
assert_eq!(date_time.time(), time!(14:09:14));
Expand Down

0 comments on commit 8a07c12

Please sign in to comment.