Add an opt-in `coerce` option (and `COERCE` schema flag) for text, numeric and bool fields.

When the option is set, JSON values whose type does not match the field are converted
instead of rejected: JSON strings are parsed for u64/i64/f64/bool fields, and JSON
numbers, booleans and null are stringified for text fields. Fields without the option
keep returning `ValueParsingError::TypeError`.

diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs
index 7675096e94..7e8dc1dbd8 100644
--- a/src/schema/field_type.rs
+++ b/src/schema/field_type.rs
@@ -329,16 +329,66 @@ impl FieldType {
                         Ok(DateTime::from_utc(dt_with_fixed_tz).into())
                     }
                     FieldType::Str(_) => Ok(Value::Str(field_text)),
-                    FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) => {
-                        Err(ValueParsingError::TypeError {
-                            expected: "an integer",
-                            json: JsonValue::String(field_text),
-                        })
+                    FieldType::U64(opt) => {
+                        if opt.should_coerce() {
+                            Ok(Value::U64(field_text.parse().map_err(|_| {
+                                ValueParsingError::TypeError {
+                                    expected: "a u64 or a u64 as string",
+                                    json: JsonValue::String(field_text),
+                                }
+                            })?))
+                        } else {
+                            Err(ValueParsingError::TypeError {
+                                expected: "a u64",
+                                json: JsonValue::String(field_text),
+                            })
+                        }
+                    }
+                    FieldType::I64(opt) => {
+                        if opt.should_coerce() {
+                            Ok(Value::I64(field_text.parse().map_err(|_| {
+                                ValueParsingError::TypeError {
+                                    expected: "a i64 or a i64 as string",
+                                    json: JsonValue::String(field_text),
+                                }
+                            })?))
+                        } else {
+                            Err(ValueParsingError::TypeError {
+                                expected: "a i64",
+                                json: JsonValue::String(field_text),
+                            })
+                        }
+                    }
+                    FieldType::F64(opt) => {
+                        if opt.should_coerce() {
+                            Ok(Value::F64(field_text.parse().map_err(|_| {
+                                ValueParsingError::TypeError {
+                                    expected: "a f64 or a f64 as string",
+                                    json: JsonValue::String(field_text),
+                                }
+                            })?))
+                        } else {
+                            Err(ValueParsingError::TypeError {
+                                expected: "a f64",
+                                json: JsonValue::String(field_text),
+                            })
+                        }
+                    }
+                    FieldType::Bool(opt) => {
+                        if opt.should_coerce() {
+                            Ok(Value::Bool(field_text.parse().map_err(|_| {
+                                ValueParsingError::TypeError {
+                                    expected: "a bool or a bool as string",
+                                    json: JsonValue::String(field_text),
+                                }
+                            })?))
+                        } else {
+                            Err(ValueParsingError::TypeError {
+                                expected: "a boolean",
+                                json: JsonValue::String(field_text),
+                            })
+                        }
                     }
-                    FieldType::Bool(_) => Err(ValueParsingError::TypeError {
-                        expected: "a boolean",
-                        json: JsonValue::String(field_text),
-                    }),
                     FieldType::Facet(_) => Ok(Value::Facet(Facet::from(&field_text))),
                     FieldType::Bytes(_) => BASE64
                         .decode(&field_text)
@@ -395,12 +445,20 @@ impl FieldType {
                     expected: "a boolean",
                     json: JsonValue::Number(field_val_num),
                 }),
-                FieldType::Str(_) | FieldType::Facet(_) | FieldType::Bytes(_) => {
-                    Err(ValueParsingError::TypeError {
-                        expected: "a string",
-                        json: JsonValue::Number(field_val_num),
-                    })
+                FieldType::Str(opt) => {
+                    if opt.should_coerce() {
+                        Ok(Value::Str(field_val_num.to_string()))
+                    } else {
+                        Err(ValueParsingError::TypeError {
+                            expected: "a string",
+                            json: JsonValue::Number(field_val_num),
+                        })
+                    }
                 }
+                FieldType::Facet(_) | FieldType::Bytes(_) => Err(ValueParsingError::TypeError {
+                    expected: "a string",
+                    json: JsonValue::Number(field_val_num),
+                }),
                 FieldType::JsonObject(_) => Err(ValueParsingError::TypeError {
                     expected: "a json object",
                     json: JsonValue::Number(field_val_num),
@@ -431,11 +489,38 @@ impl FieldType {
             },
             JsonValue::Bool(json_bool_val) => match self {
                 FieldType::Bool(_) => Ok(Value::Bool(json_bool_val)),
+                FieldType::Str(opt) => {
+                    if opt.should_coerce() {
+                        Ok(Value::Str(json_bool_val.to_string()))
+                    } else {
+                        Err(ValueParsingError::TypeError {
+                            expected: "a string",
+                            json: JsonValue::Bool(json_bool_val),
+                        })
+                    }
+                }
                 _ => Err(ValueParsingError::TypeError {
                     expected: self.value_type().name(),
                     json: JsonValue::Bool(json_bool_val),
                 }),
             },
+            // Could also just filter them
+            JsonValue::Null => match self {
+                FieldType::Str(opt) => {
+                    if opt.should_coerce() {
+                        Ok(Value::Str("null".to_string()))
+                    } else {
+                        Err(ValueParsingError::TypeError {
+                            expected: "a string",
+                            json: JsonValue::Null,
+                        })
+                    }
+                }
+                _ => Err(ValueParsingError::TypeError {
+                    expected: self.value_type().name(),
+                    json: JsonValue::Null,
+                }),
+            },
             _ => Err(ValueParsingError::TypeError {
                 expected: self.value_type().name(),
                 json: json.clone(),
@@ -450,11 +535,90 @@ mod tests {
 
     use super::FieldType;
     use crate::schema::field_type::ValueParsingError;
-    use crate::schema::{Schema, TextOptions, Type, Value, INDEXED};
+    use crate::schema::{NumericOptions, Schema, TextOptions, Type, Value, COERCE, INDEXED};
     use crate::time::{Date, Month, PrimitiveDateTime, Time};
     use crate::tokenizer::{PreTokenizedString, Token};
     use crate::{DateTime, Document};
 
+    #[test]
+    fn test_to_string_coercion() {
+        let mut schema_builder = Schema::builder();
+        let text_field = schema_builder.add_text_field("id", COERCE);
+        let schema = schema_builder.build();
+        let doc = schema.parse_document(r#"{"id": 100}"#).unwrap();
+        assert_eq!(
+            &Value::Str("100".to_string()),
+            doc.get_first(text_field).unwrap()
+        );
+
+        let doc = schema.parse_document(r#"{"id": true}"#).unwrap();
+        assert_eq!(
+            &Value::Str("true".to_string()),
+            doc.get_first(text_field).unwrap()
+        );
+
+        // Not sure if this null coercion is the best approach
+        let doc = schema.parse_document(r#"{"id": null}"#).unwrap();
+        assert_eq!(
+            &Value::Str("null".to_string()),
+            doc.get_first(text_field).unwrap()
+        );
+    }
+
+    #[test]
+    fn test_to_number_coercion() {
+        let mut schema_builder = Schema::builder();
+        let i64_field = schema_builder.add_i64_field("i64", COERCE);
+        let u64_field = schema_builder.add_u64_field("u64", COERCE);
+        let f64_field = schema_builder.add_f64_field("f64", COERCE);
+        let schema = schema_builder.build();
+        let doc_json = r#"{"i64": "100", "u64": "100", "f64": "100"}"#;
+        let doc = schema.parse_document(doc_json).unwrap();
+        assert_eq!(&Value::I64(100), doc.get_first(i64_field).unwrap());
+        assert_eq!(&Value::U64(100), doc.get_first(u64_field).unwrap());
+        assert_eq!(&Value::F64(100.0), doc.get_first(f64_field).unwrap());
+    }
+
+    #[test]
+    fn test_to_bool_coercion() {
+        let mut schema_builder = Schema::builder();
+        let bool_field = schema_builder.add_bool_field("bool", COERCE);
+        let schema = schema_builder.build();
+        let doc_json = r#"{"bool": "true"}"#;
+        let doc = schema.parse_document(doc_json).unwrap();
+        assert_eq!(&Value::Bool(true), doc.get_first(bool_field).unwrap());
+
+        let doc_json = r#"{"bool": "false"}"#;
+        let doc = schema.parse_document(doc_json).unwrap();
+        assert_eq!(&Value::Bool(false), doc.get_first(bool_field).unwrap());
+    }
+
+    #[test]
+    fn test_to_number_no_coercion() {
+        let mut schema_builder = Schema::builder();
+        schema_builder.add_i64_field("i64", NumericOptions::default());
+        schema_builder.add_u64_field("u64", NumericOptions::default());
+        schema_builder.add_f64_field("f64", NumericOptions::default());
+        let schema = schema_builder.build();
+        assert!(schema
+            .parse_document(r#"{"u64": "100"}"#)
+            .unwrap_err()
+            .to_string()
+            .contains("a u64"));
+
+        assert!(schema
+            .parse_document(r#"{"i64": "100"}"#)
+            .unwrap_err()
+            .to_string()
+            .contains("a i64"));
+
+        assert!(schema
+            .parse_document(r#"{"f64": "100"}"#)
+            .unwrap_err()
+            .to_string()
+            .contains("a f64"));
+    }
+
     #[test]
     fn test_deserialize_json_date() {
         let mut schema_builder = Schema::builder();
diff --git a/src/schema/flags.rs b/src/schema/flags.rs
index 7c7238908e..449e12a4c6 100644
--- a/src/schema/flags.rs
+++ b/src/schema/flags.rs
@@ -31,6 +31,18 @@ pub const INDEXED: SchemaFlagList<IndexedFlag, ()> = SchemaFlagList {
     tail: (),
 };
 
+#[derive(Clone)]
+pub struct CoerceFlag;
+/// Flag to mark the field as coerced.
+///
+/// `COERCE` will try to convert values into its value type if they don't match.
+///
+/// See `NumericOptions::set_coerce` and `TextOptions::set_coerce`.
+pub const COERCE: SchemaFlagList<CoerceFlag, ()> = SchemaFlagList {
+    head: CoerceFlag,
+    tail: (),
+};
+
 #[derive(Clone)]
 pub struct FastFlag;
 /// Flag to mark the field as a fast field (similar to Lucene's DocValues)
diff --git a/src/schema/json_object_options.rs b/src/schema/json_object_options.rs
index ea4f47c191..1f7653cfb5 100644
--- a/src/schema/json_object_options.rs
+++ b/src/schema/json_object_options.rs
@@ -39,6 +39,7 @@ pub struct JsonObjectOptions {
     /// `{"root": {"child": {"with": {"dot": "hello"}}}}`
     /// and it can be search using the following query:
     /// `root.child.with.dot:hello`
+    #[serde(default)]
     expand_dots_enabled: bool,
 }
 
diff --git a/src/schema/mod.rs b/src/schema/mod.rs
index fcf2735285..5fc1159bdc 100644
--- a/src/schema/mod.rs
+++ b/src/schema/mod.rs
@@ -138,7 +138,7 @@ pub use self::field::Field;
 pub use self::field_entry::FieldEntry;
 pub use self::field_type::{FieldType, Type};
 pub use self::field_value::FieldValue;
-pub use self::flags::{FAST, INDEXED, STORED};
+pub use self::flags::{COERCE, FAST, INDEXED, STORED};
 pub use self::index_record_option::IndexRecordOption;
 pub use self::ip_options::{IntoIpv6Addr, IpAddrOptions};
 pub use self::json_object_options::JsonObjectOptions;
diff --git a/src/schema/numeric_options.rs b/src/schema/numeric_options.rs
index 676a7e8630..97a2ea5ec7 100644
--- a/src/schema/numeric_options.rs
+++ b/src/schema/numeric_options.rs
@@ -2,6 +2,7 @@ use std::ops::BitOr;
 
 use serde::{Deserialize, Serialize};
 
+use super::flags::CoerceFlag;
 use crate::schema::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag};
 
 #[deprecated(since = "0.17.0", note = "Use NumericOptions instead.")]
@@ -17,6 +18,12 @@ pub struct NumericOptions {
     fieldnorms: bool, // This attribute only has an effect if indexed is true.
     fast: bool,
     stored: bool,
+    #[serde(skip_serializing_if = "is_false")]
+    coerce: bool,
+}
+
+fn is_false(val: &bool) -> bool {
+    !val
 }
 
 /// For backward compatibility we add an intermediary to interpret the
@@ -32,6 +39,8 @@ struct NumericOptionsDeser {
     #[serde(default)]
     fast: bool,
     stored: bool,
+    #[serde(default)]
+    coerce: bool,
 }
 
 impl From<NumericOptionsDeser> for NumericOptions {
@@ -41,6 +50,7 @@ impl From<NumericOptionsDeser> for NumericOptions {
             fieldnorms: deser.fieldnorms.unwrap_or(deser.indexed),
             fast: deser.fast,
             stored: deser.stored,
+            coerce: deser.coerce,
         }
     }
 }
@@ -66,6 +76,18 @@ impl NumericOptions {
         self.fast
     }
 
+    /// Returns true if values should be coerced to numbers.
+    pub fn should_coerce(&self) -> bool {
+        self.coerce
+    }
+
+    /// Try to coerce values if they are not a number. Defaults to false.
+    #[must_use]
+    pub fn set_coerce(mut self) -> Self {
+        self.coerce = true;
+        self
+    }
+
     /// Set the field as stored.
     ///
     /// Only the fields that are set as *stored* are
@@ -117,6 +139,18 @@ impl From<()> for NumericOptions {
     }
 }
 
+impl From<CoerceFlag> for NumericOptions {
+    fn from(_: CoerceFlag) -> NumericOptions {
+        NumericOptions {
+            indexed: false,
+            fieldnorms: false,
+            stored: false,
+            fast: false,
+            coerce: true,
+        }
+    }
+}
+
 impl From<FastFlag> for NumericOptions {
     fn from(_: FastFlag) -> Self {
         NumericOptions {
@@ -124,6 +158,7 @@ impl From<FastFlag> for NumericOptions {
             fieldnorms: false,
             stored: false,
             fast: true,
+            coerce: false,
         }
     }
 }
@@ -135,6 +170,7 @@ impl From<StoredFlag> for NumericOptions {
             fieldnorms: false,
             stored: true,
             fast: false,
+            coerce: false,
         }
     }
 }
@@ -146,6 +182,7 @@ impl From<IndexedFlag> for NumericOptions {
             fieldnorms: true,
             stored: false,
             fast: false,
+            coerce: false,
         }
     }
 }
@@ -160,6 +197,7 @@ impl<T: Into<NumericOptions>> BitOr<T> for NumericOptions {
             fieldnorms: self.fieldnorms | other.fieldnorms,
             stored: self.stored | other.stored,
             fast: self.fast | other.fast,
+            coerce: self.coerce | other.coerce,
         }
     }
 }
@@ -192,7 +230,8 @@ mod tests {
                 indexed: true,
                 fieldnorms: true,
                 fast: false,
-                stored: false
+                stored: false,
+                coerce: false,
             }
         );
     }
@@ -210,7 +249,8 @@ mod tests {
                 indexed: false,
                 fieldnorms: false,
                 fast: false,
-                stored: false
+                stored: false,
+                coerce: false,
             }
         );
     }
@@ -229,7 +269,8 @@ mod tests {
                 indexed: true,
                 fieldnorms: false,
                 fast: false,
-                stored: false
+                stored: false,
+                coerce: false,
             }
         );
     }
@@ -249,7 +290,30 @@ mod tests {
                 indexed: false,
                 fieldnorms: true,
                 fast: false,
-                stored: false
+                stored: false,
+                coerce: false,
+            }
+        );
+    }
+
+    #[test]
+    fn test_int_options_deser_if_coerce_true() {
+        // this one is kind of useless, at least at the moment
+        let json = r#"{
+            "indexed": false,
+            "fieldnorms": true,
+            "stored": false,
+            "coerce": true
+        }"#;
+        let int_options: NumericOptions = serde_json::from_str(json).unwrap();
+        assert_eq!(
+            &int_options,
+            &NumericOptions {
+                indexed: false,
+                fieldnorms: true,
+                fast: false,
+                stored: false,
+                coerce: true,
             }
         );
     }
diff --git a/src/schema/text_options.rs b/src/schema/text_options.rs
index 30944e82e7..8eb520f87d 100644
--- a/src/schema/text_options.rs
+++ b/src/schema/text_options.rs
@@ -3,7 +3,7 @@ use std::ops::BitOr;
 
 use serde::{Deserialize, Serialize};
 
-use super::flags::FastFlag;
+use super::flags::{CoerceFlag, FastFlag};
 use crate::schema::flags::{SchemaFlagList, StoredFlag};
 use crate::schema::IndexRecordOption;
 
@@ -17,6 +17,14 @@ pub struct TextOptions {
     stored: bool,
     #[serde(default)]
     fast: bool,
+    #[serde(default)]
+    #[serde(skip_serializing_if = "is_false")]
+    /// Coerce values if they are not of type string.
+    coerce: bool,
+}
+
+fn is_false(val: &bool) -> bool {
+    !val
 }
 
 impl TextOptions {
@@ -35,6 +43,11 @@ impl TextOptions {
         self.fast
     }
 
+    /// Returns true if values should be coerced to strings (numbers, booleans, null).
+    pub fn should_coerce(&self) -> bool {
+        self.coerce
+    }
+
     /// Set the field as a fast field.
     ///
     /// Fast fields are designed for random access.
@@ -56,7 +69,14 @@ impl TextOptions {
         self
     }
 
-    /// Sets the field as stored
+    /// Coerce values if they are not of type string. Defaults to false.
+    #[must_use]
+    pub fn set_coerce(mut self) -> TextOptions {
+        self.coerce = true;
+        self
+    }
+
+    /// Sets the field as stored.
     #[must_use]
     pub fn set_stored(mut self) -> TextOptions {
         self.stored = true;
@@ -180,6 +200,7 @@ pub const STRING: TextOptions = TextOptions {
     }),
     stored: false,
     fast: false,
+    coerce: false,
 };
 
 /// The field will be tokenized and indexed.
@@ -190,6 +211,7 @@ pub const TEXT: TextOptions = TextOptions {
         record: IndexRecordOption::WithFreqsAndPositions,
     }),
     stored: false,
+    coerce: false,
     fast: false,
 };
 
@@ -202,6 +224,7 @@ impl<T: Into<TextOptions>> BitOr<T> for TextOptions {
             indexing: self.indexing.or(other.indexing),
             stored: self.stored | other.stored,
             fast: self.fast | other.fast,
+            coerce: self.coerce | other.coerce,
         }
     }
 }
@@ -218,6 +241,18 @@ impl From<StoredFlag> for TextOptions {
             indexing: None,
             stored: true,
             fast: false,
+            coerce: false,
+        }
+    }
+}
+
+impl From<CoerceFlag> for TextOptions {
+    fn from(_: CoerceFlag) -> TextOptions {
+        TextOptions {
+            indexing: None,
+            stored: false,
+            fast: false,
+            coerce: true,
         }
     }
 }
@@ -228,6 +263,7 @@ impl From<FastFlag> for TextOptions {
             indexing: None,
             stored: false,
             fast: true,
+            coerce: false,
         }
     }
 }