Skip to content

Commit

Permalink
add coerce option for text and numbers types (#1904)
Browse files Browse the repository at this point in the history
* add coerce option for text and numbers types

allow to coerce the field type when indexing if the type does not match

* Apply suggestions from code review

Co-authored-by: Paul Masurel <[email protected]>

* add tests, add COERCE flag, include bool in coercion

---------

Co-authored-by: Paul Masurel <[email protected]>
  • Loading branch information
PSeitz and fulmicoton authored Mar 1, 2023
1 parent 850a0d7 commit faa706d
Show file tree
Hide file tree
Showing 6 changed files with 299 additions and 22 deletions.
194 changes: 179 additions & 15 deletions src/schema/field_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -329,16 +329,66 @@ impl FieldType {
Ok(DateTime::from_utc(dt_with_fixed_tz).into())
}
FieldType::Str(_) => Ok(Value::Str(field_text)),
FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) => {
Err(ValueParsingError::TypeError {
expected: "an integer",
json: JsonValue::String(field_text),
})
FieldType::U64(opt) => {
if opt.should_coerce() {
Ok(Value::U64(field_text.parse().map_err(|_| {
ValueParsingError::TypeError {
expected: "a u64 or a u64 as string",
json: JsonValue::String(field_text),
}
})?))
} else {
Err(ValueParsingError::TypeError {
expected: "a u64",
json: JsonValue::String(field_text),
})
}
}
FieldType::I64(opt) => {
if opt.should_coerce() {
Ok(Value::I64(field_text.parse().map_err(|_| {
ValueParsingError::TypeError {
expected: "a i64 or a i64 as string",
json: JsonValue::String(field_text),
}
})?))
} else {
Err(ValueParsingError::TypeError {
expected: "a i64",
json: JsonValue::String(field_text),
})
}
}
FieldType::F64(opt) => {
if opt.should_coerce() {
Ok(Value::F64(field_text.parse().map_err(|_| {
ValueParsingError::TypeError {
expected: "a f64 or a f64 as string",
json: JsonValue::String(field_text),
}
})?))
} else {
Err(ValueParsingError::TypeError {
expected: "a f64",
json: JsonValue::String(field_text),
})
}
}
FieldType::Bool(opt) => {
    // A JSON string only becomes a bool value when the field opted into
    // coercion; the text then goes through `str::parse::<bool>`.
    if opt.should_coerce() {
        Ok(Value::Bool(field_text.parse().map_err(|_| {
            // Report the type this field actually expects (the message
            // previously said "i64", copied from the integer arm).
            ValueParsingError::TypeError {
                expected: "a bool or a bool as string",
                json: JsonValue::String(field_text),
            }
        })?))
    } else {
        // Without coercion a string is always a type error for a bool field.
        Err(ValueParsingError::TypeError {
            expected: "a boolean",
            json: JsonValue::String(field_text),
        })
    }
}
FieldType::Bool(_) => Err(ValueParsingError::TypeError {
expected: "a boolean",
json: JsonValue::String(field_text),
}),
FieldType::Facet(_) => Ok(Value::Facet(Facet::from(&field_text))),
FieldType::Bytes(_) => BASE64
.decode(&field_text)
Expand Down Expand Up @@ -395,12 +445,20 @@ impl FieldType {
expected: "a boolean",
json: JsonValue::Number(field_val_num),
}),
FieldType::Str(_) | FieldType::Facet(_) | FieldType::Bytes(_) => {
Err(ValueParsingError::TypeError {
expected: "a string",
json: JsonValue::Number(field_val_num),
})
FieldType::Str(opt) => {
if opt.should_coerce() {
Ok(Value::Str(field_val_num.to_string()))
} else {
Err(ValueParsingError::TypeError {
expected: "a string",
json: JsonValue::Number(field_val_num),
})
}
}
FieldType::Facet(_) | FieldType::Bytes(_) => Err(ValueParsingError::TypeError {
expected: "a string",
json: JsonValue::Number(field_val_num),
}),
FieldType::JsonObject(_) => Err(ValueParsingError::TypeError {
expected: "a json object",
json: JsonValue::Number(field_val_num),
Expand Down Expand Up @@ -431,11 +489,38 @@ impl FieldType {
},
JsonValue::Bool(json_bool_val) => match self {
FieldType::Bool(_) => Ok(Value::Bool(json_bool_val)),
FieldType::Str(opt) => {
if opt.should_coerce() {
Ok(Value::Str(json_bool_val.to_string()))
} else {
Err(ValueParsingError::TypeError {
expected: "a string",
json: JsonValue::Bool(json_bool_val),
})
}
}
_ => Err(ValueParsingError::TypeError {
expected: self.value_type().name(),
json: JsonValue::Bool(json_bool_val),
}),
},
// Could also just filter them
JsonValue::Null => match self {
FieldType::Str(opt) => {
if opt.should_coerce() {
Ok(Value::Str("null".to_string()))
} else {
Err(ValueParsingError::TypeError {
expected: "a string",
json: JsonValue::Null,
})
}
}
_ => Err(ValueParsingError::TypeError {
expected: self.value_type().name(),
json: JsonValue::Null,
}),
},
_ => Err(ValueParsingError::TypeError {
expected: self.value_type().name(),
json: json.clone(),
Expand All @@ -450,11 +535,90 @@ mod tests {

use super::FieldType;
use crate::schema::field_type::ValueParsingError;
use crate::schema::{Schema, TextOptions, Type, Value, INDEXED};
use crate::schema::{NumericOptions, Schema, TextOptions, Type, Value, COERCE, INDEXED};
use crate::time::{Date, Month, PrimitiveDateTime, Time};
use crate::tokenizer::{PreTokenizedString, Token};
use crate::{DateTime, Document};

#[test]
fn test_to_string_coercion() {
    // A text field declared with `COERCE` stores non-string JSON scalars
    // as their textual form.
    let mut builder = Schema::builder();
    let id_field = builder.add_text_field("id", COERCE);
    let schema = builder.build();

    let cases = [
        (r#"{"id": 100}"#, "100"),
        (r#"{"id": true}"#, "true"),
        // Not sure if this null coercion is the best approach
        (r#"{"id": null}"#, "null"),
    ];

    for &(doc_json, expected_text) in &cases {
        let parsed = schema.parse_document(doc_json).unwrap();
        let expected = Value::Str(expected_text.to_string());
        assert_eq!(&expected, parsed.get_first(id_field).unwrap());
    }
}

#[test]
fn test_to_number_coercion() {
    // Numeric fields declared with `COERCE` accept numbers given as JSON strings.
    let mut builder = Schema::builder();
    let signed_field = builder.add_i64_field("i64", COERCE);
    let unsigned_field = builder.add_u64_field("u64", COERCE);
    let float_field = builder.add_f64_field("f64", COERCE);
    let schema = builder.build();

    let parsed = schema
        .parse_document(r#"{"i64": "100", "u64": "100", "f64": "100"}"#)
        .unwrap();

    let stored_signed = parsed.get_first(signed_field).unwrap();
    assert_eq!(&Value::I64(100), stored_signed);

    let stored_unsigned = parsed.get_first(unsigned_field).unwrap();
    assert_eq!(&Value::U64(100), stored_unsigned);

    let stored_float = parsed.get_first(float_field).unwrap();
    assert_eq!(&Value::F64(100.0), stored_float);
}

#[test]
fn test_to_bool_coercion() {
    // A bool field declared with `COERCE` accepts "true" / "false" given as strings.
    let mut builder = Schema::builder();
    let flag_field = builder.add_bool_field("bool", COERCE);
    let schema = builder.build();

    let cases = [
        (r#"{"bool": "true"}"#, true),
        (r#"{"bool": "false"}"#, false),
    ];

    for &(doc_json, expected_flag) in &cases {
        let parsed = schema.parse_document(doc_json).unwrap();
        assert_eq!(
            &Value::Bool(expected_flag),
            parsed.get_first(flag_field).unwrap()
        );
    }
}

#[test]
fn test_to_number_no_coercion() {
    // With default options (no `COERCE`), a number given as a string is
    // rejected, and the error names the expected numeric type.
    let mut builder = Schema::builder();
    builder.add_i64_field("i64", NumericOptions::default());
    builder.add_u64_field("u64", NumericOptions::default());
    builder.add_f64_field("f64", NumericOptions::default());
    let schema = builder.build();

    let cases = [
        (r#"{"u64": "100"}"#, "a u64"),
        (r#"{"i64": "100"}"#, "a i64"),
        (r#"{"f64": "100"}"#, "a f64"),
    ];

    for &(doc_json, expected_fragment) in &cases {
        let error_text = schema.parse_document(doc_json).unwrap_err().to_string();
        assert!(error_text.contains(expected_fragment));
    }
}

#[test]
fn test_deserialize_json_date() {
let mut schema_builder = Schema::builder();
Expand Down
12 changes: 12 additions & 0 deletions src/schema/flags.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,18 @@ pub const INDEXED: SchemaFlagList<IndexedFlag, ()> = SchemaFlagList {
tail: (),
};

/// Marker type carried by the [`COERCE`] schema flag.
#[derive(Clone)]
pub struct CoerceFlag;
/// Flag to mark the field as coerced.
///
/// `COERCE` will try to convert values into the field's value type if they
/// don't match, instead of rejecting the document with a type error.
///
/// For instance, a numeric or bool field accepts its value written as a JSON
/// string (`"100"`, `"true"`), and a text field accepts a JSON number, bool
/// or `null` and stores its textual form.
pub const COERCE: SchemaFlagList<CoerceFlag, ()> = SchemaFlagList {
head: CoerceFlag,
tail: (),
};

#[derive(Clone)]
pub struct FastFlag;
/// Flag to mark the field as a fast field (similar to Lucene's DocValues)
Expand Down
1 change: 1 addition & 0 deletions src/schema/json_object_options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ pub struct JsonObjectOptions {
/// `{"root": {"child": {"with": {"dot": "hello"}}}}`
/// and it can be search using the following query:
/// `root.child.with.dot:hello`
#[serde(default)]
expand_dots_enabled: bool,
}

Expand Down
2 changes: 1 addition & 1 deletion src/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ pub use self::field::Field;
pub use self::field_entry::FieldEntry;
pub use self::field_type::{FieldType, Type};
pub use self::field_value::FieldValue;
pub use self::flags::{FAST, INDEXED, STORED};
pub use self::flags::{COERCE, FAST, INDEXED, STORED};
pub use self::index_record_option::IndexRecordOption;
pub use self::ip_options::{IntoIpv6Addr, IpAddrOptions};
pub use self::json_object_options::JsonObjectOptions;
Expand Down
Loading

0 comments on commit faa706d

Please sign in to comment.