Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add coerce option for text and numbers types #1904

Merged
merged 3 commits into from
Mar 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 179 additions & 15 deletions src/schema/field_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -329,16 +329,66 @@ impl FieldType {
Ok(DateTime::from_utc(dt_with_fixed_tz).into())
}
FieldType::Str(_) => Ok(Value::Str(field_text)),
FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) => {
Err(ValueParsingError::TypeError {
expected: "an integer",
json: JsonValue::String(field_text),
})
FieldType::U64(opt) => {
if opt.should_coerce() {
PSeitz marked this conversation as resolved.
Show resolved Hide resolved
Ok(Value::U64(field_text.parse().map_err(|_| {
ValueParsingError::TypeError {
expected: "a u64 or a u64 as string",
json: JsonValue::String(field_text),
}
})?))
} else {
Err(ValueParsingError::TypeError {
expected: "a u64",
json: JsonValue::String(field_text),
})
}
}
FieldType::I64(opt) => {
if opt.should_coerce() {
Ok(Value::I64(field_text.parse().map_err(|_| {
ValueParsingError::TypeError {
expected: "a i64 or a i64 as string",
json: JsonValue::String(field_text),
}
})?))
} else {
Err(ValueParsingError::TypeError {
expected: "a i64",
json: JsonValue::String(field_text),
})
}
}
FieldType::F64(opt) => {
if opt.should_coerce() {
Ok(Value::F64(field_text.parse().map_err(|_| {
ValueParsingError::TypeError {
expected: "a f64 or a f64 as string",
json: JsonValue::String(field_text),
}
})?))
} else {
Err(ValueParsingError::TypeError {
expected: "a f64",
json: JsonValue::String(field_text),
})
}
}
// A JSON string is only accepted for a bool field when coercion is enabled.
// In that case the text is parsed with `str::parse::<bool>`; otherwise the
// string is rejected outright.
FieldType::Bool(opt) => {
    if opt.should_coerce() {
        Ok(Value::Bool(field_text.parse().map_err(|_| {
            // The message names the bool type, mirroring the wording of the
            // u64/i64/f64 coercion arms ("a <type> or a <type> as string").
            ValueParsingError::TypeError {
                expected: "a bool or a bool as string",
                json: JsonValue::String(field_text),
            }
        })?))
    } else {
        Err(ValueParsingError::TypeError {
            expected: "a boolean",
            json: JsonValue::String(field_text),
        })
    }
}
FieldType::Bool(_) => Err(ValueParsingError::TypeError {
expected: "a boolean",
json: JsonValue::String(field_text),
}),
FieldType::Facet(_) => Ok(Value::Facet(Facet::from(&field_text))),
FieldType::Bytes(_) => BASE64
.decode(&field_text)
Expand Down Expand Up @@ -395,12 +445,20 @@ impl FieldType {
expected: "a boolean",
json: JsonValue::Number(field_val_num),
}),
FieldType::Str(_) | FieldType::Facet(_) | FieldType::Bytes(_) => {
Err(ValueParsingError::TypeError {
expected: "a string",
json: JsonValue::Number(field_val_num),
})
FieldType::Str(opt) => {
if opt.should_coerce() {
Ok(Value::Str(field_val_num.to_string()))
} else {
Err(ValueParsingError::TypeError {
expected: "a string",
json: JsonValue::Number(field_val_num),
})
}
}
FieldType::Facet(_) | FieldType::Bytes(_) => Err(ValueParsingError::TypeError {
expected: "a string",
json: JsonValue::Number(field_val_num),
}),
FieldType::JsonObject(_) => Err(ValueParsingError::TypeError {
expected: "a json object",
json: JsonValue::Number(field_val_num),
Expand Down Expand Up @@ -431,11 +489,38 @@ impl FieldType {
},
JsonValue::Bool(json_bool_val) => match self {
FieldType::Bool(_) => Ok(Value::Bool(json_bool_val)),
FieldType::Str(opt) => {
if opt.should_coerce() {
Ok(Value::Str(json_bool_val.to_string()))
} else {
Err(ValueParsingError::TypeError {
expected: "a string",
json: JsonValue::Bool(json_bool_val),
})
}
}
_ => Err(ValueParsingError::TypeError {
expected: self.value_type().name(),
json: JsonValue::Bool(json_bool_val),
}),
},
// Could also just filter them
JsonValue::Null => match self {
FieldType::Str(opt) => {
if opt.should_coerce() {
Ok(Value::Str("null".to_string()))
} else {
Err(ValueParsingError::TypeError {
expected: "a string",
json: JsonValue::Null,
})
}
}
_ => Err(ValueParsingError::TypeError {
expected: self.value_type().name(),
json: JsonValue::Null,
}),
},
_ => Err(ValueParsingError::TypeError {
expected: self.value_type().name(),
json: json.clone(),
Expand All @@ -450,11 +535,90 @@ mod tests {

use super::FieldType;
use crate::schema::field_type::ValueParsingError;
use crate::schema::{Schema, TextOptions, Type, Value, INDEXED};
use crate::schema::{NumericOptions, Schema, TextOptions, Type, Value, COERCE, INDEXED};
use crate::time::{Date, Month, PrimitiveDateTime, Time};
use crate::tokenizer::{PreTokenizedString, Token};
use crate::{DateTime, Document};

#[test]
fn test_to_string_coercion() {
    let mut builder = Schema::builder();
    let id_field = builder.add_text_field("id", COERCE);
    let schema = builder.build();

    // A text field flagged with COERCE must store non-string JSON scalars as
    // their textual form.
    // Not sure if the null coercion (stored as the literal "null") is the best approach.
    let cases = [
        (r#"{"id": 100}"#, "100"),
        (r#"{"id": true}"#, "true"),
        (r#"{"id": null}"#, "null"),
    ];
    for (doc_json, expected_text) in cases {
        let parsed = schema.parse_document(doc_json).unwrap();
        assert_eq!(
            &Value::Str(expected_text.to_string()),
            parsed.get_first(id_field).unwrap()
        );
    }
}

#[test]
fn test_to_number_coercion() {
    let mut builder = Schema::builder();
    let signed_field = builder.add_i64_field("i64", COERCE);
    let unsigned_field = builder.add_u64_field("u64", COERCE);
    let float_field = builder.add_f64_field("f64", COERCE);
    let schema = builder.build();

    // Every numeric field receives its value as a JSON string.
    let parsed = schema
        .parse_document(r#"{"i64": "100", "u64": "100", "f64": "100"}"#)
        .unwrap();

    // With COERCE set, each string must have been parsed into the field's own numeric type.
    let expectations = [
        (signed_field, Value::I64(100)),
        (unsigned_field, Value::U64(100)),
        (float_field, Value::F64(100.0)),
    ];
    for (field, expected) in &expectations {
        assert_eq!(expected, parsed.get_first(*field).unwrap());
    }
}

#[test]
fn test_to_bool_coercion() {
    let mut builder = Schema::builder();
    let flag_field = builder.add_bool_field("bool", COERCE);
    let schema = builder.build();

    // Both boolean literals, given as JSON strings, must be coerced to real booleans.
    let cases = [
        (r#"{"bool": "true"}"#, true),
        (r#"{"bool": "false"}"#, false),
    ];
    for (doc_json, expected) in cases {
        let parsed = schema.parse_document(doc_json).unwrap();
        assert_eq!(&Value::Bool(expected), parsed.get_first(flag_field).unwrap());
    }
}

#[test]
fn test_to_number_no_coercion() {
    let mut builder = Schema::builder();
    builder.add_i64_field("i64", NumericOptions::default());
    builder.add_u64_field("u64", NumericOptions::default());
    builder.add_f64_field("f64", NumericOptions::default());
    let schema = builder.build();

    // With default options a JSON string must be rejected for a numeric field,
    // and the error text has to mention the expected numeric type.
    let rejected = [
        (r#"{"u64": "100"}"#, "a u64"),
        (r#"{"i64": "100"}"#, "a i64"),
        (r#"{"f64": "100"}"#, "a f64"),
    ];
    for (doc_json, expected_fragment) in rejected {
        let error_message = schema.parse_document(doc_json).unwrap_err().to_string();
        assert!(error_message.contains(expected_fragment));
    }
}

#[test]
fn test_deserialize_json_date() {
let mut schema_builder = Schema::builder();
Expand Down
12 changes: 12 additions & 0 deletions src/schema/flags.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,18 @@ pub const INDEXED: SchemaFlagList<IndexedFlag, ()> = SchemaFlagList {
tail: (),
};

/// Marker type carried by the [`COERCE`] schema flag.
#[derive(Clone)]
pub struct CoerceFlag;
/// Flag to enable value coercion on a field.
///
/// When a JSON value does not match the field's value type, a field flagged with `COERCE`
/// tries to convert the value instead of rejecting it:
///
/// - text fields accept JSON numbers, booleans and `null`, storing their string form
///   (`100` becomes `"100"`, `true` becomes `"true"`, `null` becomes `"null"`);
/// - `u64`, `i64`, `f64` and `bool` fields accept JSON strings and parse them into the
///   field's type, returning a type error if the string cannot be parsed.
///
/// Without this flag such mismatching values are rejected with a type error.
pub const COERCE: SchemaFlagList<CoerceFlag, ()> = SchemaFlagList {
head: CoerceFlag,
tail: (),
};

#[derive(Clone)]
pub struct FastFlag;
/// Flag to mark the field as a fast field (similar to Lucene's DocValues)
Expand Down
1 change: 1 addition & 0 deletions src/schema/json_object_options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ pub struct JsonObjectOptions {
/// `{"root": {"child": {"with": {"dot": "hello"}}}}`
/// and it can be search using the following query:
/// `root.child.with.dot:hello`
#[serde(default)]
expand_dots_enabled: bool,
}

Expand Down
2 changes: 1 addition & 1 deletion src/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ pub use self::field::Field;
pub use self::field_entry::FieldEntry;
pub use self::field_type::{FieldType, Type};
pub use self::field_value::FieldValue;
pub use self::flags::{FAST, INDEXED, STORED};
pub use self::flags::{COERCE, FAST, INDEXED, STORED};
pub use self::index_record_option::IndexRecordOption;
pub use self::ip_options::{IntoIpv6Addr, IpAddrOptions};
pub use self::json_object_options::JsonObjectOptions;
Expand Down
Loading