diff --git a/common/src/serialize.rs b/common/src/serialize.rs index 4d48e16a79..e89ade6535 100644 --- a/common/src/serialize.rs +++ b/common/src/serialize.rs @@ -247,6 +247,11 @@ pub mod test { fixed_size_test::(); } + #[test] + fn test_serialize_bool() { + fixed_size_test::(); + } + #[test] fn test_serialize_string() { assert_eq!(serialize_test(String::from("")), 1); diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 881cbc234e..ce16d756ad 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -160,6 +160,37 @@ impl FastValue for f64 { } } +impl FastValue for bool { + fn from_u64(val: u64) -> Self { + match val { + 0 => false, + _ => true, + } + } + + fn to_u64(&self) -> u64 { + match self { + false => 0, + true => 1, + } + } + + fn fast_field_cardinality(field_type: &FieldType) -> Option { + match *field_type { + FieldType::Bool(ref integer_options) => integer_options.get_fastfield_cardinality(), + _ => None, + } + } + + fn as_u64(&self) -> u64 { + *self as u64 + } + + fn to_type() -> Type { + Type::Bool + } +} + impl FastValue for DateTime { fn from_u64(timestamp_u64: u64) -> Self { let unix_timestamp = i64::from_u64(timestamp_u64); @@ -191,8 +222,9 @@ fn value_to_u64(value: &Value) -> u64 { Value::U64(val) => val.to_u64(), Value::I64(val) => val.to_u64(), Value::F64(val) => val.to_u64(), + Value::Bool(val) => val.to_u64(), Value::Date(val) => val.to_u64(), - _ => panic!("Expected a u64/i64/f64/date field, got {:?} ", value), + _ => panic!("Expected a u64/i64/f64/bool/date field, got {:?} ", value), } } @@ -788,6 +820,118 @@ mod tests { } Ok(()) } + + #[test] + pub fn test_fastfield_bool() { + let test_fastfield = DynamicFastFieldReader::::from(vec![true, false, true, false]); + assert_eq!(test_fastfield.get(0), true); + assert_eq!(test_fastfield.get(1), false); + assert_eq!(test_fastfield.get(2), true); + assert_eq!(test_fastfield.get(3), false); + } + + #[test] + pub fn test_fastfield_bool_small() -> crate::Result<()> { + let path = 
Path::new("test_bool"); + let directory: RamDirectory = RamDirectory::create(); + + let mut schema_builder = Schema::builder(); + schema_builder.add_bool_field("field_bool", FAST); + let schema = schema_builder.build(); + let field = schema.get_field("field_bool").unwrap(); + + { + let write: WritePtr = directory.open_write(path).unwrap(); + let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap(); + let mut fast_field_writers = FastFieldsWriter::from_schema(&schema); + fast_field_writers.add_document(&doc!(field=>true)); + fast_field_writers.add_document(&doc!(field=>false)); + fast_field_writers.add_document(&doc!(field=>true)); + fast_field_writers.add_document(&doc!(field=>false)); + fast_field_writers + .serialize(&mut serializer, &HashMap::new(), None) + .unwrap(); + serializer.close().unwrap(); + } + let file = directory.open_read(&path).unwrap(); + assert_eq!(file.len(), 36); + let composite_file = CompositeFile::open(&file)?; + let file = composite_file.open_read(field).unwrap(); + let fast_field_reader = DynamicFastFieldReader::::open(file)?; + assert_eq!(fast_field_reader.get(0), true); + assert_eq!(fast_field_reader.get(1), false); + assert_eq!(fast_field_reader.get(2), true); + assert_eq!(fast_field_reader.get(3), false); + + Ok(()) + } + + #[test] + pub fn test_fastfield_bool_large() -> crate::Result<()> { + let path = Path::new("test_bool"); + let directory: RamDirectory = RamDirectory::create(); + + let mut schema_builder = Schema::builder(); + schema_builder.add_bool_field("field_bool", FAST); + let schema = schema_builder.build(); + let field = schema.get_field("field_bool").unwrap(); + + { + let write: WritePtr = directory.open_write(path).unwrap(); + let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap(); + let mut fast_field_writers = FastFieldsWriter::from_schema(&schema); + for _ in 0..50 { + fast_field_writers.add_document(&doc!(field=>true)); + fast_field_writers.add_document(&doc!(field=>false)); 
+ } + fast_field_writers + .serialize(&mut serializer, &HashMap::new(), None) + .unwrap(); + serializer.close().unwrap(); + } + let file = directory.open_read(&path).unwrap(); + assert_eq!(file.len(), 48); + let composite_file = CompositeFile::open(&file)?; + let file = composite_file.open_read(field).unwrap(); + let fast_field_reader = DynamicFastFieldReader::::open(file)?; + for i in 0..25 { + assert_eq!(fast_field_reader.get(i * 2), true); + assert_eq!(fast_field_reader.get(i * 2 + 1), false); + } + + Ok(()) + } + + #[test] + pub fn test_fastfield_bool_default_value() -> crate::Result<()> { + let path = Path::new("test_bool"); + let directory: RamDirectory = RamDirectory::create(); + + let mut schema_builder = Schema::builder(); + schema_builder.add_bool_field("field_bool", FAST); + let schema = schema_builder.build(); + let field = schema.get_field("field_bool").unwrap(); + + { + let write: WritePtr = directory.open_write(path).unwrap(); + let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap(); + let mut fast_field_writers = FastFieldsWriter::from_schema(&schema); + let doc = Document::default(); + fast_field_writers.add_document(&doc); + fast_field_writers + .serialize(&mut serializer, &HashMap::new(), None) + .unwrap(); + serializer.close().unwrap(); + } + let file = directory.open_read(&path).unwrap(); + assert_eq!(file.len(), 35); + let composite_file = CompositeFile::open(&file)?; + let file = composite_file.open_read(field).unwrap(); + let fast_field_reader = DynamicFastFieldReader::::open(file)?; + assert_eq!(fast_field_reader.get(0), false); + + Ok(()) + } } #[cfg(all(test, feature = "unstable"))] diff --git a/src/fastfield/multivalued/mod.rs b/src/fastfield/multivalued/mod.rs index 45d08a8b89..172f758e0e 100644 --- a/src/fastfield/multivalued/mod.rs +++ b/src/fastfield/multivalued/mod.rs @@ -226,6 +226,38 @@ mod tests { Ok(()) } + #[test] + fn test_multivalued_bool() -> crate::Result<()> { + let mut schema_builder = 
Schema::builder(); + let bool_field = schema_builder.add_bool_field( + "multifield", + NumericOptions::default().set_fast(Cardinality::MultiValues), + ); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_for_tests()?; + index_writer.add_document(doc!(bool_field=> true, bool_field => false))?; + index_writer.add_document(doc!())?; + index_writer.add_document(doc!(bool_field=> false))?; + index_writer + .add_document(doc!(bool_field=> true, bool_field => true, bool_field => false))?; + index_writer.commit()?; + + let searcher = index.reader()?.searcher(); + let segment_reader = searcher.segment_reader(0); + let mut vals = Vec::new(); + let multi_value_reader = segment_reader.fast_fields().bools(bool_field).unwrap(); + multi_value_reader.get_vals(2, &mut vals); + assert_eq!(&vals, &[false]); + multi_value_reader.get_vals(0, &mut vals); + assert_eq!(&vals, &[true, false]); + multi_value_reader.get_vals(1, &mut vals); + assert!(vals.is_empty()); + multi_value_reader.get_vals(3, &mut vals); + assert_eq!(&vals, &[true, true, false]); + Ok(()) + } + fn test_multivalued_no_panic(ops: &[IndexingOp]) -> crate::Result<()> { let mut schema_builder = Schema::builder(); let field = schema_builder.add_u64_field( diff --git a/src/fastfield/readers.rs b/src/fastfield/readers.rs index a844bfa03a..9f75baafdc 100644 --- a/src/fastfield/readers.rs +++ b/src/fastfield/readers.rs @@ -21,6 +21,7 @@ pub(crate) enum FastType { I64, U64, F64, + Bool, Date, } @@ -35,6 +36,9 @@ pub(crate) fn type_and_cardinality(field_type: &FieldType) -> Option<(FastType, FieldType::F64(options) => options .get_fastfield_cardinality() .map(|cardinality| (FastType::F64, cardinality)), + FieldType::Bool(options) => options + .get_fastfield_cardinality() + .map(|cardinality| (FastType::Bool, cardinality)), FieldType::Date(options) => options .get_fastfield_cardinality() .map(|cardinality| (FastType::Date, cardinality)), @@ -166,6 +170,14 @@ 
impl FastFieldReaders { self.typed_fast_field_reader(field) } + /// Returns the `bool` fast field reader associated to `field`. + /// + /// If `field` is not a bool fast field, this method returns an Error. + pub fn bool(&self, field: Field) -> crate::Result> { + self.check_type(field, FastType::Bool, Cardinality::SingleValue)?; + self.typed_fast_field_reader(field) + } + /// Returns a `u64s` multi-valued fast field reader reader associated to `field`. /// /// If `field` is not a u64 multi-valued fast field, this method returns an Error. @@ -198,6 +210,14 @@ impl FastFieldReaders { self.typed_fast_field_multi_reader(field) } + /// Returns a `bools` multi-valued fast field reader associated to `field`. + /// + /// If `field` is not a bool multi-valued fast field, this method returns an Error. + pub fn bools(&self, field: Field) -> crate::Result> { + self.check_type(field, FastType::Bool, Cardinality::MultiValues)?; + self.typed_fast_field_multi_reader(field) + } + /// Returns a `time::OffsetDateTime` multi-valued fast field reader reader associated to /// `field`. 
/// diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 51ec1f8404..c0e9b6982b 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -43,6 +43,7 @@ impl FastFieldsWriter { FieldType::I64(ref int_options) | FieldType::U64(ref int_options) | FieldType::F64(ref int_options) + | FieldType::Bool(ref int_options) | FieldType::Date(ref int_options) => { match int_options.get_fastfield_cardinality() { Some(Cardinality::SingleValue) => { @@ -75,7 +76,7 @@ impl FastFieldsWriter { bytes_value_writers.push(fast_field_writer); } } - _ => {} + FieldType::Str(_) | FieldType::JsonObject(_) => {} } } FastFieldsWriter { diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index c5050e1cf9..ff541b00c0 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -1385,6 +1385,7 @@ mod tests { let mut schema_builder = schema::Schema::builder(); let id_field = schema_builder.add_u64_field("id", FAST | INDEXED | STORED); let bytes_field = schema_builder.add_bytes_field("bytes", FAST | INDEXED | STORED); + let bool_field = schema_builder.add_bool_field("bool", FAST | INDEXED | STORED); let text_field = schema_builder.add_text_field( "text_field", TextOptions::default() @@ -1403,6 +1404,12 @@ mod tests { .set_fast(Cardinality::MultiValues) .set_stored(), ); + let multi_bools = schema_builder.add_bool_field( + "multi_bools", + NumericOptions::default() + .set_fast(Cardinality::MultiValues) + .set_stored(), + ); let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); let schema = schema_builder.build(); let settings = if sort_index { @@ -1435,6 +1442,9 @@ mod tests { bytes_field => id.to_le_bytes().as_slice(), multi_numbers=> id, multi_numbers => id, + bool_field => (id % 2u64) != 0, + multi_bools => (id % 2u64) != 0, + multi_bools => (id % 2u64) == 0, text_field => id.to_string(), facet_field => facet, large_text_field=> LOREM @@ -1522,11 +1532,18 @@ mod tests { // multivalue fast field tests for 
segment_reader in searcher.segment_readers().iter() { let ff_reader = segment_reader.fast_fields().u64s(multi_numbers).unwrap(); + let bool_ff_reader = segment_reader.fast_fields().bools(multi_bools).unwrap(); for doc in segment_reader.doc_ids_alive() { let mut vals = vec![]; ff_reader.get_vals(doc, &mut vals); assert_eq!(vals.len(), 2); assert_eq!(vals[0], vals[1]); + + let mut bool_vals = vec![]; + bool_ff_reader.get_vals(doc, &mut bool_vals); + assert_eq!(bool_vals.len(), 2); + assert_ne!(bool_vals[0], bool_vals[1]); + assert!(expected_ids_and_num_occurrences.contains_key(&vals[0])); } } @@ -1557,6 +1574,18 @@ mod tests { .as_u64() .unwrap(); assert_eq!(id, id2); + let bool = store_reader + .get(doc_id) + .unwrap() + .get_first(bool_field) + .unwrap() + .as_bool() + .unwrap(); + let doc = store_reader.get(doc_id).unwrap(); + let mut bool2 = doc.get_all(multi_bools); + assert_eq!(bool, bool2.next().unwrap().as_bool().unwrap()); + assert_ne!(bool, bool2.next().unwrap().as_bool().unwrap()); + assert_eq!(None, bool2.next()) } } // test search diff --git a/src/indexer/json_term_writer.rs b/src/indexer/json_term_writer.rs index 503fc69a19..501be0cfc1 100644 --- a/src/indexer/json_term_writer.rs +++ b/src/indexer/json_term_writer.rs @@ -123,8 +123,7 @@ fn index_json_value<'a>( match json_value { serde_json::Value::Null => {} serde_json::Value::Bool(val_bool) => { - let bool_u64 = if *val_bool { 1u64 } else { 0u64 }; - json_term_writer.set_fast_value(bool_u64); + json_term_writer.set_fast_value(*val_bool); postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx); } serde_json::Value::Number(number) => { @@ -220,6 +219,9 @@ pub(crate) fn convert_to_fast_value_and_get_term( if let Ok(f64_val) = str::parse::(phrase) { return Some(set_fastvalue_and_get_term(json_term_writer, f64_val)); } + if let Ok(bool_val) = str::parse::(phrase) { + return Some(set_fastvalue_and_get_term(json_term_writer, bool_val)); + } None } // helper function to generate a Term from a json 
fastvalue @@ -434,6 +436,20 @@ mod tests { ) } + #[test] + fn test_bool_term() { + let field = Field::from_field_id(1); + let mut term = Term::new(); + term.set_field(Type::Json, field); + let mut json_writer = JsonTermWriter::wrap(&mut term); + json_writer.push_path_segment("color"); + json_writer.set_fast_value(true); + assert_eq!( + json_writer.term().as_slice(), + b"\x00\x00\x00\x01jcolor\x00o\x00\x00\x00\x00\x00\x00\x00\x01" + ) + } + #[test] fn test_push_after_set_path_segment() { let field = Field::from_field_id(1); diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 455ab7b693..007934ba53 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -298,6 +298,7 @@ impl IndexMerger { FieldType::U64(ref options) | FieldType::I64(ref options) | FieldType::F64(ref options) + | FieldType::Bool(ref options) | FieldType::Date(ref options) => match options.get_fastfield_cardinality() { Some(Cardinality::SingleValue) => { self.write_single_fast_field(field, fast_field_serializer, doc_id_mapping)?; @@ -312,7 +313,7 @@ impl IndexMerger { self.write_bytes_fast_field(field, fast_field_serializer, doc_id_mapping)?; } } - _ => { + FieldType::JsonObject(_) | FieldType::Facet(_) | FieldType::Str(_) => { // We don't handle json fast field for the moment // They can be implemented using what is done // for facets in the future diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index c1ae1c6e88..94469c5bca 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -266,6 +266,13 @@ impl SegmentWriter { postings_writer.subscribe(doc_id, 0u32, term_buffer, ctx); } } + FieldType::Bool(_) => { + for value in values { + let bool_val = value.as_bool().ok_or_else(make_schema_error)?; + term_buffer.set_bool(bool_val); + postings_writer.subscribe(doc_id, 0u32, term_buffer, ctx); + } + } FieldType::Bytes(_) => { for value in values { let bytes = value.as_bytes().ok_or_else(make_schema_error)?; @@ -477,6 +484,7 @@ mod 
tests { r#"{ "toto": "titi", "float": -0.2, + "bool": true, "unsigned": 1, "signed": -2, "complexobject": { @@ -519,6 +527,13 @@ mod tests { let mut term_stream = term_dict.stream().unwrap(); let mut json_term_writer = JsonTermWriter::wrap(&mut term); + + json_term_writer.push_path_segment("bool"); + json_term_writer.set_fast_value(true); + assert!(term_stream.advance()); + assert_eq!(term_stream.key(), json_term_writer.term().value_bytes()); + + json_term_writer.pop_path_segment(); json_term_writer.push_path_segment("complexobject"); json_term_writer.push_path_segment("field.with.dot"); json_term_writer.set_fast_value(1u64); diff --git a/src/postings/per_field_postings_writer.rs b/src/postings/per_field_postings_writer.rs index 04966ab420..61d02752f7 100644 --- a/src/postings/per_field_postings_writer.rs +++ b/src/postings/per_field_postings_writer.rs @@ -47,6 +47,7 @@ fn posting_writer_from_field_entry(field_entry: &FieldEntry) -> Box Box::new(SpecializedPostingsWriter::::default()), diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index debd032087..eec2af2631 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -82,7 +82,11 @@ pub(crate) fn serialize_postings( .collect(); unordered_term_mappings.insert(field, mapping); } - FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) | FieldType::Date(_) => {} + FieldType::U64(_) + | FieldType::I64(_) + | FieldType::F64(_) + | FieldType::Date(_) + | FieldType::Bool(_) => {} FieldType::Bytes(_) => {} FieldType::JsonObject(_) => {} } diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index d0141833dc..597dca07c4 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use std::num::{ParseFloatError, ParseIntError}; use std::ops::Bound; -use std::str::FromStr; +use std::str::{FromStr, ParseBoolError}; use 
tantivy_query_grammar::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral}; @@ -46,6 +46,10 @@ pub enum QueryParserError { /// is not a f64. #[error("Invalid query: Only excluding terms given")] ExpectedFloat(#[from] ParseFloatError), + /// The query contains a term for a bool field, but the value + /// is not a bool. + #[error("Expected a bool value: '{0:?}'")] + ExpectedBool(#[from] ParseBoolError), /// It is forbidden queries that are only "excluding". (e.g. -title:pop) #[error("Invalid query: Only excluding terms given")] AllButQueryForbidden, @@ -346,6 +350,10 @@ impl QueryParser { let val: f64 = f64::from_str(phrase)?; Ok(Term::from_field_f64(field, val)) } + FieldType::Bool(_) => { + let val: bool = bool::from_str(phrase)?; + Ok(Term::from_field_bool(field, val)) + } FieldType::Date(_) => { let dt = OffsetDateTime::parse(phrase, &Rfc3339)?; Ok(Term::from_field_date(field, DateTime::from_utc(dt))) @@ -426,6 +434,11 @@ impl QueryParser { let f64_term = Term::from_field_f64(field, val); Ok(vec![LogicalLiteral::Term(f64_term)]) } + FieldType::Bool(_) => { + let val: bool = bool::from_str(phrase)?; + let bool_term = Term::from_field_bool(field, val); + Ok(vec![LogicalLiteral::Term(bool_term)]) + } FieldType::Date(_) => { let dt = OffsetDateTime::parse(phrase, &Rfc3339)?; let dt_term = Term::from_field_date(field, DateTime::from_utc(dt)); @@ -797,6 +810,8 @@ mod test { schema_builder.add_bytes_field("bytes_not_indexed", STORED); schema_builder.add_json_field("json", TEXT); schema_builder.add_json_field("json_not_indexed", STORED); + schema_builder.add_bool_field("bool", INDEXED); + schema_builder.add_bool_field("notindexed_bool", STORED); schema_builder.build() } @@ -912,6 +927,10 @@ mod test { is_not_indexed_err("notindexed_i64:-234324"), Some(String::from("notindexed_i64")) ); + assert_eq!( + is_not_indexed_err("notindexed_bool:true"), + Some(String::from("notindexed_bool")) + ); } #[test] @@ -993,6 +1012,18 @@ mod test { ); } + #[test] + fn 
test_parse_bool() { + test_parse_query_to_logical_ast_helper( + "bool:true", + &format!( + "{:?}", + Term::from_field_bool(Field::from_field_id(16u32), true), + ), + false, + ); + } + #[test] fn test_parse_bytes_not_indexed() { let error = parse_query_to_logical_ast("bytes_not_indexed:aaa", false).unwrap_err(); @@ -1037,6 +1068,15 @@ mod test { ); } + #[test] + fn test_json_field_possibly_a_bool() { + test_parse_query_to_logical_ast_helper( + "json.titi:true", + r#"(Term(type=Json, field=14, path=titi, vtype=Bool, true) Term(type=Json, field=14, path=titi, vtype=Str, "true"))"#, + true, + ); + } + #[test] fn test_json_field_not_indexed() { let error = parse_query_to_logical_ast("json_not_indexed.titi:hello", false).unwrap_err(); @@ -1286,6 +1326,17 @@ mod test { ); } + #[test] + pub fn test_query_parser_expected_bool() { + let query_parser = make_query_parser(); + assert_matches!( + query_parser.parse_query("bool:brie"), + Err(QueryParserError::ExpectedBool(_)) + ); + assert!(query_parser.parse_query("bool:\"true\"").is_ok()); + assert!(query_parser.parse_query("bool:\"false\"").is_ok()); + } + #[test] pub fn test_query_parser_expected_date() { let query_parser = make_query_parser(); diff --git a/src/schema/document.rs b/src/schema/document.rs index b4b2cc6ce5..2fa6ee3d4d 100644 --- a/src/schema/document.rs +++ b/src/schema/document.rs @@ -110,6 +110,11 @@ impl Document { self.add_field_value(field, value); } + /// Add a bool field + pub fn add_bool(&mut self, field: Field, value: bool) { + self.add_field_value(field, value); + } + /// Add a date field with unspecified time zone offset pub fn add_date(&mut self, field: Field, value: DateTime) { self.add_field_value(field, value); diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index b490162191..863ad2c5ab 100644 --- a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -49,6 +49,11 @@ impl FieldEntry { Self::new(field_name, FieldType::F64(f64_options)) } + /// Creates a new bool field 
entry. + pub fn new_bool(field_name: String, bool_options: NumericOptions) -> FieldEntry { + Self::new(field_name, FieldType::Bool(bool_options)) + } + /// Creates a new date field entry. pub fn new_date(field_name: String, date_options: NumericOptions) -> FieldEntry { Self::new(field_name, FieldType::Date(date_options)) @@ -102,7 +107,8 @@ impl FieldEntry { FieldType::U64(ref options) | FieldType::I64(ref options) | FieldType::F64(ref options) - | FieldType::Date(ref options) => options.is_stored(), + | FieldType::Date(ref options) + | FieldType::Bool(ref options) => options.is_stored(), FieldType::Str(ref options) => options.is_stored(), FieldType::Facet(ref options) => options.is_stored(), FieldType::Bytes(ref options) => options.is_stored(), diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index 39798e45e8..1080d83fc1 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -46,6 +46,8 @@ pub enum Type { I64 = b'i', /// `f64` F64 = b'f', + /// `bool` + Bool = b'o', /// `date(i64) timestamp` Date = b'd', /// `tantivy::schema::Facet`. Passed as a string in JSON. 
@@ -56,11 +58,12 @@ pub enum Type { Json = b'j', } -const ALL_TYPES: [Type; 8] = [ +const ALL_TYPES: [Type; 9] = [ Type::Str, Type::U64, Type::I64, Type::F64, + Type::Bool, Type::Date, Type::Facet, Type::Bytes, @@ -86,6 +89,7 @@ impl Type { Type::U64 => "U64", Type::I64 => "I64", Type::F64 => "F64", + Type::Bool => "Bool", Type::Date => "Date", Type::Facet => "Facet", Type::Bytes => "Bytes", @@ -101,6 +105,7 @@ impl Type { b'u' => Some(Type::U64), b'i' => Some(Type::I64), b'f' => Some(Type::F64), + b'o' => Some(Type::Bool), b'd' => Some(Type::Date), b'h' => Some(Type::Facet), b'b' => Some(Type::Bytes), @@ -125,6 +130,8 @@ pub enum FieldType { I64(NumericOptions), /// 64-bits float 64 field type configuration F64(NumericOptions), + /// Bool field type configuration + Bool(NumericOptions), /// Signed 64-bits Date 64 field type configuration, Date(NumericOptions), /// Hierachical Facet @@ -143,6 +150,7 @@ impl FieldType { FieldType::U64(_) => Type::U64, FieldType::I64(_) => Type::I64, FieldType::F64(_) => Type::F64, + FieldType::Bool(_) => Type::Bool, FieldType::Date(_) => Type::Date, FieldType::Facet(_) => Type::Facet, FieldType::Bytes(_) => Type::Bytes, @@ -156,7 +164,8 @@ impl FieldType { FieldType::Str(ref text_options) => text_options.get_indexing_options().is_some(), FieldType::U64(ref int_options) | FieldType::I64(ref int_options) - | FieldType::F64(ref int_options) => int_options.is_indexed(), + | FieldType::F64(ref int_options) + | FieldType::Bool(ref int_options) => int_options.is_indexed(), FieldType::Date(ref date_options) => date_options.is_indexed(), FieldType::Facet(ref _facet_options) => true, FieldType::Bytes(ref bytes_options) => bytes_options.is_indexed(), @@ -193,7 +202,8 @@ impl FieldType { FieldType::U64(ref int_options) | FieldType::I64(ref int_options) | FieldType::F64(ref int_options) - | FieldType::Date(ref int_options) => int_options.get_fastfield_cardinality().is_some(), + | FieldType::Date(ref int_options) + | FieldType::Bool(ref 
int_options) => int_options.get_fastfield_cardinality().is_some(), FieldType::Facet(_) => true, FieldType::JsonObject(_) => false, } @@ -209,7 +219,8 @@ impl FieldType { FieldType::U64(ref int_options) | FieldType::I64(ref int_options) | FieldType::F64(ref int_options) - | FieldType::Date(ref int_options) => int_options.fieldnorms(), + | FieldType::Date(ref int_options) + | FieldType::Bool(ref int_options) => int_options.fieldnorms(), FieldType::Facet(_) => false, FieldType::Bytes(ref bytes_options) => bytes_options.fieldnorms(), FieldType::JsonObject(ref _json_object_options) => false, @@ -232,7 +243,8 @@ impl FieldType { FieldType::U64(ref int_options) | FieldType::I64(ref int_options) | FieldType::F64(ref int_options) - | FieldType::Date(ref int_options) => { + | FieldType::Date(ref int_options) + | FieldType::Bool(ref int_options) => { if int_options.is_indexed() { Some(IndexRecordOption::Basic) } else { @@ -277,6 +289,10 @@ impl FieldType { json: JsonValue::String(field_text), }) } + FieldType::Bool(_) => Err(ValueParsingError::TypeError { + expected: "a boolean", + json: JsonValue::String(field_text), + }), FieldType::Facet(_) => Ok(Value::Facet(Facet::from(&field_text))), FieldType::Bytes(_) => base64::decode(&field_text) .map(Value::Bytes) @@ -318,6 +334,10 @@ impl FieldType { }) } } + FieldType::Bool(_) => Err(ValueParsingError::TypeError { + expected: "a boolean", + json: JsonValue::Number(field_val_num), + }), FieldType::Str(_) | FieldType::Facet(_) | FieldType::Bytes(_) => { Err(ValueParsingError::TypeError { expected: "a string", @@ -348,6 +368,13 @@ impl FieldType { json: JsonValue::Object(json_map), }), }, + JsonValue::Bool(json_bool_val) => match self { + FieldType::Bool(_) => Ok(Value::Bool(json_bool_val)), + _ => Err(ValueParsingError::TypeError { + expected: self.value_type().name(), + json: JsonValue::Bool(json_bool_val), + }), + }, _ => Err(ValueParsingError::TypeError { expected: self.value_type().name(), json: json.clone(), diff --git 
a/src/schema/flags.rs b/src/schema/flags.rs index d82b434c64..6bb54e3fa8 100644 --- a/src/schema/flags.rs +++ b/src/schema/flags.rs @@ -22,9 +22,9 @@ pub const STORED: SchemaFlagList = SchemaFlagList { pub struct IndexedFlag; /// Flag to mark the field as indexed. An indexed field is searchable and has a fieldnorm. /// -/// The `INDEXED` flag can only be used when building `NumericOptions` (`u64`, `i64` and `f64` -/// fields) Of course, text fields can also be indexed... But this is expressed by using either the -/// `STRING` (untokenized) or `TEXT` (tokenized with the english tokenizer) flags. +/// The `INDEXED` flag can only be used when building `NumericOptions` (`u64`, `i64`, `f64` and +/// `bool` fields). Of course, text fields can also be indexed... But this is expressed by using +/// either the `STRING` (untokenized) or `TEXT` (tokenized with the English tokenizer) flags. pub const INDEXED: SchemaFlagList = SchemaFlagList { head: IndexedFlag, tail: (), @@ -36,7 +36,8 @@ pub struct FastFlag; /// /// Fast fields can be random-accessed rapidly. Fields useful for scoring, filtering /// or collection should be mark as fast fields. -/// The `FAST` flag can only be used when building `NumericOptions` (`u64`, `i64` and `f64` fields) +/// The `FAST` flag can only be used when building `NumericOptions` (`u64`, `i64`, `f64` and `bool` +/// fields). pub const FAST: SchemaFlagList = SchemaFlagList { head: FastFlag, tail: (), diff --git a/src/schema/schema.rs b/src/schema/schema.rs index 235d0412db..03e0f879d2 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -102,6 +102,26 @@ impl SchemaBuilder { self.add_field(field_entry) } + /// Adds a new bool field. + /// Returns the associated field handle + /// + /// # Caution + /// + /// Appending two fields with the same name + /// will result in the shadowing of the first + /// by the second one. 
+ /// The first field will get a field id + /// but only the second one will be indexed + pub fn add_bool_field>( + &mut self, + field_name_str: &str, + field_options: T, + ) -> Field { + let field_name = String::from(field_name_str); + let field_entry = FieldEntry::new_bool(field_name, field_options.into()); + self.add_field(field_entry) + } + /// Adds a new date field. /// Returns the associated field handle /// Internally, Tantivy simply stores dates as i64 UTC timestamps, @@ -446,6 +466,9 @@ mod tests { .set_indexed() .set_fieldnorm() .set_fast(Cardinality::SingleValue); + let is_read_options = NumericOptions::default() + .set_stored() + .set_fast(Cardinality::SingleValue); schema_builder.add_text_field("title", TEXT); schema_builder.add_text_field( "author", @@ -458,6 +481,7 @@ mod tests { schema_builder.add_u64_field("count", count_options); schema_builder.add_i64_field("popularity", popularity_options); schema_builder.add_f64_field("score", score_options); + schema_builder.add_bool_field("is_read", is_read_options); let schema = schema_builder.build(); let schema_json = serde_json::to_string_pretty(&schema).unwrap(); let expected = r#"[ @@ -516,6 +540,16 @@ mod tests { "fast": "single", "stored": false } + }, + { + "name": "is_read", + "type": "bool", + "options": { + "indexed": false, + "fieldnorms": false, + "fast": "single", + "stored": true + } } ]"#; assert_eq!(schema_json, expected); @@ -548,6 +582,11 @@ mod tests { assert_eq!("score", field_entry.name()); assert_eq!(4, field.field_id()); } + { + let (field, field_entry) = fields.next().unwrap(); + assert_eq!("is_read", field_entry.name()); + assert_eq!(5, field.field_id()); + } assert!(fields.next().is_none()); } @@ -557,14 +596,19 @@ mod tests { let count_options = NumericOptions::default() .set_stored() .set_fast(Cardinality::SingleValue); + let is_read_options = NumericOptions::default() + .set_stored() + .set_fast(Cardinality::SingleValue); schema_builder.add_text_field("title", TEXT); 
schema_builder.add_text_field("author", STRING); schema_builder.add_u64_field("count", count_options); + schema_builder.add_bool_field("is_read", is_read_options); let schema = schema_builder.build(); let doc_json = r#"{ "title": "my title", "author": "fulmicoton", - "count": 4 + "count": 4, + "is_read": true }"#; let doc = schema.parse_document(doc_json).unwrap(); diff --git a/src/schema/term.rs b/src/schema/term.rs index 93a5806b2e..a485ef8960 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -69,6 +69,11 @@ impl Term { Term::from_fast_value(field, &val) } + /// Builds a term given a field, and a bool-value + pub fn from_field_bool(field: Field, val: bool) -> Term { + Term::from_fast_value(field, &val) + } + /// Builds a term given a field, and a DateTime value pub fn from_field_date(field: Field, val: DateTime) -> Term { Term::from_fast_value(field, &val) @@ -135,6 +140,11 @@ impl Term { self.set_fast_value(val); } + /// Sets a `bool` value in the term. + pub fn set_bool(&mut self, val: bool) { + self.set_fast_value(val); + } + /// Sets the value of a `Bytes` field. pub fn set_bytes(&mut self, bytes: &[u8]) { self.0.resize(5, 0u8); @@ -262,6 +272,14 @@ where B: AsRef<[u8]> self.get_fast_type::() } + /// Returns the `bool` value stored in a term. + /// + /// Returns None if the term is not of the bool type, or if the term byte representation + /// is invalid. + pub fn as_bool(&self) -> Option { + self.get_fast_type::() + } + /// Returns the `Date` value stored in a term. /// /// Returns None if the term is not of the Date type, or if the term byte representation @@ -372,6 +390,9 @@ fn debug_value_bytes(typ: Type, bytes: &[u8], f: &mut fmt::Formatter) -> fmt::Re Type::F64 => { write_opt(f, get_fast_type::(bytes))?; } + Type::Bool => { + write_opt(f, get_fast_type::(bytes))?; + } // TODO pretty print these types too. 
Type::Date => { write_opt(f, get_fast_type::(bytes))?; @@ -437,4 +458,15 @@ mod tests { assert_eq!(term.as_slice().len(), super::FAST_VALUE_TERM_LEN); assert_eq!(term.as_u64(), Some(983u64)) } + + #[test] + pub fn test_term_bool() { + let mut schema_builder = Schema::builder(); + let bool_field = schema_builder.add_bool_field("bool", INDEXED); + let term = Term::from_field_bool(bool_field, true); + assert_eq!(term.field(), bool_field); + assert_eq!(term.typ(), Type::Bool); + assert_eq!(term.as_slice().len(), super::FAST_VALUE_TERM_LEN); + assert_eq!(term.as_bool(), Some(true)) + } } diff --git a/src/schema/value.rs b/src/schema/value.rs index fb2807958a..d5e4f72cde 100644 --- a/src/schema/value.rs +++ b/src/schema/value.rs @@ -22,6 +22,8 @@ pub enum Value { I64(i64), /// 64-bits Float `f64` F64(f64), + /// Bool value + Bool(bool), /// Date/time with second precision Date(DateTime), /// Facet @@ -43,6 +45,7 @@ impl Serialize for Value { Value::U64(u) => serializer.serialize_u64(u), Value::I64(u) => serializer.serialize_i64(u), Value::F64(u) => serializer.serialize_f64(u), + Value::Bool(b) => serializer.serialize_bool(b), Value::Date(ref date) => time::serde::rfc3339::serialize(&date.into_utc(), serializer), Value::Facet(ref facet) => facet.serialize(serializer), Value::Bytes(ref bytes) => serializer.serialize_bytes(bytes), @@ -75,6 +78,10 @@ impl<'de> Deserialize<'de> for Value { Ok(Value::F64(v)) } + fn visit_bool(self, v: bool) -> Result { + Ok(Value::Bool(v)) + } + fn visit_str(self, v: &str) -> Result { Ok(Value::Str(v.to_owned())) } @@ -151,6 +158,17 @@ impl Value { } } + /// Returns the bool value, provided the value is of the `Bool` type. + /// + /// Return None if the value is not of type `Bool`. + pub fn as_bool(&self) -> Option { + if let Value::Bool(value) = self { + Some(*value) + } else { + None + } + } + /// Returns the Date-value, provided the value is of the `Date` type. /// /// Returns None if the value is not of type `Date`. 
@@ -209,6 +227,12 @@ impl From for Value { } } +impl From for Value { + fn from(b: bool) -> Self { + Value::Bool(b) + } +} + impl From for Value { fn from(dt: DateTime) -> Value { Value::Date(dt) @@ -281,6 +305,7 @@ mod binary_serialize { const F64_CODE: u8 = 6; const EXT_CODE: u8 = 7; const JSON_OBJ_CODE: u8 = 8; + const BOOL_CODE: u8 = 9; // extended types @@ -317,6 +342,10 @@ mod binary_serialize { F64_CODE.serialize(writer)?; f64_to_u64(*val).serialize(writer) } + Value::Bool(ref val) => { + BOOL_CODE.serialize(writer)?; + val.serialize(writer) + } Value::Date(ref val) => { DATE_CODE.serialize(writer)?; let DateTime { unix_timestamp } = val; @@ -357,6 +386,10 @@ mod binary_serialize { let value = u64_to_f64(u64::deserialize(reader)?); Ok(Value::F64(value)) } + BOOL_CODE => { + let value = bool::deserialize(reader)?; + Ok(Value::Bool(value)) + } DATE_CODE => { let unix_timestamp = i64::deserialize(reader)?; Ok(Value::Date(DateTime::from_unix_timestamp(unix_timestamp)))