From e758080465fee5cd9dfdb032a345fd7988df8914 Mon Sep 17 00:00:00 2001 From: trinity-1686a Date: Thu, 17 Nov 2022 16:49:49 +0100 Subject: [PATCH] add support for TermSetQuery in query parser (#1683) --- query-grammar/src/query_grammar.rs | 23 ++++++++++- query-grammar/src/user_input_ast.rs | 17 +++++++++ src/query/query_parser/logical_ast.rs | 26 +++++++++++++ src/query/query_parser/query_parser.rs | 53 +++++++++++++++++++++++++- src/query/set_query.rs | 30 ++++++++++++++- 5 files changed, 145 insertions(+), 4 deletions(-) diff --git a/query-grammar/src/query_grammar.rs b/query-grammar/src/query_grammar.rs index 38f6d3cbc5..baf460e62e 100644 --- a/query-grammar/src/query_grammar.rs +++ b/query-grammar/src/query_grammar.rs @@ -5,7 +5,8 @@ use combine::parser::range::{take_while, take_while1}; use combine::parser::repeat::escaped; use combine::parser::Parser; use combine::{ - attempt, choice, eof, many, many1, one_of, optional, parser, satisfy, skip_many1, value, + attempt, between, choice, eof, many, many1, one_of, optional, parser, satisfy, sep_by, + skip_many1, value, }; use once_cell::sync::Lazy; use regex::Regex; @@ -264,6 +265,17 @@ fn range<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> { }) } +/// Function that parses a set out of a Stream +/// Supports sets like: `IN [val1 val2 val3]` +fn set<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> { + let term_list = between(char('['), char(']'), sep_by(term_val(), spaces())); + + let set_content = ((string("IN"), spaces()), term_list).map(|(_, elements)| elements); + + (optional(attempt(field_name().skip(spaces()))), set_content) + .map(|(field, elements)| UserInputLeaf::Set { field, elements }) +} + fn negate(expr: UserInputAst) -> UserInputAst { expr.unary(Occur::MustNot) } @@ -278,6 +290,7 @@ fn leaf<'a>() -> impl Parser<&'a str, Output = UserInputAst> { string("NOT").skip(spaces1()).with(leaf()).map(negate), )) .or(attempt(range().map(UserInputAst::from))) +
.or(attempt(set().map(UserInputAst::from))) .or(literal().map(UserInputAst::from)) .parse_stream(input) .into_result() @@ -747,6 +760,14 @@ mod test { test_parse_query_to_ast_helper("+(a b) +d", "(+(*\"a\" *\"b\") +\"d\")"); } + #[test] + fn test_parse_test_query_set() { + test_parse_query_to_ast_helper("abc: IN [a b c]", r#""abc": IN ["a" "b" "c"]"#); + test_parse_query_to_ast_helper("abc: IN [1]", r#""abc": IN ["1"]"#); + test_parse_query_to_ast_helper("abc: IN []", r#""abc": IN []"#); + test_parse_query_to_ast_helper("IN [1 2]", r#"IN ["1" "2"]"#); + } + #[test] fn test_parse_test_query_other() { test_parse_query_to_ast_helper("(+a +b) d", "(*(+\"a\" +\"b\") *\"d\")"); diff --git a/query-grammar/src/user_input_ast.rs b/query-grammar/src/user_input_ast.rs index 3130ddbbe6..c020b1d6a7 100644 --- a/query-grammar/src/user_input_ast.rs +++ b/query-grammar/src/user_input_ast.rs @@ -12,6 +12,10 @@ pub enum UserInputLeaf { lower: UserInputBound, upper: UserInputBound, }, + Set { + field: Option<String>, + elements: Vec<String>, + }, } impl Debug for UserInputLeaf { @@ -31,6 +35,19 @@ impl Debug for UserInputLeaf { upper.display_upper(formatter)?; Ok(()) } + UserInputLeaf::Set { field, elements } => { + if let Some(ref field) = field { + write!(formatter, "\"{}\": ", field)?; + } + write!(formatter, "IN [")?; + for (i, element) in elements.iter().enumerate() { + if i != 0 { + write!(formatter, " ")?; + } + write!(formatter, "\"{}\"", element)?; + } + write!(formatter, "]") + } UserInputLeaf::All => write!(formatter, "*"), } } diff --git a/src/query/query_parser/logical_ast.rs b/src/query/query_parser/logical_ast.rs index 2eb75e675a..2d39923aee 100644 --- a/src/query/query_parser/logical_ast.rs +++ b/src/query/query_parser/logical_ast.rs @@ -15,6 +15,11 @@ pub enum LogicalLiteral { lower: Bound<Term>, upper: Bound<Term>, }, + Set { + field: Field, + value_type: Type, + elements: Vec<Term>, + }, All, } @@ -87,6 +92,27 @@ impl fmt::Debug for LogicalLiteral { ref upper, ..
} => write!(formatter, "({:?} TO {:?})", lower, upper), + LogicalLiteral::Set { ref elements, .. } => { + const MAX_DISPLAYED: usize = 10; + + write!(formatter, "IN [")?; + for (i, element) in elements.iter().enumerate() { + if i == 0 { + write!(formatter, "{:?}", element)?; + } else if i == MAX_DISPLAYED - 1 { + write!( + formatter, + ", {:?}, ... ({} more)", + element, + elements.len() - i - 1 + )?; + break; + } else { + write!(formatter, ", {:?}", element)?; + } + } + write!(formatter, "]") + } LogicalLiteral::All => write!(formatter, "*"), } } diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index 19f3cf21c2..a7f864e96a 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -13,7 +13,7 @@ use crate::indexer::{ }; use crate::query::{ AllQuery, BooleanQuery, BoostQuery, EmptyQuery, Occur, PhraseQuery, Query, RangeQuery, - TermQuery, + TermQuery, TermSetQuery, }; use crate::schema::{ Facet, FacetParseError, Field, FieldType, IndexRecordOption, IntoIpv6Addr, Schema, Term, Type, @@ -685,6 +685,31 @@ impl QueryParser { })); Ok(logical_ast) } + UserInputLeaf::Set { + field: full_field_opt, + elements, + } => { + let full_path = full_field_opt.ok_or_else(|| { + QueryParserError::UnsupportedQuery( + "Set query need to target a specific field.".to_string(), + ) + })?; + let (field, json_path) = self + .split_full_path(&full_path) + .ok_or_else(|| QueryParserError::FieldDoesNotExist(full_path.clone()))?; + let field_entry = self.schema.get_field_entry(field); + let value_type = field_entry.field_type().value_type(); + let logical_ast = LogicalAst::Leaf(Box::new(LogicalLiteral::Set { + elements: elements + .into_iter() + .map(|element| self.compute_boundary_term(field, json_path, &element)) + .collect::<Result<Vec<_>, _>>()?, + + field, + value_type, + })); + Ok(logical_ast) + } } } } @@ -703,6 +728,7 @@ fn convert_literal_to_query(logical_literal: LogicalLiteral) -> Box<dyn Query> { } =>
Box::new(RangeQuery::new_term_bounds( field, value_type, &lower, &upper, )), + LogicalLiteral::Set { elements, .. } => Box::new(TermSetQuery::new(elements)), LogicalLiteral::All => Box::new(AllQuery), } } @@ -1563,4 +1589,29 @@ mod test { false, ); } + + #[test] + pub fn test_term_set_query() { + test_parse_query_to_logical_ast_helper( + "title: IN [a b cd]", + r#"IN [Term(type=Str, field=0, "a"), Term(type=Str, field=0, "b"), Term(type=Str, field=0, "cd")]"#, + false, + ); + test_parse_query_to_logical_ast_helper( + "bytes: IN [AA== ABA= ABCD]", + r#"IN [Term(type=Bytes, field=12, [0]), Term(type=Bytes, field=12, [0, 16]), Term(type=Bytes, field=12, [0, 16, 131])]"#, + false, + ); + test_parse_query_to_logical_ast_helper( + "signed: IN [1 2 -3]", + r#"IN [Term(type=I64, field=2, 1), Term(type=I64, field=2, 2), Term(type=I64, field=2, -3)]"#, + false, + ); + + test_parse_query_to_logical_ast_helper( + "float: IN [1.1 2.2 -3.3]", + r#"IN [Term(type=F64, field=10, 1.1), Term(type=F64, field=10, 2.2), Term(type=F64, field=10, -3.3)]"#, + false, + ); + } } diff --git a/src/query/set_query.rs b/src/query/set_query.rs index 26df4c8287..65f97fd6c1 100644 --- a/src/query/set_query.rs +++ b/src/query/set_query.rs @@ -101,9 +101,8 @@ impl Automaton for SetDfaWrapper { #[cfg(test)] mod tests { - use crate::collector::TopDocs; - use crate::query::TermSetQuery; + use crate::query::{QueryParser, TermSetQuery}; use crate::schema::{Schema, TEXT}; use crate::{assert_nearly_equals, Index, Term}; @@ -215,4 +214,31 @@ mod tests { Ok(()) } + + #[test] + fn test_term_set_query_parser() -> crate::Result<()> { + let mut schema_builder = Schema::builder(); + schema_builder.add_text_field("field", TEXT); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema.clone()); + let mut index_writer = index.writer_for_tests()?; + let field = schema.get_field("field").unwrap(); + index_writer.add_document(doc!( + field => "val1", + ))?; + index_writer.add_document(doc!( + 
field => "val2", + ))?; + index_writer.add_document(doc!( + field => "val3", + ))?; + index_writer.commit()?; + let reader = index.reader()?; + let searcher = reader.searcher(); + let query_parser = QueryParser::for_index(&index, vec![]); + let query = query_parser.parse_query("field: IN [val1 val2]")?; + let top_docs = searcher.search(&query, &TopDocs::with_limit(3))?; + assert_eq!(top_docs.len(), 2); + Ok(()) + } }