Skip to content

Commit

Permalink
support more query operators in native query pipeline type inference (#…
Browse files Browse the repository at this point in the history
…121)

This is work an an in-progress feature that is gated behind a feature flag, `native-query-subcommand`

When generating configurations for native queries it is necessary to analyze operators in `$match` stages in aggregation pipelines to infer types for any parameters that appear in operator arguments.

This PR adds the logic to process these operators:

- `$and` | `$or` | `$nor`
- `$not`
- `$elemMatch`
- `$eq` | `$ne` | `$gt` | `$lt` | `$gte` | `$lte`
- `$in` | `$nin`
- `$exists`
- `$type`
- `$mod`
- `$regex`
- `$all`
- `$size`

The full set of available operators is given here: https://www.mongodb.com/docs/manual/reference/operator/query/
  • Loading branch information
hallettj authored Nov 19, 2024
1 parent 1577927 commit 5408060
Show file tree
Hide file tree
Showing 4 changed files with 253 additions and 40 deletions.
213 changes: 173 additions & 40 deletions crates/cli/src/native_query/pipeline/match_stage.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use mongodb::bson::{Bson, Document};
use mongodb_support::BsonScalarType;
use nonempty::nonempty;
use nonempty::NonEmpty;

use crate::native_query::{
aggregation_expression::infer_type_from_aggregation_expression,
Expand Down Expand Up @@ -41,61 +41,185 @@ fn check_match_doc_for_parameters_helper(
input_document_type: &TypeConstraint,
match_doc: Document,
) -> Result<()> {
if match_doc.keys().any(|key| key.starts_with("$")) {
analyze_document_with_match_operators(
context,
desired_object_type_name,
input_document_type,
match_doc,
)
} else {
analyze_document_with_field_name_keys(
context,
desired_object_type_name,
input_document_type,
match_doc,
)
for (key, value) in match_doc {
if key.starts_with("$") {
analyze_match_operator(
context,
desired_object_type_name,
input_document_type,
key,
value,
)?;
} else {
analyze_input_doc_field(
context,
desired_object_type_name,
input_document_type,
key,
value,
)?;
}
}
Ok(())
}

fn analyze_document_with_field_name_keys(
fn analyze_input_doc_field(
context: &mut PipelineTypeContext<'_>,
desired_object_type_name: &str,
input_document_type: &TypeConstraint,
match_doc: Document,
field_name: String,
match_expression: Bson,
) -> Result<()> {
for (field_name, match_expression) in match_doc {
let field_type = TypeConstraint::FieldOf {
target_type: Box::new(input_document_type.clone()),
path: nonempty![field_name.into()],
};
analyze_match_expression(
context,
desired_object_type_name,
&field_type,
match_expression,
)?;
}
Ok(())
let field_type = TypeConstraint::FieldOf {
target_type: Box::new(input_document_type.clone()),
path: NonEmpty::from_vec(field_name.split(".").map(Into::into).collect())
.ok_or_else(|| Error::Other("object field reference is an empty string".to_string()))?,
};
analyze_match_expression(
context,
desired_object_type_name,
&field_type,
match_expression,
)
}

fn analyze_document_with_match_operators(
fn analyze_match_operator(
context: &mut PipelineTypeContext<'_>,
desired_object_type_name: &str,
field_type: &TypeConstraint,
match_doc: Document,
operator: String,
match_expression: Bson,
) -> Result<()> {
for (operator, match_expression) in match_doc {
match operator.as_ref() {
"$eq" => analyze_match_expression(
match operator.as_ref() {
"$and" | "$or" | "$nor" => {
if let Bson::Array(array) = match_expression {
for expression in array {
check_match_doc_for_parameters_helper(
context,
desired_object_type_name,
field_type,
expression
.as_document()
.ok_or_else(|| {
Error::Other(format!(
"expected argument to {operator} to be an array of objects"
))
})?
.clone(),
)?;
}
} else {
Err(Error::Other(format!(
"expected argument to {operator} to be an array of objects"
)))?;
}
}
"$not" => {
match match_expression {
Bson::Document(match_doc) => check_match_doc_for_parameters_helper(
context,
desired_object_type_name,
field_type,
match_doc,
)?,
_ => Err(Error::Other(format!(
"{operator} operator requires a document",
)))?,
};
}
"$elemMatch" => {
let element_type = field_type.clone().map_nullable(|ft| match ft {
TypeConstraint::ArrayOf(t) => *t,
other => TypeConstraint::ElementOf(Box::new(other)),
});
match match_expression {
Bson::Document(match_doc) => check_match_doc_for_parameters_helper(
context,
desired_object_type_name,
&element_type,
match_doc,
)?,
_ => Err(Error::Other(format!(
"{operator} operator requires a document",
)))?,
};
}
"$eq" | "$ne" | "$gt" | "$lt" | "$gte" | "$lte" => analyze_match_expression(
context,
desired_object_type_name,
field_type,
match_expression,
)?,
"$in" | "$nin" => analyze_match_expression(
context,
desired_object_type_name,
&TypeConstraint::ArrayOf(Box::new(field_type.clone())),
match_expression,
)?,
"$exists" => analyze_match_expression(
context,
desired_object_type_name,
&TypeConstraint::Scalar(BsonScalarType::Bool),
match_expression,
)?,
// In MongoDB $type accepts either a number, a string, an array of numbers, or an array of
// strings - for simplicity we're only accepting an array of strings since this form can
// express all comparisons that can be expressed with the other forms.
"$type" => analyze_match_expression(
context,
desired_object_type_name,
&TypeConstraint::ArrayOf(Box::new(TypeConstraint::Scalar(BsonScalarType::String))),
match_expression,
)?,
"$mod" => match match_expression {
Bson::Array(xs) => {
if xs.len() != 2 {
Err(Error::Other(format!(
"{operator} operator requires exactly two arguments",
operator = operator
)))?;
}
for divisor_or_remainder in xs {
analyze_match_expression(
context,
desired_object_type_name,
&TypeConstraint::Scalar(BsonScalarType::Int),
divisor_or_remainder,
)?;
}
}
_ => Err(Error::Other(format!(
"{operator} operator requires an array of two elements",
)))?,
},
"$regex" => analyze_match_expression(
context,
desired_object_type_name,
&TypeConstraint::Scalar(BsonScalarType::Regex),
match_expression,
)?,
"$all" => {
let element_type = field_type.clone().map_nullable(|ft| match ft {
TypeConstraint::ArrayOf(t) => *t,
other => TypeConstraint::ElementOf(Box::new(other)),
});
// It's like passing field_type through directly, except that we move out of
// a possible nullable type, and we enforce an array type.
let argument_type = TypeConstraint::ArrayOf(Box::new(element_type));
analyze_match_expression(
context,
desired_object_type_name,
field_type,
&argument_type,
match_expression,
)?,
// TODO: more operators! ENG-1248
_ => Err(Error::UnknownMatchDocumentOperator(operator))?,
)?;
}
"$size" => analyze_match_expression(
context,
desired_object_type_name,
&TypeConstraint::Scalar(BsonScalarType::Int),
match_expression,
)?,
_ => Err(Error::UnknownMatchDocumentOperator(operator))?,
}
Ok(())
}
Expand All @@ -114,7 +238,16 @@ fn analyze_match_expression(
field_type,
match_doc,
),
Bson::Array(_) => todo!(),
Bson::Array(xs) => {
let element_type = field_type.clone().map_nullable(|ft| match ft {
TypeConstraint::ArrayOf(t) => *t,
other => TypeConstraint::ElementOf(Box::new(other)),
});
for x in xs {
analyze_match_expression(context, desired_object_type_name, &element_type, x)?;
}
Ok(())
}
_ => Ok(()),
}
}
Expand Down
73 changes: 73 additions & 0 deletions crates/cli/src/native_query/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,79 @@ fn infers_parameter_type_from_binary_comparison() -> googletest::Result<()> {
Ok(())
}

#[googletest::test]
fn supports_various_query_predicate_operators() -> googletest::Result<()> {
let config = mflix_config();

let pipeline = Pipeline::new(vec![Stage::Match(doc! {
"title": { "$eq": "{{ title }}" },
"rated": { "$ne": "{{ rating }}" },
"year": "{{ year_1 }}",
"imdb.votes": { "$gt": "{{ votes }}" },
"num_mflix_comments": { "$in": "{{ num_comments_options }}" },
"$not": { "runtime": { "$lt": "{{ runtime }}" } },
"tomatoes.critic": { "$exists": "{{ critic_exists }}" },
"released": { "$type": ["date", "{{ other_type }}"] },
"$or": [
{ "$and": [
{ "writers": { "$eq": "{{ writers }}" } },
{ "year": "{{ year_2 }}", }
] },
{
"year": { "$mod": ["{{ divisor }}", "{{ expected_remainder }}"] },
"title": { "$regex": "{{ title_regex }}" },
},
],
"$and": [
{ "genres": { "$all": "{{ genres }}" } },
{ "genres": { "$all": ["{{ genre_1 }}"] } },
{ "genres": { "$elemMatch": {
"$gt": "{{ genre_start }}",
"$lt": "{{ genre_end }}",
}} },
{ "genres": { "$size": "{{ genre_size }}" } },
],
})]);

let native_query =
native_query_from_pipeline(&config, "operators_test", Some("movies".into()), pipeline)?;

expect_eq!(
native_query.arguments,
object_fields([
("title", Type::Scalar(BsonScalarType::String)),
("rating", Type::Scalar(BsonScalarType::String)),
("year_1", Type::Scalar(BsonScalarType::Int)),
("year_2", Type::Scalar(BsonScalarType::Int)),
("votes", Type::Scalar(BsonScalarType::Int)),
(
"num_comments_options",
Type::ArrayOf(Box::new(Type::Scalar(BsonScalarType::Int)))
),
("runtime", Type::Scalar(BsonScalarType::Int)),
("critic_exists", Type::Scalar(BsonScalarType::Bool)),
("other_type", Type::Scalar(BsonScalarType::String)),
(
"writers",
Type::ArrayOf(Box::new(Type::Scalar(BsonScalarType::String)))
),
("divisor", Type::Scalar(BsonScalarType::Int)),
("expected_remainder", Type::Scalar(BsonScalarType::Int)),
("title_regex", Type::Scalar(BsonScalarType::Regex)),
(
"genres",
Type::ArrayOf(Box::new(Type::Scalar(BsonScalarType::String)))
),
("genre_1", Type::Scalar(BsonScalarType::String)),
("genre_start", Type::Scalar(BsonScalarType::String)),
("genre_end", Type::Scalar(BsonScalarType::String)),
("genre_size", Type::Scalar(BsonScalarType::Int)),
])
);

Ok(())
}

#[googletest::test]
fn supports_various_aggregation_operators() -> googletest::Result<()> {
let config = mflix_config();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ fn element_of(array_type: Type) -> Result<Type> {
let element_type = match array_type {
Type::ArrayOf(elem_type) => Ok(*elem_type),
Type::Nullable(t) => element_of(*t).map(|t| Type::Nullable(Box::new(t))),
Type::ExtendedJSON => Ok(Type::ExtendedJSON),
_ => Err(Error::ExpectedArray {
actual_type: array_type,
}),
Expand Down
6 changes: 6 additions & 0 deletions crates/test-helpers/src/configuration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,13 @@ pub fn mflix_config() -> Configuration {
("credits", named_type("credits")),
("genres", array_of(named_type("String"))),
("imdb", named_type("Imdb")),
("lastUpdated", named_type("String")),
("num_mflix_comments", named_type("Int")),
("rated", named_type("String")),
("released", named_type("Date")),
("runtime", named_type("Int")),
("title", named_type("String")),
("writers", array_of(named_type("String"))),
("year", named_type("Int")),
("tomatoes", named_type("Tomatoes")),
]),
Expand Down

0 comments on commit 5408060

Please sign in to comment.