From 2a55eb20cc04f52dae554578acfb0ad07b786ed2 Mon Sep 17 00:00:00 2001 From: David Overton Date: Thu, 13 Jun 2024 14:39:20 +1000 Subject: [PATCH 1/2] Do not make _id fields nullable when sampling documents --- crates/cli/src/introspection/sampling.rs | 79 ++++++++++++++++++----- crates/cli/src/lib.rs | 2 +- crates/configuration/src/configuration.rs | 2 +- 3 files changed, 64 insertions(+), 19 deletions(-) diff --git a/crates/cli/src/introspection/sampling.rs b/crates/cli/src/introspection/sampling.rs index 51dc41f9..fbdf679e 100644 --- a/crates/cli/src/introspection/sampling.rs +++ b/crates/cli/src/introspection/sampling.rs @@ -19,7 +19,7 @@ type ObjectType = WithName; /// are not unifiable. pub async fn sample_schema_from_db( sample_size: u32, - all_schema_nullalble: bool, + all_schema_nullable: bool, config_file_changed: bool, state: &ConnectorState, existing_schemas: &HashSet, @@ -32,7 +32,7 @@ pub async fn sample_schema_from_db( let collection_name = collection_spec.name; if !existing_schemas.contains(&collection_name) || config_file_changed { let collection_schema = - sample_schema_from_collection(&collection_name, sample_size, all_schema_nullalble, state).await?; + sample_schema_from_collection(&collection_name, sample_size, all_schema_nullable, state).await?; schemas.insert(collection_name, collection_schema); } } @@ -42,7 +42,7 @@ pub async fn sample_schema_from_db( async fn sample_schema_from_collection( collection_name: &str, sample_size: u32, - all_schema_nullalble: bool, + all_schema_nullable: bool, state: &ConnectorState, ) -> anyhow::Result { let db = state.database(); @@ -52,8 +52,9 @@ async fn sample_schema_from_collection( .aggregate(vec![doc! {"$sample": { "size": sample_size }}], options) .await?; let mut collected_object_types = vec![]; + let is_collection_type = true; while let Some(document) = cursor.try_next().await? { - let object_types = make_object_type(collection_name, &document, all_schema_nullalble); + let object_types = make_object_type(collection_name, &document, is_collection_type, all_schema_nullable); collected_object_types = if collected_object_types.is_empty() { object_types } else { @@ -74,13 +75,13 @@ async fn sample_schema_from_collection( }) } -fn make_object_type(object_type_name: &str, document: &Document, all_schema_nullalble: bool) -> Vec { +fn make_object_type(object_type_name: &str, document: &Document, is_collection_type: bool, all_schema_nullable: bool) -> Vec { let (mut object_type_defs, object_fields) = { let type_prefix = format!("{object_type_name}_"); let (object_type_defs, object_fields): (Vec>, Vec) = document .iter() .map(|(field_name, field_value)| { - make_object_field(&type_prefix, field_name, field_value, all_schema_nullalble) + make_object_field(&type_prefix, field_name, field_value, is_collection_type, all_schema_nullable) }) .unzip(); (object_type_defs.concat(), object_fields) @@ -102,10 +103,11 @@ fn make_object_field( type_prefix: &str, field_name: &str, field_value: &Bson, - all_schema_nullalble: bool, + is_collection_type: bool, + all_schema_nullable: bool, ) -> (Vec, ObjectField) { let object_type_name = format!("{type_prefix}{field_name}"); - let (collected_otds, field_type) = make_field_type(&object_type_name, field_value, all_schema_nullalble); + let (collected_otds, field_type) = make_field_type(&object_type_name, field_value, all_schema_nullable); let object_field_value = WithName::named( field_name.to_owned(), schema::ObjectField { @@ -113,7 +115,8 @@ fn make_object_field( r#type: field_type, }, ); - let object_field = if all_schema_nullalble { + let object_field = if all_schema_nullable && !(is_collection_type && field_name == "_id") { + // The _id field on a collection type should never be nullable. make_nullable_field(object_field_value) } else { object_field_value @@ -126,13 +129,13 @@ fn make_object_field( pub fn type_from_bson( object_type_name: &str, value: &Bson, - all_schema_nullalble: bool, + all_schema_nullable: bool, ) -> (BTreeMap, Type) { - let (object_types, t) = make_field_type(object_type_name, value, all_schema_nullalble); + let (object_types, t) = make_field_type(object_type_name, value, all_schema_nullable); (WithName::into_map(object_types), t) } -fn make_field_type(object_type_name: &str, field_value: &Bson, all_schema_nullalble: bool) -> (Vec, Type) { +fn make_field_type(object_type_name: &str, field_value: &Bson, all_schema_nullable: bool) -> (Vec, Type) { fn scalar(t: BsonScalarType) -> (Vec, Type) { (vec![], Type::Scalar(t)) } @@ -144,7 +147,7 @@ fn make_field_type(object_type_name: &str, field_value: &Bson, all_schema_nullal let mut collected_otds = vec![]; let mut result_type = Type::Scalar(Undefined); for elem in arr { - let (elem_collected_otds, elem_type) = make_field_type(object_type_name, elem, all_schema_nullalble); + let (elem_collected_otds, elem_type) = make_field_type(object_type_name, elem, all_schema_nullable); collected_otds = if collected_otds.is_empty() { elem_collected_otds } else { @@ -155,7 +158,8 @@ fn make_field_type(object_type_name: &str, field_value: &Bson, all_schema_nullal (collected_otds, Type::ArrayOf(Box::new(result_type))) } Bson::Document(document) => { - let collected_otds = make_object_type(object_type_name, document, all_schema_nullalble); + let is_collection_type = false; + let collected_otds = make_object_type(object_type_name, document, is_collection_type, all_schema_nullable); (collected_otds, Type::Object(object_type_name.to_owned())) } Bson::Boolean(_) => scalar(Bool), @@ -195,7 +199,7 @@ mod tests { fn simple_doc() -> Result<(), anyhow::Error> { let object_name = "foo"; let doc = doc! {"my_int": 1, "my_string": "two"}; - let result = WithName::into_map::>(make_object_type(object_name, &doc, false)); + let result = WithName::into_map::>(make_object_type(object_name, &doc, false, false)); let expected = BTreeMap::from([( object_name.to_owned(), @@ -225,11 +229,52 @@ mod tests { Ok(()) } + #[test] + fn simple_doc_nullable_fields() -> Result<(), anyhow::Error> { + let object_name = "foo"; + let doc = doc! {"my_int": 1, "my_string": "two", "_id": 0}; + let result = WithName::into_map::>(make_object_type(object_name, &doc, true, true)); + + let expected = BTreeMap::from([( + object_name.to_owned(), + ObjectType { + fields: BTreeMap::from([ + ( + "_id".to_owned(), + ObjectField { + r#type: Type::Scalar(BsonScalarType::Int), + description: None, + }, + ), + ( + "my_int".to_owned(), + ObjectField { + r#type: Type::Nullable(Box::new(Type::Scalar(BsonScalarType::Int))), + description: None, + }, + ), + ( + "my_string".to_owned(), + ObjectField { + r#type: Type::Nullable(Box::new(Type::Scalar(BsonScalarType::String))), + description: None, + }, + ), + ]), + description: None, + }, + )]); + + assert_eq!(expected, result); + + Ok(()) + } + #[test] fn array_of_objects() -> Result<(), anyhow::Error> { let object_name = "foo"; let doc = doc! {"my_array": [{"foo": 42, "bar": ""}, {"bar": "wut", "baz": 3.77}]}; - let result = WithName::into_map::>(make_object_type(object_name, &doc, false)); + let result = WithName::into_map::>(make_object_type(object_name, &doc, false, false)); let expected = BTreeMap::from([ ( @@ -289,7 +334,7 @@ mod tests { fn non_unifiable_array_of_objects() -> Result<(), anyhow::Error> { let object_name = "foo"; let doc = doc! {"my_array": [{"foo": 42, "bar": ""}, {"bar": 17, "baz": 3.77}]}; - let result = WithName::into_map::>(make_object_type(object_name, &doc, false)); + let result = WithName::into_map::>(make_object_type(object_name, &doc, false, false)); let expected = BTreeMap::from([ ( diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index f171e515..5e24dce2 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -55,7 +55,7 @@ async fn update(context: &Context, args: &UpdateArgs) -> anyhow::Result<()> { None => configuration_options.introspection_options.no_validator_schema }; let all_schema_nullable = match args.all_schema_nullable { - Some(validator) => validator, + Some(b) => b, None => configuration_options.introspection_options.all_schema_nullable }; let config_file_changed = configuration::get_config_file_changed(&context.path).await?; diff --git a/crates/configuration/src/configuration.rs b/crates/configuration/src/configuration.rs index 8c645515..e7719ec7 100644 --- a/crates/configuration/src/configuration.rs +++ b/crates/configuration/src/configuration.rs @@ -206,7 +206,7 @@ pub struct ConfigurationIntrospectionOptions { // Whether to try validator schema first if one exists. pub no_validator_schema: bool, - // Default to setting all schema fields as nullable. + // Default to setting all schema fields, except the _id field on collection types, as nullable. pub all_schema_nullable: bool, } From 2fd76608383683cc921bec95c57952fba81ae0fa Mon Sep 17 00:00:00 2001 From: David Overton Date: Thu, 13 Jun 2024 14:44:39 +1000 Subject: [PATCH 2/2] Add changelog entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b6efc455..74cb6ef1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ This changelog documents the changes between release versions. - Support for root collection column references ([#75](https://github.com/hasura/ndc-mongodb/pull/75)) - Fix for databases with field names that begin with a dollar sign, or that contain dots ([#74](https://github.com/hasura/ndc-mongodb/pull/74)) - Implement column-to-column comparisons within the same collection ([#74](https://github.com/hasura/ndc-mongodb/pull/74)) +- Fix: schema generated with `_id` column nullable when introspecting schema via sampling ([#78](https://github.com/hasura/ndc-mongodb/pull/78)) ## [0.0.6] - 2024-05-01 - Enables logging events from the MongoDB driver by setting the `RUST_LOG` variable ([#67](https://github.com/hasura/ndc-mongodb/pull/67))