From cfe1059092fdface253c359739079a3434b1b2a0 Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Thu, 24 Feb 2022 12:41:12 +0900 Subject: [PATCH 1/2] Document guess field type mapping in MongoDB Additionally, improve the sentence how to handle wrong table definitions. --- docs/src/main/sphinx/connector/mongodb.rst | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/docs/src/main/sphinx/connector/mongodb.rst b/docs/src/main/sphinx/connector/mongodb.rst index 04bf342606ce..5c86340373b9 100644 --- a/docs/src/main/sphinx/connector/mongodb.rst +++ b/docs/src/main/sphinx/connector/mongodb.rst @@ -84,7 +84,27 @@ This property is required; there is no default. A connection url or seeds must b As MongoDB is a document database, there is no fixed schema information in the system. So a special collection in each MongoDB database should define the schema of all tables. Please refer the :ref:`table-definition-label` section for the details. -At startup, this connector tries guessing fields' types, but it might not be correct for your collection. In that case, you need to modify it manually. ``CREATE TABLE`` and ``CREATE TABLE AS SELECT`` will create an entry for you. +At startup, the connector tries to guess the data type of fields based on the mapping in the following table. + +================== ================ ================================================ +MongoDB Trino Notes +================== ================ ================================================ +``Boolean`` ``BOOLEAN`` +``Int32`` ``BIGINT`` +``Int64`` ``BIGINT`` +``Double`` ``DOUBLE`` +``Date`` ``TIMESTAMP(3)`` +``String`` ``VARCHAR`` +``Binary`` ``VARBINARY`` +``ObjectId`` ``ObjectId`` +``Object`` ``ROW`` +``Array`` ``ARRAY`` Map to ``ROW`` if the element type is not unique +``DBRef`` ``ROW`` +================== ================ ================================================ + +The initial guess can be incorrect for your specific collection. 
In that case, you need to modify it manually. Please refer to the :ref:`table-definition-label` section for the details. + +Creating new tables using ``CREATE TABLE`` and ``CREATE TABLE AS SELECT`` automatically creates an entry for you. This property is optional; the default is ``_schema``. From 131976ca0fe61863638498d354ed60105b155cf2 Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Thu, 24 Feb 2022 12:40:40 +0900 Subject: [PATCH 2/2] Add test to guess field types in MongoDB --- .../mongodb/BaseMongoConnectorTest.java | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/plugin/trino-mongodb/src/test/java/io/trino/plugin/mongodb/BaseMongoConnectorTest.java b/plugin/trino-mongodb/src/test/java/io/trino/plugin/mongodb/BaseMongoConnectorTest.java index b41b0ad93344..bd9c87608931 100644 --- a/plugin/trino-mongodb/src/test/java/io/trino/plugin/mongodb/BaseMongoConnectorTest.java +++ b/plugin/trino-mongodb/src/test/java/io/trino/plugin/mongodb/BaseMongoConnectorTest.java @@ -104,6 +104,39 @@ public void testSortItemsReflectedInExplain() "TopNPartial\\[5 by \\(nationkey DESC"); } + @Test(dataProvider = "guessFieldTypesProvider") + public void testGuessFieldTypes(String mongoValue, String trinoValue) + { + Document document = Document.parse(format("{\"test\":%s}", mongoValue)); + + assertUpdate("DROP TABLE IF EXISTS test.test_guess_field_type"); + client.getDatabase("test").getCollection("test_guess_field_type").insertOne(document); + + assertThat(query("SELECT test FROM test.test_guess_field_type")) + .matches("SELECT " + trinoValue); + + assertUpdate("DROP TABLE test.test_guess_field_type"); + } + + @DataProvider + public Object[][] guessFieldTypesProvider() + { + return new Object[][] { + {"true", "true"}, // boolean -> boolean + {"2147483647", "bigint '2147483647'"}, // int32 -> bigint + {"{\"$numberLong\": \"9223372036854775807\"}", "9223372036854775807"}, // int64 -> bigint + {"1.23", "double '1.23'"}, // double -> double + {"{\"$date\": 
\"1970-01-01T00:00:00.000Z\"}", "timestamp '1970-01-01 00:00:00.000'"}, // date -> timestamp(3) + {"'String type'", "varchar 'String type'"}, // string -> varchar + {"{$binary: \"\",\"$type\": \"0\"}", "to_utf8('')"}, // binary -> varbinary + {"{\"$oid\": \"6216f0c6c432d45190f25e7c\"}", "ObjectId('6216f0c6c432d45190f25e7c')"}, // objectid -> objectid + {"[1]", "array[bigint '1']"}, // array with single type -> array + {"{\"field\": \"object\"}", "CAST(row('object') AS row(field varchar))"}, // object -> row + {"[9, \"test\"]", "CAST(row(9, 'test') AS row(_pos1 bigint, _pos2 varchar))"}, // array with multiple types -> row + {"{\"$ref\":\"test_ref\",\"$id\":ObjectId(\"4e3f33de6266b5845052c02c\"),\"$db\":\"test_db\"}", "CAST(row('test_db', 'test_ref', ObjectId('4e3f33de6266b5845052c02c')) AS row(databasename varchar, collectionname varchar, id ObjectId))"}, // dbref -> row + }; + } + @Test public void createTableWithEveryType() {