From a31124fb14ae825e338860014e1263a494c3a265 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Mon, 12 Dec 2022 15:18:01 +0530 Subject: [PATCH] fix(ingest): mysql - fix mysql ingestion issue with non-lowercase database (#6713) --- .../source/sql/two_tier_sql_source.py | 8 +++++ .../mysql/mysql_mces_no_db_golden.json | 34 +++++++++---------- .../tests/integration/mysql/setup/setup.sql | 10 +++--- 3 files changed, 30 insertions(+), 22 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py index c62c9c88f88543..f7e18dc066647e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py @@ -94,3 +94,11 @@ def gen_schema_containers( self, schema: str, db_name: str ) -> typing.Iterable[MetadataWorkUnit]: return [] + + def get_db_name(self, inspector: Inspector) -> str: + engine = inspector.engine + + if engine and hasattr(engine, "url") and hasattr(engine.url, "database"): + return str(engine.url.database).strip('"') + else: + raise Exception("Unable to get database name from Sqlalchemy inspector") diff --git a/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json b/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json index ee49fbbf2ccebe..59f6e127eff0cf 100644 --- a/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json +++ b/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json @@ -1,11 +1,11 @@ [ { "entityType": "container", - "entityUrn": "urn:li:container:9191fea5add3487ba6b8266d4c74a7d1", + "entityUrn": "urn:li:container:0f72a1bc79da282eb614cc089c0ba302", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"mysql\", \"instance\": \"PROD\", \"database\": \"datacharmer\"}, \"name\": \"datacharmer\"}", + "value": "{\"customProperties\": {\"platform\": \"mysql\", \"instance\": \"PROD\", \"database\": \"dataCharmer\"}, \"name\": \"dataCharmer\"}", "contentType": "application/json" }, "systemMetadata": { @@ -15,7 +15,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:9191fea5add3487ba6b8266d4c74a7d1", + "entityUrn": "urn:li:container:0f72a1bc79da282eb614cc089c0ba302", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -29,7 +29,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:9191fea5add3487ba6b8266d4c74a7d1", + "entityUrn": "urn:li:container:0f72a1bc79da282eb614cc089c0ba302", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -43,7 +43,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:9191fea5add3487ba6b8266d4c74a7d1", + "entityUrn": "urn:li:container:0f72a1bc79da282eb614cc089c0ba302", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -57,11 +57,11 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,datacharmer.employees,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,dataCharmer.employees,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:9191fea5add3487ba6b8266d4c74a7d1\"}", + "value": "{\"container\": \"urn:li:container:0f72a1bc79da282eb614cc089c0ba302\"}", "contentType": "application/json" }, "systemMetadata": { @@ -72,7 +72,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,datacharmer.employees,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,dataCharmer.employees,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -88,7 +88,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "datacharmer.employees", + "schemaName": "dataCharmer.employees", "platform": "urn:li:dataPlatform:mysql", "version": 0, "created": { @@ -191,7 +191,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,datacharmer.employees,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,dataCharmer.employees,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -205,11 +205,11 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,datacharmer.salaries,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,dataCharmer.salaries,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:9191fea5add3487ba6b8266d4c74a7d1\"}", + "value": "{\"container\": \"urn:li:container:0f72a1bc79da282eb614cc089c0ba302\"}", "contentType": "application/json" }, "systemMetadata": { @@ -220,7 +220,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,datacharmer.salaries,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,dataCharmer.salaries,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -236,7 +236,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "datacharmer.salaries", + "schemaName": "dataCharmer.salaries", "platform": "urn:li:dataPlatform:mysql", "version": 0, "created": { @@ -315,7 +315,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,datacharmer.salaries,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,dataCharmer.salaries,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -329,7 +329,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,datacharmer.employees,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,dataCharmer.employees,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -343,7 +343,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,datacharmer.salaries,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,dataCharmer.salaries,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { diff --git a/metadata-ingestion/tests/integration/mysql/setup/setup.sql b/metadata-ingestion/tests/integration/mysql/setup/setup.sql index ed6df6ac17dd8d..c8a88aff0f2533 100644 --- a/metadata-ingestion/tests/integration/mysql/setup/setup.sql +++ b/metadata-ingestion/tests/integration/mysql/setup/setup.sql @@ -48,13 +48,13 @@ CREATE TABLE metadata_index ( CREATE VIEW metadata_index_view AS SELECT id, urn, path, doubleVal FROM metadata_index; -- ----------------------------------------------------- --- Some sample data, from https://github.com/datacharmer/test_db. +-- Some sample data, from https://github.com/dataCharmer/test_db. -- ----------------------------------------------------- -CREATE SCHEMA IF NOT EXISTS `datacharmer` ; -USE `datacharmer` ; +CREATE SCHEMA IF NOT EXISTS `dataCharmer` ; +USE `dataCharmer` ; -CREATE TABLE `datacharmer`.`employees` ( +CREATE TABLE `dataCharmer`.`employees` ( emp_no INT NOT NULL, birth_date DATE NOT NULL, first_name VARCHAR(14) NOT NULL, @@ -64,7 +64,7 @@ CREATE TABLE `datacharmer`.`employees` ( PRIMARY KEY (emp_no) ); -CREATE TABLE `datacharmer`.`salaries` ( +CREATE TABLE `dataCharmer`.`salaries` ( emp_no INT NOT NULL, salary INT NOT NULL, from_date DATE NOT NULL,