From b5ab2c5c76a3b79e71e130851b74c949997f1036 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Wed, 22 Dec 2021 09:41:31 -0800 Subject: [PATCH] fix: dataset extra import/export (#17740) * fix: dataset extra import/export * Update superset/datasets/commands/importers/v1/utils.py --- superset/datasets/commands/export.py | 2 +- .../datasets/commands/importers/v1/utils.py | 5 ++- superset/datasets/schemas.py | 31 ++++++++++++++++--- .../datasets/commands_tests.py | 5 ++- .../fixtures/importexport.py | 2 +- .../datasets/commands/export_test.py | 3 +- .../commands/importers/v1/import_test.py | 23 +++++++++----- 7 files changed, 52 insertions(+), 19 deletions(-) diff --git a/superset/datasets/commands/export.py b/superset/datasets/commands/export.py index 45460f36e3455..4e3843a0daee9 100644 --- a/superset/datasets/commands/export.py +++ b/superset/datasets/commands/export.py @@ -31,7 +31,7 @@ logger = logging.getLogger(__name__) -JSON_KEYS = {"params", "template_params"} +JSON_KEYS = {"params", "template_params", "extra"} class ExportDatasetsCommand(ExportModelsCommand): diff --git a/superset/datasets/commands/importers/v1/utils.py b/superset/datasets/commands/importers/v1/utils.py index 92687324aab8c..ba2b7df26174a 100644 --- a/superset/datasets/commands/importers/v1/utils.py +++ b/superset/datasets/commands/importers/v1/utils.py @@ -36,7 +36,7 @@ CHUNKSIZE = 512 VARCHAR = re.compile(r"VARCHAR\((\d+)\)", re.IGNORECASE) -JSON_KEYS = {"params", "template_params"} +JSON_KEYS = {"params", "template_params", "extra"} type_map = { @@ -97,8 +97,7 @@ def import_dataset( logger.info("Unable to encode `%s` field: %s", key, config[key]) for key in ("metrics", "columns"): for attributes in config.get(key, []): - # should be a dictionary, but in initial exports this was a string - if isinstance(attributes.get("extra"), dict): + if attributes.get("extra") is not None: try: attributes["extra"] = json.dumps(attributes["extra"]) except TypeError: diff --git a/superset/datasets/schemas.py b/superset/datasets/schemas.py index e8e30c65769ce..2390040cc4fa1 100644 --- a/superset/datasets/schemas.py +++ b/superset/datasets/schemas.py @@ -14,10 +14,12 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import json import re +from typing import Any, Dict from flask_babel import lazy_gettext as _ -from marshmallow import fields, Schema, ValidationError +from marshmallow import fields, pre_load, Schema, ValidationError from marshmallow.validate import Length get_delete_ids_schema = {"type": "array", "items": {"type": "integer"}} @@ -130,9 +132,19 @@ class DatasetRelatedObjectsResponse(Schema): class ImportV1ColumnSchema(Schema): + # pylint: disable=no-self-use, unused-argument + @pre_load + def fix_extra(self, data: Dict[str, Any], **kwargs: Any) -> Dict[str, Any]: + """ + Fix for extra initially beeing exported as a string. + """ + if isinstance(data.get("extra"), str): + data["extra"] = json.loads(data["extra"]) + + return data + column_name = fields.String(required=True) - # extra was initially exported incorrectly as a string - extra = fields.Raw(allow_none=True) + extra = fields.Dict(allow_none=True) verbose_name = fields.String(allow_none=True) is_dttm = fields.Boolean(default=False, allow_none=True) is_active = fields.Boolean(default=True, allow_none=True) @@ -156,6 +168,17 @@ class ImportV1MetricSchema(Schema): class ImportV1DatasetSchema(Schema): + # pylint: disable=no-self-use, unused-argument + @pre_load + def fix_extra(self, data: Dict[str, Any], **kwargs: Any) -> Dict[str, Any]: + """ + Fix for extra initially beeing exported as a string. + """ + if isinstance(data.get("extra"), str): + data["extra"] = json.loads(data["extra"]) + + return data + table_name = fields.String(required=True) main_dttm_col = fields.String(allow_none=True) description = fields.String(allow_none=True) @@ -168,7 +191,7 @@ class ImportV1DatasetSchema(Schema): template_params = fields.Dict(allow_none=True) filter_select_enabled = fields.Boolean() fetch_values_predicate = fields.String(allow_none=True) - extra = fields.String(allow_none=True) + extra = fields.Dict(allow_none=True) uuid = fields.UUID(required=True) columns = fields.List(fields.Nested(ImportV1ColumnSchema)) metrics = fields.List(fields.Nested(ImportV1MetricSchema)) diff --git a/tests/integration_tests/datasets/commands_tests.py b/tests/integration_tests/datasets/commands_tests.py index 67ff3a7918ae1..ac0319aa9171e 100644 --- a/tests/integration_tests/datasets/commands_tests.py +++ b/tests/integration_tests/datasets/commands_tests.py @@ -325,7 +325,10 @@ def test_import_v1_dataset(self, mock_g): assert dataset.template_params == "{}" assert dataset.filter_select_enabled assert dataset.fetch_values_predicate is None - assert dataset.extra == "dttm > sysdate() -10 " + assert ( + dataset.extra + == '{"certification": {"certified_by": "Data Platform Team", "details": "This table is the source of truth."}, "warning_markdown": "This is a warning."}' + ) # user should be included as one of the owners assert dataset.owners == [mock_g.user] diff --git a/tests/integration_tests/fixtures/importexport.py b/tests/integration_tests/fixtures/importexport.py index e4a2ec29d8611..4ffe0643ef2ec 100644 --- a/tests/integration_tests/fixtures/importexport.py +++ b/tests/integration_tests/fixtures/importexport.py @@ -373,7 +373,7 @@ "template_params": {}, "filter_select_enabled": True, "fetch_values_predicate": None, - "extra": "dttm > sysdate() -10 ", + "extra": '{ "certification": { "certified_by": "Data Platform Team", "details": "This table is the source of truth." }, "warning_markdown": "This is a warning." }', "metrics": [ { "metric_name": "count", diff --git a/tests/unit_tests/datasets/commands/export_test.py b/tests/unit_tests/datasets/commands/export_test.py index 5c67ab2e145ad..a5967350ec537 100644 --- a/tests/unit_tests/datasets/commands/export_test.py +++ b/tests/unit_tests/datasets/commands/export_test.py @@ -98,7 +98,8 @@ def test_export(app_context: None, session: Session) -> None: answer: '42' filter_select_enabled: 1 fetch_values_predicate: foo IN (1, 2) -extra: '{{\"warning_markdown\": \"*WARNING*\"}}' +extra: + warning_markdown: '*WARNING*' uuid: null metrics: - metric_name: cnt diff --git a/tests/unit_tests/datasets/commands/importers/v1/import_test.py b/tests/unit_tests/datasets/commands/importers/v1/import_test.py index 99fa42d072f3d..922cf9a1f2876 100644 --- a/tests/unit_tests/datasets/commands/importers/v1/import_test.py +++ b/tests/unit_tests/datasets/commands/importers/v1/import_test.py @@ -22,6 +22,8 @@ from sqlalchemy.orm.session import Session +from superset.datasets.schemas import ImportV1DatasetSchema + def test_import_(app_context: None, session: Session) -> None: """ @@ -56,7 +58,7 @@ def test_import_(app_context: None, session: Session) -> None: "template_params": {"answer": "42",}, "filter_select_enabled": True, "fetch_values_predicate": "foo IN (1, 2)", - "extra": '{"warning_markdown": "*WARNING*"}', + "extra": {"warning_markdown": "*WARNING*"}, "uuid": dataset_uuid, "metrics": [ { @@ -147,7 +149,8 @@ def test_import_column_extra_is_string(app_context: None, session: Session) -> N session.flush() dataset_uuid = uuid.uuid4() - config: Dict[str, Any] = { + yaml_config: Dict[str, Any] = { + "version": "1.0.0", "table_name": "my_table", "main_dttm_col": "ds", "description": "This is the description", @@ -171,11 +174,11 @@ def test_import_column_extra_is_string(app_context: None, session: Session) -> N { "column_name": "profit", "verbose_name": None, - "is_dttm": None, - "is_active": None, + "is_dttm": False, + "is_active": True, "type": "INTEGER", - "groupby": None, - "filterable": None, + "groupby": False, + "filterable": False, "expression": "revenue-expenses", "description": None, "python_date_format": None, @@ -183,8 +186,12 @@ def test_import_column_extra_is_string(app_context: None, session: Session) -> N } ], "database_uuid": database.uuid, - "database_id": database.id, } - sqla_table = import_dataset(session, config) + schema = ImportV1DatasetSchema() + dataset_config = schema.load(yaml_config) + dataset_config["database_id"] = database.id + sqla_table = import_dataset(session, dataset_config) + assert sqla_table.columns[0].extra == '{"certified_by": "User"}' + assert sqla_table.extra == '{"warning_markdown": "*WARNING*"}'