From 18d99df700aa326cde5406ce8fe2484b0152bf8f Mon Sep 17 00:00:00 2001 From: Ville Brofeldt Date: Sun, 29 Dec 2019 13:11:21 +0200 Subject: [PATCH 1/3] Add datasource.changed_on to cache_key and add+fix related unit tests --- setup.cfg | 2 +- superset/common/query_context.py | 13 ++++-- superset/common/query_object.py | 5 +-- superset/viz.py | 1 + tests/core_tests.py | 76 ++++++++++++++++++++++---------- 5 files changed, 64 insertions(+), 33 deletions(-) diff --git a/setup.cfg b/setup.cfg index 20c7b74d5c42e..8ca150e80ee33 100644 --- a/setup.cfg +++ b/setup.cfg @@ -45,7 +45,7 @@ combine_as_imports = true include_trailing_comma = true line_length = 88 known_first_party = superset -known_third_party =alembic,backoff,bleach,celery,click,colorama,contextlib2,croniter,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,geohash,geopy,humanize,isodate,jinja2,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parsedatetime,pathlib2,polyline,prison,psycopg2,pyarrow,pyhive,pytz,retry,selenium,setuptools,simplejson,sphinx_rtd_theme,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wtforms,wtforms_json,yaml +known_third_party =alembic,backoff,bleach,celery,click,colorama,contextlib2,croniter,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,geohash,geopy,humanize,isodate,jinja2,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parsedatetime,pathlib2,polyline,prison,pyarrow,pyhive,pytz,retry,selenium,setuptools,simplejson,sphinx_rtd_theme,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wtforms,wtforms_json,yaml multi_line_output = 3 order_by_type = false diff --git a/superset/common/query_context.py b/superset/common/query_context.py index b234c14773928..f80e8ce5c4727 100644 --- a/superset/common/query_context.py +++ b/superset/common/query_context.py @@ -157,20 +157,25 @@ def cache_timeout(self) -> int: return self.datasource.database.cache_timeout return config["CACHE_DEFAULT_TIMEOUT"] - def get_df_payload( # pylint: disable=too-many-locals,too-many-statements - self, query_obj: QueryObject, **kwargs - ) -> Dict[str, Any]: - """Handles caching around the df payload retrieval""" + def cache_key(self, query_obj: QueryObject, **kwargs) -> Optional[str]: extra_cache_keys = self.datasource.get_extra_cache_keys(query_obj.to_dict()) cache_key = ( query_obj.cache_key( datasource=self.datasource.uid, extra_cache_keys=extra_cache_keys, + changed_on=self.datasource.changed_on, **kwargs ) if query_obj else None ) + return cache_key + + def get_df_payload( # pylint: disable=too-many-locals,too-many-statements + self, query_obj: QueryObject, **kwargs + ) -> Dict[str, Any]: + """Handles caching around the df payload retrieval""" + cache_key = self.cache_key(query_obj, **kwargs) logging.info("Cache key: %s", cache_key) is_loaded = False stacktrace = None diff --git a/superset/common/query_object.py b/superset/common/query_object.py index 21649d1d0a32c..2109af06bd4a4 100644 --- a/superset/common/query_object.py +++ b/superset/common/query_object.py @@ -83,10 +83,7 @@ def __init__( # Temporal solution for backward compatability issue # due the new format of non-ad-hoc metric. - self.metrics = [ - metric if "expressionType" in metric else metric["label"] # type: ignore - for metric in metrics - ] + self.metrics = [utils.get_metric_name(metric) for metric in metrics] self.row_limit = row_limit self.filter = filters or [] self.timeseries_limit = timeseries_limit diff --git a/superset/viz.py b/superset/viz.py index 617ccdc895984..a258636bc440d 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -372,6 +372,7 @@ def cache_key(self, query_obj, **extra): cache_dict["time_range"] = self.form_data.get("time_range") cache_dict["datasource"] = self.datasource.uid cache_dict["extra_cache_keys"] = self.datasource.get_extra_cache_keys(query_obj) + cache_dict["changed_on"] = self.datasource.changed_on json_data = self.json_dumps(cache_dict, sort_keys=True) return hashlib.md5(json_data.encode("utf-8")).hexdigest() diff --git a/tests/core_tests.py b/tests/core_tests.py index 2b1b840c47165..060b29255696e 100644 --- a/tests/core_tests.py +++ b/tests/core_tests.py @@ -28,15 +28,17 @@ import random import re import string +from typing import Any, Dict import unittest from unittest import mock import pandas as pd -import psycopg2 import sqlalchemy as sqla from tests.test_app import app from superset import dataframe, db, jinja_context, security_manager, sql_lab +from superset.common.query_context import QueryContext +from superset.connectors.connector_registry import ConnectorRegistry from superset.connectors.sqla.models import SqlaTable from superset.db_engine_specs.base import BaseEngineSpec from superset.db_engine_specs.mssql import MssqlEngineSpec @@ -98,7 +100,23 @@ def test_slice_endpoint(self): resp = self.client.get("/superset/slice/-1/") assert resp.status_code == 404 - def test_cache_key(self): + def _get_query_context_dict(self) -> Dict[str, Any]: + self.login(username="admin") + slc = self.get_slice("Girl Name Cloud", db.session) + return { + "datasource": {"id": slc.datasource_id, "type": slc.datasource_type}, + "queries": [ + { + "granularity": "ds", + "groupby": ["name"], + "metrics": ["sum__num"], + "filters": [], + "row_limit": 100, + } + ], + } + + def test_viz_cache_key(self): self.login(username="admin") slc = self.get_slice("Girls", db.session) @@ -110,30 +128,40 @@ def test_cache_key(self): qobj["groupby"] = [] self.assertNotEqual(cache_key, viz.cache_key(qobj)) + def test_cache_key_changes_when_datasource_is_updated(self): + qc_dict = self._get_query_context_dict() + + # construct baseline cache_key + query_context = QueryContext(**qc_dict) + query_object = query_context.queries[0] + cache_key_original = query_context.cache_key(query_object) + + # make temporary change and revert it to refresh the changed_on property + datasource = ConnectorRegistry.get_datasource( + datasource_type=qc_dict["datasource"]["type"], + datasource_id=qc_dict["datasource"]["id"], + session=db.session, + ) + description_original = datasource.description + datasource.description = "temporary description" + db.session.commit() + datasource.description = description_original + db.session.commit() + + # create new QueryContext with unchanged attributes and extract new cache_key + query_context = QueryContext(**qc_dict) + query_object = query_context.queries[0] + cache_key_new = query_context.cache_key(query_object) + + # the new cache_key should be different due to updated datasource + self.assertNotEqual(cache_key_original, cache_key_new) + def test_api_v1_query_endpoint(self): self.login(username="admin") - slc = self.get_slice("Girl Name Cloud", db.session) - form_data = slc.form_data - data = json.dumps( - { - "datasource": {"id": slc.datasource_id, "type": slc.datasource_type}, - "queries": [ - { - "granularity": "ds", - "groupby": ["name"], - "metrics": ["sum__num"], - "filters": [], - "time_range": "{} : {}".format( - form_data.get("since"), form_data.get("until") - ), - "limit": 100, - } - ], - } - ) - # TODO: update once get_data is implemented for QueryObject - with self.assertRaises(Exception): - self.get_resp("/api/v1/query/", {"query_context": data}) + qc_dict = self._get_query_context_dict() + data = json.dumps(qc_dict) + resp = json.loads(self.get_resp("/api/v1/query/", {"query_context": data})) + self.assertEqual(resp[0]["rowcount"], 100) def test_old_slice_json_endpoint(self): self.login(username="admin") From 1454bf23778f787e4f2ed2c4c27316ce448b1515 Mon Sep 17 00:00:00 2001 From: Ville Brofeldt Date: Mon, 13 Jan 2020 23:08:35 +0200 Subject: [PATCH 2/3] Add note to UPDATING.md --- UPDATING.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/UPDATING.md b/UPDATING.md index aebe63a3e7117..533414fcea29a 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -22,8 +22,12 @@ This file documents any backwards-incompatible changes in Superset and assists people when migrating to a new version. ## Next +* [8901](https://github.com/apache/incubator-superset/pull/8901): The datasource's update +timestamp has been added to the query object's cache key to ensure updates to +datasources are always reflected in associated query results. As a consequence all +previously cached results will be invalidated when updating to the next version. -* [8732](https://github.com/apache/incubator-superset/pull/8732): Swagger user interface is now enabled by default. +* [8732](https://github.com/apache/incubator-superset/pull/8732): Swagger user interface is now enabled by default. A new permission `show on SwaggerView` is created by `superset init` and given to the `Admin` Role. To disable the UI, set `FAB_API_SWAGGER_UI = False` on config. @@ -90,9 +94,9 @@ which adds missing non-nullable fields to the `datasources` table. Depending on the integrity of the data, manual intervention may be required. * [5452](https://github.com/apache/incubator-superset/pull/5452): a change -which adds missing non-nullable fields and uniqueness constraints (which may be -case insensitive depending on your database configuration) to the `columns`and -`table_columns` tables. Depending on the integrity of the data, manual +which adds missing non-nullable fields and uniqueness constraints (which may be +case insensitive depending on your database configuration) to the `columns`and +`table_columns` tables. Depending on the integrity of the data, manual intervention may be required. * `fabmanager` command line is deprecated since Flask-AppBuilder 2.0.0, use the new `flask fab ` integrated with *Flask cli*. @@ -100,7 +104,7 @@ the new `flask fab ` integrated with *Flask cli*. `FAB_UPDATE_PERMS` config boolean key. To disable automatic creation of permissions set `FAB_UPDATE_PERMS = False` on config. * [5453](https://github.com/apache/incubator-superset/pull/5453): a change -which adds missing non-nullable fields and uniqueness constraints (which may be +which adds missing non-nullable fields and uniqueness constraints (which may be case insensitive depending on your database configuration) to the metrics and sql_metrics tables. Depending on the integrity of the data, manual intervention may be required. From 80b8474f84ab982b28a1460bd445b6ab59b6b651 Mon Sep 17 00:00:00 2001 From: Ville Brofeldt Date: Mon, 13 Jan 2020 23:14:22 +0200 Subject: [PATCH 3/3] Remove redundant comment about metric names --- superset/common/query_object.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/superset/common/query_object.py b/superset/common/query_object.py index 2109af06bd4a4..dad50ea48d063 100644 --- a/superset/common/query_object.py +++ b/superset/common/query_object.py @@ -81,8 +81,6 @@ def __init__( self.time_shift = utils.parse_human_timedelta(time_shift) self.groupby = groupby or [] - # Temporal solution for backward compatability issue - # due the new format of non-ad-hoc metric. self.metrics = [utils.get_metric_name(metric) for metric in metrics] self.row_limit = row_limit self.filter = filters or []