From ef2670ca32e8ba0a04f9dc7899db55812e2f9b46 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Wed, 12 Oct 2016 13:50:47 -0700 Subject: [PATCH] Using inheritance scheme to organize db specific code (#1294) * Using inheritance scheme to organize db specific code * Addressing comments --- caravel/data/__init__.py | 4 +- caravel/db_engine_specs.py | 213 +++++++++++++++++++++++++++++++++++++ caravel/models.py | 148 +++++--------------------- caravel/utils.py | 8 +- caravel/viz.py | 2 +- run_specific_test.sh | 2 +- tests/core_tests.py | 4 +- 7 files changed, 248 insertions(+), 133 deletions(-) create mode 100644 caravel/db_engine_specs.py diff --git a/caravel/data/__init__.py b/caravel/data/__init__.py index 0f9e2a308efa3..dd8626fd960f1 100644 --- a/caravel/data/__init__.py +++ b/caravel/data/__init__.py @@ -212,7 +212,7 @@ def load_world_bank_health_n_pop(): "metrics": ["sum__SP_POP_TOTL"], "row_limit": config.get("ROW_LIMIT"), "since": "2014-01-01", - "until": "2014-01-01", + "until": "2014-01-02", "where": "", "markup_type": "markdown", "country_fieldtype": "cca3", @@ -285,7 +285,7 @@ def load_world_bank_health_n_pop(): defaults, viz_type='bubble', since="2011-01-01", - until="2011-01-01", + until="2011-01-02", series="region", limit="0", entity="country_name", diff --git a/caravel/db_engine_specs.py b/caravel/db_engine_specs.py new file mode 100644 index 0000000000000..fa8f7e67eeb9c --- /dev/null +++ b/caravel/db_engine_specs.py @@ -0,0 +1,213 @@ +"""Compatibility layer for different database engines + +This modules stores logic specific to different database engines. Things +like time-related functions that are similar but not identical, or +information as to expose certain features or not and how to expose them. + +For instance, Hive/Presto supports partitions and have a specific API to +list partitions. Other databases like Vertica also support partitions but +have different API to get to them. Other databases don't support partitions +at all. The classes here will use a common interface to specify all this. + +The general idea is to use static classes and an inheritance scheme. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import inspect +from collections import namedtuple +from flask_babel import lazy_gettext as _ + +Grain = namedtuple('Grain', 'name label function') + + +class BaseEngineSpec(object): + engine = 'base' # str as defined in sqlalchemy.engine.engine + time_grains = tuple() + + @classmethod + def epoch_to_dttm(cls): + raise NotImplementedError() + + @classmethod + def epoch_ms_to_dttm(cls): + return cls.epoch_to_dttm().replace('{col}', '({col}/1000.0)') + + @classmethod + def extra_table_metadata(cls, table): + """Returns engine-specific table metadata""" + return {} + + @classmethod + def convert_dttm(cls, target_type, dttm): + return "'{}'".format(dttm.strftime('%Y-%m-%d %H:%M:%S')) + + +class PostgresEngineSpec(BaseEngineSpec): + engine = 'postgres' + + time_grains = ( + Grain("Time Column", _('Time Column'), "{col}"), + Grain("second", _('second'), "DATE_TRUNC('second', {col})"), + Grain("minute", _('minute'), "DATE_TRUNC('minute', {col})"), + Grain("hour", _('hour'), "DATE_TRUNC('hour', {col})"), + Grain("day", _('day'), "DATE_TRUNC('day', {col})"), + Grain("week", _('week'), "DATE_TRUNC('week', {col})"), + Grain("month", _('month'), "DATE_TRUNC('month', {col})"), + Grain("year", _('year'), "DATE_TRUNC('year', {col})"), + ) + + @classmethod + def epoch_to_dttm(cls): + return "(timestamp 'epoch' + {col} * interval '1 second')" + + @classmethod + def convert_dttm(cls, target_type, dttm): + return "'{}'".format(dttm.strftime('%Y-%m-%d %H:%M:%S')) + + +class SqliteEngineSpec(BaseEngineSpec): + engine = 'sqlite' + time_grains = ( + Grain('Time Column', _('Time Column'), '{col}'), + Grain('day', _('day'), 'DATE({col})'), + Grain("week", _('week'), + "DATE({col}, -strftime('%w', {col}) || ' days')"), + Grain("month", _('month'), + "DATE({col}, -strftime('%d', {col}) || ' days')"), + ) + + @classmethod + def epoch_to_dttm(cls): + return "datetime({col}, 'unixepoch')" + + @classmethod + def convert_dttm(cls, target_type, dttm): + iso = dttm.isoformat().replace('T', ' ') + if '.' not in iso: + iso += '.000000' + return "'{}'".format(iso) + + +class MySQLEngineSpec(BaseEngineSpec): + engine = 'mysql' + time_grains = ( + Grain('Time Column', _('Time Column'), '{col}'), + Grain("second", _('second'), "DATE_ADD(DATE({col}), " + "INTERVAL (HOUR({col})*60*60 + MINUTE({col})*60" + " + SECOND({col})) SECOND)"), + Grain("minute", _('minute'), "DATE_ADD(DATE({col}), " + "INTERVAL (HOUR({col})*60 + MINUTE({col})) MINUTE)"), + Grain("hour", _('hour'), "DATE_ADD(DATE({col}), " + "INTERVAL HOUR({col}) HOUR)"), + Grain('day', _('day'), 'DATE({col})'), + Grain("week", _('week'), "DATE(DATE_SUB({col}, " + "INTERVAL DAYOFWEEK({col}) - 1 DAY))"), + Grain("month", _('month'), "DATE(DATE_SUB({col}, " + "INTERVAL DAYOFMONTH({col}) - 1 DAY))"), + ) + @classmethod + def convert_dttm(cls, target_type, dttm): + if target_type.upper() in ('DATETIME', 'DATE'): + return "STR_TO_DATE('{}', '%Y-%m-%d %H:%i:%s')".format( + dttm.strftime('%Y-%m-%d %H:%M:%S')) + return "'{}'".format(dttm.strftime('%Y-%m-%d %H:%M:%S')) + + @classmethod + def epoch_to_dttm(cls): + return "from_unixtime({col})" + + +class PrestoEngineSpec(BaseEngineSpec): + engine = 'presto' + + time_grains = ( + Grain('Time Column', _('Time Column'), '{col}'), + Grain('second', _('second'), + "date_trunc('second', CAST({col} AS TIMESTAMP))"), + Grain('minute', _('minute'), + "date_trunc('minute', CAST({col} AS TIMESTAMP))"), + Grain('hour', _('hour'), + "date_trunc('hour', CAST({col} AS TIMESTAMP))"), + Grain('day', _('day'), + "date_trunc('day', CAST({col} AS TIMESTAMP))"), + Grain('week', _('week'), + "date_trunc('week', CAST({col} AS TIMESTAMP))"), + Grain('month', _('month'), + "date_trunc('month', CAST({col} AS TIMESTAMP))"), + Grain('quarter', _('quarter'), + "date_trunc('quarter', CAST({col} AS TIMESTAMP))"), + Grain("week_ending_saturday", _('week_ending_saturday'), + "date_add('day', 5, date_trunc('week', date_add('day', 1, " + "CAST({col} AS TIMESTAMP))))"), + Grain("week_start_sunday", _('week_start_sunday'), + "date_add('day', -1, date_trunc('week', " + "date_add('day', 1, CAST({col} AS TIMESTAMP))))"), + ) + + @classmethod + def convert_dttm(cls, target_type, dttm): + if target_type.upper() in ('DATE', 'DATETIME'): + return "from_iso8601_date('{}')".format(dttm.isoformat()) + return "'{}'".format(dttm.strftime('%Y-%m-%d %H:%M:%S')) + + @classmethod + def epoch_to_dttm(cls): + return "from_unixtime({col})" + + +class MssqlEngineSpec(BaseEngineSpec): + engine = 'mssql' + epoch_to_dttm = "dateadd(S, {col}, '1970-01-01')" + + time_grains = ( + Grain("Time Column", _('Time Column'), "{col}"), + Grain("second", _('second'), "DATEADD(second, " + "DATEDIFF(second, '2000-01-01', {col}), '2000-01-01')"), + Grain("minute", _('minute'), "DATEADD(minute, " + "DATEDIFF(minute, 0, {col}), 0)"), + Grain("5 minute", _('5 minute'), "DATEADD(minute, " + "DATEDIFF(minute, 0, {col}) / 5 * 5, 0)"), + Grain("half hour", _('half hour'), "DATEADD(minute, " + "DATEDIFF(minute, 0, {col}) / 30 * 30, 0)"), + Grain("hour", _('hour'), "DATEADD(hour, " + "DATEDIFF(hour, 0, {col}), 0)"), + Grain("day", _('day'), "DATEADD(day, " + "DATEDIFF(day, 0, {col}), 0)"), + Grain("week", _('week'), "DATEADD(week, " + "DATEDIFF(week, 0, {col}), 0)"), + Grain("month", _('month'), "DATEADD(month, " + "DATEDIFF(month, 0, {col}), 0)"), + Grain("quarter", _('quarter'), "DATEADD(quarter, " + "DATEDIFF(quarter, 0, {col}), 0)"), + Grain("year", _('year'), "DATEADD(year, " + "DATEDIFF(year, 0, {col}), 0)"), + ) + + @classmethod + def convert_dttm(cls, target_type, dttm): + return "CONVERT(DATETIME, '{}', 126)".format(iso) + + +class RedshiftEngineSpec(PostgresEngineSpec): + engine = 'redshift' + + +class OracleEngineSpec(PostgresEngineSpec): + engine = 'oracle' + + @classmethod + def convert_dttm(cls, target_type, dttm): + return ( + """TO_TIMESTAMP('{}', 'YYYY-MM-DD"T"HH24:MI:SS.ff6')""" + ).format(dttm.isoformat()) + + +class VerticaEngineSpec(PostgresEngineSpec): + engine = 'vertica' + +engines = { + o.engine: o for o in globals().values() + if inspect.isclass(o) and issubclass(o, BaseEngineSpec)} diff --git a/caravel/models.py b/caravel/models.py index 706fc49a38410..2387c3dea4dfb 100644 --- a/caravel/models.py +++ b/caravel/models.py @@ -52,7 +52,7 @@ from werkzeug.datastructures import ImmutableMultiDict import caravel -from caravel import app, db, get_session, utils, sm +from caravel import app, db, db_engine_specs, get_session, utils, sm from caravel.source_registry import SourceRegistry from caravel.viz import viz_types from caravel.utils import flasher, MetricPermException, DimSelector @@ -277,6 +277,12 @@ def slice_url(self): "{obj.datasource_id}/".format(obj=self)) return href(slice_params) + @property + def slice_id_url(self): + return ( + "/caravel/{slc.datasource_type}/{slc.datasource_id}/{slc.id}/" + ).format(slc=self) + @property def edit_url(self): return "/slicemodelview/edit/{}".format(self.id) @@ -678,6 +684,12 @@ def all_view_names(self, schema=None): def all_schema_names(self): return sorted(self.inspector.get_schema_names()) + @property + def db_engine_spec(self): + engine_name = self.get_sqla_engine().name or 'base' + return db_engine_specs.engines.get( + engine_name, db_engine_specs.BaseEngineSpec) + def grains(self): """Defines time granularity database-specific expressions. @@ -687,113 +699,11 @@ def grains(self): each database has slightly different but similar datetime functions, this allows a mapping between database engines and actual functions. """ - Grain = namedtuple('Grain', 'name label function') - db_time_grains = { - 'presto': ( - Grain('Time Column', _('Time Column'), '{col}'), - Grain('second', _('second'), - "date_trunc('second', CAST({col} AS TIMESTAMP))"), - Grain('minute', _('minute'), - "date_trunc('minute', CAST({col} AS TIMESTAMP))"), - Grain('hour', _('hour'), - "date_trunc('hour', CAST({col} AS TIMESTAMP))"), - Grain('day', _('day'), - "date_trunc('day', CAST({col} AS TIMESTAMP))"), - Grain('week', _('week'), - "date_trunc('week', CAST({col} AS TIMESTAMP))"), - Grain('month', _('month'), - "date_trunc('month', CAST({col} AS TIMESTAMP))"), - Grain('quarter', _('quarter'), - "date_trunc('quarter', CAST({col} AS TIMESTAMP))"), - Grain("week_ending_saturday", _('week_ending_saturday'), - "date_add('day', 5, date_trunc('week', date_add('day', 1, " - "CAST({col} AS TIMESTAMP))))"), - Grain("week_start_sunday", _('week_start_sunday'), - "date_add('day', -1, date_trunc('week', " - "date_add('day', 1, CAST({col} AS TIMESTAMP))))"), - ), - 'mysql': ( - Grain('Time Column', _('Time Column'), '{col}'), - Grain("second", _('second'), "DATE_ADD(DATE({col}), " - "INTERVAL (HOUR({col})*60*60 + MINUTE({col})*60" - " + SECOND({col})) SECOND)"), - Grain("minute", _('minute'), "DATE_ADD(DATE({col}), " - "INTERVAL (HOUR({col})*60 + MINUTE({col})) MINUTE)"), - Grain("hour", _('hour'), "DATE_ADD(DATE({col}), " - "INTERVAL HOUR({col}) HOUR)"), - Grain('day', _('day'), 'DATE({col})'), - Grain("week", _('week'), "DATE(DATE_SUB({col}, " - "INTERVAL DAYOFWEEK({col}) - 1 DAY))"), - Grain("month", _('month'), "DATE(DATE_SUB({col}, " - "INTERVAL DAYOFMONTH({col}) - 1 DAY))"), - ), - 'sqlite': ( - Grain('Time Column', _('Time Column'), '{col}'), - Grain('day', _('day'), 'DATE({col})'), - Grain("week", _('week'), - "DATE({col}, -strftime('%w', {col}) || ' days')"), - Grain("month", _('month'), - "DATE({col}, -strftime('%d', {col}) || ' days')"), - ), - 'postgresql': ( - Grain("Time Column", _('Time Column'), "{col}"), - Grain("second", _('second'), "DATE_TRUNC('second', {col})"), - Grain("minute", _('minute'), "DATE_TRUNC('minute', {col})"), - Grain("hour", _('hour'), "DATE_TRUNC('hour', {col})"), - Grain("day", _('day'), "DATE_TRUNC('day', {col})"), - Grain("week", _('week'), "DATE_TRUNC('week', {col})"), - Grain("month", _('month'), "DATE_TRUNC('month', {col})"), - Grain("year", _('year'), "DATE_TRUNC('year', {col})"), - ), - 'mssql': ( - Grain("Time Column", _('Time Column'), "{col}"), - Grain("second", _('second'), "DATEADD(second, " - "DATEDIFF(second, '2000-01-01', {col}), '2000-01-01')"), - Grain("minute", _('minute'), "DATEADD(minute, " - "DATEDIFF(minute, 0, {col}), 0)"), - Grain("5 minute", _('5 minute'), "DATEADD(minute, " - "DATEDIFF(minute, 0, {col}) / 5 * 5, 0)"), - Grain("half hour", _('half hour'), "DATEADD(minute, " - "DATEDIFF(minute, 0, {col}) / 30 * 30, 0)"), - Grain("hour", _('hour'), "DATEADD(hour, " - "DATEDIFF(hour, 0, {col}), 0)"), - Grain("day", _('day'), "DATEADD(day, " - "DATEDIFF(day, 0, {col}), 0)"), - Grain("week", _('week'), "DATEADD(week, " - "DATEDIFF(week, 0, {col}), 0)"), - Grain("month", _('month'), "DATEADD(month, " - "DATEDIFF(month, 0, {col}), 0)"), - Grain("quarter", _('quarter'), "DATEADD(quarter, " - "DATEDIFF(quarter, 0, {col}), 0)"), - Grain("year", _('year'), "DATEADD(year, " - "DATEDIFF(year, 0, {col}), 0)"), - ), - } - db_time_grains['redshift'] = db_time_grains['postgresql'] - db_time_grains['vertica'] = db_time_grains['postgresql'] - for db_type, grains in db_time_grains.items(): - if self.sqlalchemy_uri.startswith(db_type): - return grains + return self.db_engine_spec.time_grains def grains_dict(self): return {grain.name: grain for grain in self.grains()} - def epoch_to_dttm(self, ms=False): - """Database-specific SQL to convert unix timestamp to datetime - """ - ts2date_exprs = { - 'sqlite': "datetime({col}, 'unixepoch')", - 'postgresql': "(timestamp 'epoch' + {col} * interval '1 second')", - 'mysql': "from_unixtime({col})", - 'mssql': "dateadd(S, {col}, '1970-01-01')" - } - ts2date_exprs['redshift'] = ts2date_exprs['postgresql'] - ts2date_exprs['vertica'] = ts2date_exprs['postgresql'] - for db_type, expr in ts2date_exprs.items(): - if self.sqlalchemy_uri.startswith(db_type): - return expr.replace('{col}', '({col}/1000.0)') if ms else expr - raise Exception(_("Unable to convert unix epoch to datetime")) - def get_extra(self): extra = {} if self.extra: @@ -1028,12 +938,13 @@ def visit_column(element, compiler, **kw): # Transforming time grain into an expression based on configuration time_grain_sqla = extras.get('time_grain_sqla') if time_grain_sqla: + db_engine_spec = self.database.db_engine_spec if dttm_col.python_date_format == 'epoch_s': - dttm_expr = self.database.epoch_to_dttm().format( - col=dttm_expr) + dttm_expr = \ + db_engine_spec.epoch_to_dttm().format(col=dttm_expr) elif dttm_col.python_date_format == 'epoch_ms': - dttm_expr = self.database.epoch_to_dttm(ms=True).format( - col=dttm_expr) + dttm_expr = \ + db_engine_spec.epoch_ms_to_dttm().format(col=dttm_expr) udf = self.database.grains_dict().get(time_grain_sqla, '{col}') timestamp_grain = literal_column( udf.function.format(col=dttm_expr), type_=DateTime).label('timestamp') @@ -1434,7 +1345,7 @@ def import_obj(cls, column_to_import): return column_to_import def dttm_sql_literal(self, dttm): - """Convert datetime object to string + """Convert datetime object to a SQL expression string If database_expression is empty, the internal dttm will be parsed as the string with the pattern that @@ -1442,6 +1353,7 @@ def dttm_sql_literal(self, dttm): If database_expression is not empty, the internal dttm will be parsed as the sql sentence for the database to convert """ + tf = self.python_date_format or '%Y-%m-%d %H:%M:%S.%f' if self.database_expression: return self.database_expression.format(dttm.strftime('%Y-%m-%d %H:%M:%S')) @@ -1450,21 +1362,9 @@ def dttm_sql_literal(self, dttm): elif tf == 'epoch_ms': return str((dttm - datetime(1970, 1, 1)).total_seconds() * 1000.0) else: - default = "'{}'".format(dttm.strftime(tf)) - iso = dttm.isoformat() - d = { - 'mssql': "CONVERT(DATETIME, '{}', 126)".format(iso), # untested - 'mysql': default, - 'oracle': - """TO_TIMESTAMP('{}', 'YYYY-MM-DD"T"HH24:MI:SS.ff6')""".format( - dttm.isoformat()), - 'presto': default, - 'sqlite': default, - } - for k, v in d.items(): - if self.table.database.sqlalchemy_uri.startswith(k): - return v - return default + s = self.table.database.db_engine_spec.convert_dttm( + self.type, dttm) + return s or "'{}'".format(dttm.strftime(tf)) class DruidCluster(Model, AuditMixinNullable): diff --git a/caravel/utils.py b/caravel/utils.py index 168656986f0aa..97fedfe451bd5 100644 --- a/caravel/utils.py +++ b/caravel/utils.py @@ -31,7 +31,7 @@ class CaravelException(Exception): pass -class CaravelTimeoutException(Exception): +class CaravelTimeoutException(CaravelException): pass @@ -39,7 +39,11 @@ class CaravelSecurityException(CaravelException): pass -class MetricPermException(Exception): +class MetricPermException(CaravelException): + pass + + +class NoDataException(CaravelException): pass diff --git a/caravel/viz.py b/caravel/viz.py index ebb4b4f9c69cf..6cd9db366bb37 100755 --- a/caravel/viz.py +++ b/caravel/viz.py @@ -175,7 +175,7 @@ def get_df(self, query_obj=None): # If the datetime format is unix, the parse will use the corresponding # parsing logic. if df is None or df.empty: - raise Exception("No data, review your incantations!") + raise utils.NoDataException("No data.") else: if 'timestamp' in df.columns: if timestamp_format in ("epoch_s", "epoch_ms"): diff --git a/run_specific_test.sh b/run_specific_test.sh index c63a459d27aea..da49899a5923e 100755 --- a/run_specific_test.sh +++ b/run_specific_test.sh @@ -5,4 +5,4 @@ export CARAVEL_CONFIG=tests.caravel_test_config set -e caravel/bin/caravel version -v export SOLO_TEST=1 -nosetests tests.core_tests:CoreTests.test_slice_endpoint +nosetests tests.core_tests:CoreTests.test_slices diff --git a/tests/core_tests.py b/tests/core_tests.py index bd09bfd8ca22d..85599c288908b 100644 --- a/tests/core_tests.py +++ b/tests/core_tests.py @@ -130,8 +130,7 @@ def test_slices(self): (slc.slice_name, 'slice_url', slc.slice_url), (slc.slice_name, 'json_endpoint', slc.viz.json_endpoint), (slc.slice_name, 'csv_endpoint', slc.viz.csv_endpoint), - (slc.slice_name, 'slice_id_url', - "/caravel/{slc.datasource_type}/{slc.datasource_id}/{slc.id}/".format(slc=slc)), + (slc.slice_name, 'slice_id_url', slc.slice_id_url), ] for name, method, url in urls: print("[{name}]/[{method}]: {url}".format(**locals())) @@ -401,7 +400,6 @@ def test_public_user_dashboard_access(self): resp = self.get_resp('/dashboardmodelview/list/') assert "/caravel/dashboard/births/" in resp - print(self.get_resp('/caravel/dashboard/births/')) assert 'Births' in self.get_resp('/caravel/dashboard/births/') # Confirm that public doesn't have access to other datasets.