diff --git a/superset/config.py b/superset/config.py index d9106bbd01c73..00889572ecacc 100644 --- a/superset/config.py +++ b/superset/config.py @@ -275,8 +275,13 @@ # Set this API key to enable Mapbox visualizations MAPBOX_API_KEY = os.environ.get('MAPBOX_API_KEY', '') -# Maximum number of rows returned in the SQL editor -SQL_MAX_ROW = 1000 +# Maximum number of rows returned from a database +# in async mode, no more than SQL_MAX_ROW will be returned and stored +# in the results backend. This also becomes the limit when exporting CSVs +SQL_MAX_ROW = 100000 + +# Limit to be returned to the frontend. +DISPLAY_MAX_ROW = 1000 # Maximum number of tables/views displayed in the dropdown window in SQL Lab. MAX_TABLE_NAMES = 3000 @@ -302,8 +307,6 @@ class CeleryConfig(object): CELERY_CONFIG = CeleryConfig """ CELERY_CONFIG = None -SQL_CELERY_DB_FILE_PATH = os.path.join(DATA_DIR, 'celerydb.sqlite') -SQL_CELERY_RESULTS_DB_FILE_PATH = os.path.join(DATA_DIR, 'celery_results.sqlite') # static http headers to be served by your Superset server. 
# This header prevents iFrames from other domains and diff --git a/superset/views/core.py b/superset/views/core.py index 6fc9e850543b8..9324a8d393edf 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -2351,7 +2351,18 @@ def results(self, key): return json_error_response(security_manager.get_table_access_error_msg( '{}'.format(rejected_tables)), status=403) - return json_success(utils.zlib_decompress_to_string(blob)) + payload = utils.zlib_decompress_to_string(blob) + display_limit = app.config.get('DISPLAY_MAX_ROW', None) + if display_limit: + payload_json = json.loads(payload) + payload_json['data'] = payload_json['data'][:display_limit] + return json_success( + json.dumps( + payload_json, + default=utils.json_iso_dttm_ser, + ignore_nan=True, + ), + ) @has_access_api @expose('/stop_query/', methods=['POST']) @@ -2407,7 +2418,7 @@ def sql_json_call(self, request): tmp_table_name, ) - client_id = request.form.get('client_id') or utils.shortid() + client_id = request.form.get('client_id') or utils.shortid()[:10] query = Query( database_id=int(database_id), diff --git a/tests/base_tests.py b/tests/base_tests.py index 782cedd0a19e3..269a7cb80509a 100644 --- a/tests/base_tests.py +++ b/tests/base_tests.py @@ -223,7 +223,7 @@ def revoke_public_access_to_table(self, table): perm.view_menu and table.perm in perm.view_menu.name): security_manager.del_permission_role(public_role, perm) - def run_sql(self, sql, client_id, user_name=None, raise_on_error=False): + def run_sql(self, sql, client_id=None, user_name=None, raise_on_error=False): if user_name: self.logout() self.login(username=(user_name if user_name else 'admin')) diff --git a/tests/celery_tests.py b/tests/celery_tests.py index 243aacef4586d..c521729cf3cdf 100644 --- a/tests/celery_tests.py +++ b/tests/celery_tests.py @@ -6,7 +6,6 @@ from __future__ import unicode_literals import json -import os import subprocess import time import unittest @@ -14,7 +13,7 @@ import pandas as pd from past.builtins 
import basestring -from superset import app, cli, db, security_manager +from superset import app, db from superset.models.helpers import QueryStatus from superset.models.sql_lab import Query from superset.sql_parse import SupersetQuery @@ -22,13 +21,12 @@ BASE_DIR = app.config.get('BASE_DIR') +CELERY_SLEEP_TIME = 5 class CeleryConfig(object): - BROKER_URL = 'sqla+sqlite:///' + app.config.get('SQL_CELERY_DB_FILE_PATH') + BROKER_URL = app.config.get('CELERY_RESULT_BACKEND') CELERY_IMPORTS = ('superset.sql_lab', ) - CELERY_RESULT_BACKEND = ( - 'db+sqlite:///' + app.config.get('SQL_CELERY_RESULTS_DB_FILE_PATH')) CELERY_ANNOTATIONS = {'sql_lab.add': {'rate_limit': '10/s'}} CONCURRENCY = 1 @@ -88,29 +86,13 @@ def get_query_by_id(self, id): @classmethod def setUpClass(cls): - try: - os.remove(app.config.get('SQL_CELERY_DB_FILE_PATH')) - except OSError as e: - app.logger.warn(str(e)) - try: - os.remove(app.config.get('SQL_CELERY_RESULTS_DB_FILE_PATH')) - except OSError as e: - app.logger.warn(str(e)) - - security_manager.sync_role_definitions() - - worker_command = BASE_DIR + '/bin/superset worker' + db.session.query(Query).delete() + db.session.commit() + + worker_command = BASE_DIR + '/bin/superset worker -w 2' subprocess.Popen( worker_command, shell=True, stdout=subprocess.PIPE) - admin = security_manager.find_user('admin') - if not admin: - security_manager.add_user( - 'admin', 'admin', ' user', 'admin@fab.org', - security_manager.find_role('Admin'), - password='general') - cli.load_examples_run(load_test_data=True) - @classmethod def tearDownClass(cls): subprocess.call( @@ -122,7 +104,7 @@ def tearDownClass(cls): shell=True, ) - def run_sql(self, db_id, sql, client_id, cta='false', tmp_table='tmp', + def run_sql(self, db_id, sql, client_id=None, cta='false', tmp_table='tmp', async_='false'): self.login() resp = self.client.post( @@ -150,11 +132,13 @@ def test_run_sync_query_cta(self): main_db = self.get_main_database(db.session) db_id = main_db.id eng = 
main_db.get_sqla_engine() + tmp_table_name = 'tmp_async_22' + self.drop_table_if_exists(tmp_table_name, main_db) perm_name = 'can_sql_json' sql_where = ( "SELECT name FROM ab_permission WHERE name='{}'".format(perm_name)) result2 = self.run_sql( - db_id, sql_where, '2', tmp_table='tmp_table_2', cta='true') + db_id, sql_where, '2', tmp_table=tmp_table_name, cta='true') self.assertEqual(QueryStatus.SUCCESS, result2['query']['state']) self.assertEqual([], result2['data']) self.assertEqual([], result2['columns']) @@ -169,8 +153,7 @@ def test_run_sync_query_cta_no_data(self): main_db = self.get_main_database(db.session) db_id = main_db.id sql_empty_result = 'SELECT * FROM ab_user WHERE id=666' - result3 = self.run_sql( - db_id, sql_empty_result, '3', tmp_table='tmp_table_3', cta='true') + result3 = self.run_sql(db_id, sql_empty_result, '3') self.assertEqual(QueryStatus.SUCCESS, result3['query']['state']) self.assertEqual([], result3['data']) self.assertEqual([], result3['columns']) @@ -178,22 +161,31 @@ self.assertEqual(QueryStatus.SUCCESS, query3.status) + def drop_table_if_exists(self, table_name, database=None): + """Drop table if it exists, works on any DB""" + sql = 'DROP TABLE {}'.format(table_name) + db_id = database.id if database else None + if database: + database.allow_dml = True + db.session.flush() + return self.run_sql(db_id, sql) + def test_run_async_query(self): - main_db = self.get_main_database(db.session) - eng = main_db.get_sqla_engine() + main_db = self.get_main_database(db.session) + db_id = main_db.id + + self.drop_table_if_exists('tmp_async_1', main_db) + sql_where = "SELECT name FROM ab_role WHERE name='Admin'" result = self.run_sql( - main_db.id, sql_where, '4', async_='true', tmp_table='tmp_async_1', + db_id, sql_where, '4', async_='true', tmp_table='tmp_async_1', cta='true') assert result['query']['state'] in ( QueryStatus.PENDING, QueryStatus.RUNNING,
QueryStatus.SUCCESS) - time.sleep(1) + time.sleep(CELERY_SLEEP_TIME) query = self.get_query_by_id(result['query']['serverId']) - df = pd.read_sql_query(query.select_sql, con=eng) - self.assertEqual(QueryStatus.SUCCESS, query.status) - self.assertEqual([{'name': 'Admin'}], df.to_dict(orient='records')) self.assertEqual(QueryStatus.SUCCESS, query.status) self.assertTrue('FROM tmp_async_1' in query.select_sql) self.assertEqual( @@ -201,27 +193,25 @@ def test_run_async_query(self): "WHERE name='Admin' LIMIT 666", query.executed_sql) self.assertEqual(sql_where, query.sql) self.assertEqual(0, query.rows) - self.assertEqual(666, query.limit) self.assertEqual(False, query.limit_used) self.assertEqual(True, query.select_as_cta) self.assertEqual(True, query.select_as_cta_used) def test_run_async_query_with_lower_limit(self): main_db = self.get_main_database(db.session) - eng = main_db.get_sqla_engine() + db_id = main_db.id + self.drop_table_if_exists('tmp_async_2', main_db) + sql_where = "SELECT name FROM ab_role WHERE name='Alpha' LIMIT 1" result = self.run_sql( - main_db.id, sql_where, '5', async_='true', tmp_table='tmp_async_2', + db_id, sql_where, '5', async_='true', tmp_table='tmp_async_2', cta='true') assert result['query']['state'] in ( QueryStatus.PENDING, QueryStatus.RUNNING, QueryStatus.SUCCESS) - time.sleep(1) + time.sleep(CELERY_SLEEP_TIME) query = self.get_query_by_id(result['query']['serverId']) - df = pd.read_sql_query(query.select_sql, con=eng) - self.assertEqual(QueryStatus.SUCCESS, query.status) - self.assertEqual([{'name': 'Alpha'}], df.to_dict(orient='records')) self.assertEqual(QueryStatus.SUCCESS, query.status) self.assertTrue('FROM tmp_async_2' in query.select_sql) self.assertEqual( diff --git a/tests/superset_test_config.py b/tests/superset_test_config.py index 3076a0556cb3b..aacbd6a0ad76e 100644 --- a/tests/superset_test_config.py +++ b/tests/superset_test_config.py @@ -12,7 +12,6 @@ if 'SUPERSET__SQLALCHEMY_DATABASE_URI' in os.environ: 
SQLALCHEMY_DATABASE_URI = os.environ.get('SUPERSET__SQLALCHEMY_DATABASE_URI') -SQL_CELERY_RESULTS_DB_FILE_PATH = os.path.join(DATA_DIR, 'celery_results.sqlite') SQL_SELECT_AS_CTA = True SQL_MAX_ROW = 666 @@ -28,7 +27,6 @@ class CeleryConfig(object): BROKER_URL = 'redis://localhost' CELERY_IMPORTS = ('superset.sql_lab', ) - CELERY_RESULT_BACKEND = 'db+sqlite:///' + SQL_CELERY_RESULTS_DB_FILE_PATH CELERY_ANNOTATIONS = {'sql_lab.add': {'rate_limit': '10/s'}} CONCURRENCY = 1