feat: welcome presto to the suite of tested databases #10498

Merged: 5 commits, merged on Aug 6, 2020

Changes from 4 commits
60 changes: 60 additions & 0 deletions .github/workflows/superset-python.yml
@@ -92,6 +92,66 @@ jobs:
      - name: Test babel extraction
        run: flask fab babel-extract --target superset/translations --output superset/translations/messages.pot --config superset/translations/babel.cfg -k _,__,t,tn,tct

  test-postgres-presto:
    runs-on: ubuntu-18.04
    strategy:
      matrix:
        # run unit tests in multiple version just for fun
        python-version: [3.8]
    env:
      PYTHONPATH: ${{ github.workspace }}
      SUPERSET_CONFIG: tests.superset_test_config
      REDIS_PORT: 16379
      SUPERSET__SQLALCHEMY_DATABASE_URI:
        postgresql+psycopg2://superset:[email protected]:15432/superset
      SUPERSET__SQLALCHEMY_EXAMPLES_URI:
        presto://localhost:15433/memory/default
    services:
      postgres:
        image: postgres:10-alpine
        env:
          POSTGRES_USER: superset
          POSTGRES_PASSWORD: superset
        ports:
          # Use custom ports for services to avoid accidentally connecting to
          # GitHub action runner's default installations
          - 15432:5432
      presto:
        image: prestosql/presto:339
        env:
          POSTGRES_USER: superset
          POSTGRES_PASSWORD: superset
        ports:
          # Use custom ports for services to avoid accidentally connecting to
          # GitHub action runner's default installations
          - 15433:8080
      redis:
        image: redis:5-alpine
        ports:
          - 16379:6379
    steps:
      - uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/[email protected]
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        uses: apache-superset/cached-dependencies@b90713b
        with:
          run: |
            apt-get-install
            pip-upgrade
            pip install -r requirements/testing.txt
            setup-postgres
      - name: Run celery
        run: celery worker --app=superset.tasks.celery_app:app -Ofair -c 2 &
      - name: Python unit tests (PostgreSQL)
        run: |
          ./scripts/python_tests.sh
      - name: Upload code coverage
        run: |
          bash <(curl -s https://codecov.io/bash) -cF python

  test-postgres:
    runs-on: ubuntu-18.04
    strategy:
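As a quick local sanity check for the new job's Presto service, one can point SQLAlchemy at the published port. This is only a sketch, assuming pyhive[presto] is installed (it is added to requirements/testing.in below) and a prestosql/presto:339 container with port 8080 published as 15433, matching the job definition above; it is not part of the PR itself.

```python
# Sketch: verify the Presto service wired up in the workflow above is reachable.
# Assumes `pip install sqlalchemy pyhive[presto]` and the 15433:8080 port mapping
# from the job definition.
from sqlalchemy import create_engine

engine = create_engine("presto://localhost:15433/memory/default")
with engine.connect() as conn:
    # The memory connector ships with Presto, so a trivial query is enough.
    print(conn.execute("SELECT 1").scalar())
```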
13 changes: 6 additions & 7 deletions requirements/base.txt
@@ -16,8 +16,8 @@ babel==2.8.0 # via flask-babel
backoff==1.10.0 # via apache-superset
billiard==3.6.3.0 # via celery
bleach==3.1.5 # via apache-superset
boto3==1.14.34 # via tabulator
botocore==1.17.34 # via boto3, s3transfer
boto3==1.14.36 # via tabulator
botocore==1.17.36 # via boto3, s3transfer
brotli==1.0.7 # via flask-compress
cached-property==1.5.1 # via tableschema
cachelib==0.1.1 # via apache-superset
@@ -55,9 +55,8 @@ geographiclib==1.50 # via geopy
geopy==2.0.0 # via apache-superset
gunicorn==20.0.4 # via apache-superset
humanize==2.5.0 # via apache-superset
idna-ssl==1.1.0 # via aiohttp
idna==2.10 # via email-validator, idna-ssl, requests, yarl
ijson==3.1.post0 # via tabulator
idna==2.10 # via email-validator, requests, yarl
ijson==3.1.1 # via tabulator
importlib-metadata==1.7.0 # via jsonschema, kombu, markdown
isodate==0.6.0 # via apache-superset, tableschema
itsdangerous==1.1.0 # via flask, flask-wtf
@@ -92,7 +91,7 @@ py==1.9.0 # via retry
pyarrow==0.17.1 # via apache-superset
pycparser==2.20 # via cffi
pydruid==0.6.1 # via apache-superset
pyhive[hive]==0.6.2 # via apache-superset
pyhive[hive]==0.6.3 # via apache-superset
pyjwt==1.7.1 # via flask-appbuilder, flask-jwt-extended
pyparsing==2.4.7 # via packaging
pyrsistent==0.16.0 # via jsonschema
@@ -119,7 +118,7 @@ tableschema==1.19.2 # via apache-superset
tabulator==1.52.3 # via tableschema
thrift-sasl==0.4.2 # via pyhive
thrift==0.13.0 # via apache-superset, pyhive, thrift-sasl
typing-extensions==3.7.4.2 # via aiohttp, yarl
typing-extensions==3.7.4.2 # via yarl
unicodecsv==0.14.1 # via tableschema, tabulator
urllib3==1.25.10 # via botocore, requests, selenium
vine==1.3.0 # via amqp, celery
5 changes: 2 additions & 3 deletions requirements/integration.txt
@@ -12,7 +12,6 @@ distlib==0.3.1 # via virtualenv
filelock==3.0.12 # via tox, virtualenv
identify==1.4.25 # via pre-commit
importlib-metadata==1.7.0 # via pluggy, pre-commit, tox, virtualenv
importlib-resources==3.0.0 # via pre-commit, virtualenv
nodeenv==1.4.0 # via pre-commit
packaging==20.4 # via tox
pip-compile-multi==1.5.8 # via -r requirements/integration.in
@@ -26,8 +25,8 @@ six==1.15.0 # via packaging, pip-tools, tox, virtualenv
toml==0.10.1 # via pre-commit, tox
toposort==1.5 # via pip-compile-multi
tox==3.18.1 # via -r requirements/integration.in
virtualenv==20.0.29 # via pre-commit, tox
zipp==3.1.0 # via importlib-metadata, importlib-resources
virtualenv==20.0.30 # via pre-commit, tox
zipp==3.1.0 # via importlib-metadata

# The following packages are considered to be unsafe in a requirements file:
# pip
1 change: 1 addition & 0 deletions requirements/testing.in
@@ -20,6 +20,7 @@ flask-testing
openapi-spec-validator
openpyxl
parameterized
pyhive[presto]>=0.6.3
pylint
pytest
pytest-cov
3 changes: 2 additions & 1 deletion requirements/testing.txt
@@ -1,4 +1,4 @@
# SHA1:785ae7ffcde3cee8ebcc0a839cdb8e61e693d329
# SHA1:e7b15a12c98ccce1cc4b8ee977205f141201b761
#
# This file is autogenerated by pip-compile-multi
# To update, run:
@@ -18,6 +18,7 @@ mccabe==0.6.1 # via pylint
more-itertools==8.4.0 # via pytest
openapi-spec-validator==0.2.9 # via -r requirements/testing.in
parameterized==0.7.4 # via -r requirements/testing.in
pyhive[hive,presto]==0.6.3 # via -r requirements/testing.in, apache-superset
pylint==2.5.3 # via -r requirements/testing.in
pytest-cov==2.10.0 # via -r requirements/testing.in
pytest==6.0.1 # via -r requirements/testing.in, pytest-cov
11 changes: 9 additions & 2 deletions superset/examples/birth_names.py
@@ -54,19 +54,26 @@ def gen_filter(

def load_data(tbl_name: str, database: Database, sample: bool = False) -> None:
    pdf = pd.read_json(get_example_data("birth_names.json.gz"))
    pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
    if database.backend == "presto":
        pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
        pdf.ds = pdf.ds.dt.strftime("%Y-%m-%d %H:%M:%S")
    else:
        pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
Comment on lines +58 to +62

Member:
This logic should ideally be moved out to db_engine_specs/presto.py to keep this clean of db-specific logic. Something along the lines of BaseEngineSpec.convert_examples_datetime() or similar (there are other similar methods there).

Member (Author):
My plan here is a bit different: we have talked about getting rid of load_examples for the tests. I plan to move the initialization code into a pytest fixture and, as part of that cleanup, restructure the code to be more database-generic.

The scope of these changes is test-only; I prefer not to modify production code.
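A rough sketch of the refactor suggested above, for illustration only: the hook names convert_examples_datetime and examples_datetime_type are hypothetical (neither exists on Superset's BaseEngineSpec), and the classes below are simplified stand-ins rather than the real engine-spec interfaces.

```python
# Hypothetical sketch: let each engine spec decide how example datetimes are
# serialized, so example loaders stay free of backend-specific branches.
import pandas as pd
from sqlalchemy import DateTime, String
from sqlalchemy.types import TypeEngine


class BaseEngineSpec:  # simplified stand-in for superset.db_engine_specs.base
    @classmethod
    def convert_examples_datetime(cls, series: pd.Series, unit: str = "ms") -> pd.Series:
        # Default: keep native pandas datetimes.
        return pd.to_datetime(series, unit=unit)

    @classmethod
    def examples_datetime_type(cls) -> TypeEngine:
        # SQLAlchemy column type used when writing example datetime columns.
        return DateTime()


class PrestoEngineSpec(BaseEngineSpec):  # simplified stand-in for db_engine_specs/presto.py
    @classmethod
    def convert_examples_datetime(cls, series: pd.Series, unit: str = "ms") -> pd.Series:
        # Work around the TIMESTAMP limitation noted in the TODO below by
        # serializing datetimes to plain strings for Presto.
        return pd.to_datetime(series, unit=unit).dt.strftime("%Y-%m-%d %H:%M:%S")

    @classmethod
    def examples_datetime_type(cls) -> TypeEngine:
        return String(255)
```

With hooks like these, load_data() could call database.db_engine_spec.convert_examples_datetime(pdf.ds, unit="ms") and use examples_datetime_type() in the dtype map, which would also cover the thread on the dtype entries further down.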

    pdf = pdf.head(100) if sample else pdf

    pdf.to_sql(
        tbl_name,
        database.get_sqla_engine(),
        if_exists="replace",
        chunksize=500,
        dtype={
            "ds": DateTime,
            # TODO(bkyryliuk): use TIMESTAMP type for presto
            "ds": DateTime if database.backend != "presto" else String(255),
Comment on lines +71 to +72

Member:
Same here, e.g. get_examples_datetime_type(). There are a few places below to which I feel the same applies.

"gender": String(16),
"state": String(10),
"name": String(255),
},
method="multi",
index=False,
)
print("Done loading table!")
14 changes: 10 additions & 4 deletions superset/examples/multiformat_time_series.py
@@ -44,17 +44,23 @@ def load_multiformat_time_series(
    if not only_metadata and (not table_exists or force):
        data = get_example_data("multiformat_time_series.json.gz")
        pdf = pd.read_json(data)
        if database.backend == "presto":
            pdf.ds = pd.to_datetime(pdf.ds, unit="s")
            pdf.ds = pdf.ds.dt.strftime("%Y-%m-%d")
            pdf.ds2 = pd.to_datetime(pdf.ds2, unit="s")
            pdf.ds2 = pdf.ds2.dt.strftime("%Y-%m-%d %H:%M:%S")
        else:
            pdf.ds = pd.to_datetime(pdf.ds, unit="s")
            pdf.ds2 = pd.to_datetime(pdf.ds2, unit="s")

        pdf.ds = pd.to_datetime(pdf.ds, unit="s")
        pdf.ds2 = pd.to_datetime(pdf.ds2, unit="s")
        pdf.to_sql(
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=500,
            dtype={
                "ds": Date,
                "ds2": DateTime,
                "ds": String(255) if database.backend == "presto" else Date,
                "ds2": String(255) if database.backend == "presto" else DateTime,
                "epoch_s": BigInteger,
                "epoch_ms": BigInteger,
                "string0": String(100),
11 changes: 8 additions & 3 deletions superset/examples/random_time_series.py
@@ -16,7 +16,7 @@
# under the License.

import pandas as pd
from sqlalchemy import DateTime
from sqlalchemy import DateTime, String

from superset import db
from superset.models.slice import Slice
@@ -36,13 +36,18 @@ def load_random_time_series_data(
    if not only_metadata and (not table_exists or force):
        data = get_example_data("random_time_series.json.gz")
        pdf = pd.read_json(data)
        pdf.ds = pd.to_datetime(pdf.ds, unit="s")
        if database.backend == "presto":
            pdf.ds = pd.to_datetime(pdf.ds, unit="s")
            pdf.ds = pdf.ds.dt.strftime("%Y-%m-%d %H:%M:%S")
        else:
            pdf.ds = pd.to_datetime(pdf.ds, unit="s")

        pdf.to_sql(
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=500,
            dtype={"ds": DateTime},
            dtype={"ds": DateTime if database.backend != "presto" else String(255)},
            index=False,
        )
        print("Done loading table!")
11 changes: 9 additions & 2 deletions superset/examples/world_bank.py
@@ -53,19 +53,26 @@ def load_world_bank_health_n_pop( # pylint: disable=too-many-locals, too-many-s
        data = get_example_data("countries.json.gz")
        pdf = pd.read_json(data)
        pdf.columns = [col.replace(".", "_") for col in pdf.columns]
        pdf.year = pd.to_datetime(pdf.year)
        if database.backend == "presto":
            pdf.year = pd.to_datetime(pdf.year)
            pdf.year = pdf.year.dt.strftime("%Y-%m-%d %H:%M:%S")
        else:
            pdf.year = pd.to_datetime(pdf.year)
        pdf = pdf.head(100) if sample else pdf

        pdf.to_sql(
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=50,
            dtype={
                "year": DateTime(),
                # TODO(bkyryliuk): use TIMESTAMP type for presto
                "year": DateTime if database.backend != "presto" else String(255),
                "country_code": String(3),
                "country_name": String(255),
                "region": String(255),
            },
            method="multi",
            index=False,
        )

7 changes: 7 additions & 0 deletions superset/utils/core.py
@@ -1022,6 +1022,13 @@ def get_example_database() -> "Database":
    return get_or_create_db("examples", db_uri)


def get_main_database() -> "Database":
Member:
We usually refer to this as the "metadata" database - should that be in the function name?

Member (Author):
It is called the "main" database in Superset's dev installation. I am fine with renaming it, but we should probably do that across the board in a separate PR.

Member:
Ah, forgive the confusion on my part. This is fine as-is then.

    from superset import conf

    db_uri = conf.get("SQLALCHEMY_DATABASE_URI")
    return get_or_create_db("main", db_uri)


def is_adhoc_metric(metric: Metric) -> bool:
    return bool(
        isinstance(metric, dict)
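Purely as an illustration of how the new helper pairs with get_example_database() in this setup (it assumes a Flask app context, e.g. inside SupersetTestCase; the function below is not part of the PR):

```python
# Illustrative only: contrast the metadata ("main") database with the examples
# database when SUPERSET__SQLALCHEMY_EXAMPLES_URI points at Presto, as in the
# new CI job above.
from superset.utils.core import get_example_database, get_main_database


def describe_test_databases() -> None:
    examples_db = get_example_database()  # presto://localhost:15433/memory/default in CI
    main_db = get_main_database()         # the metadata DB, PostgreSQL in CI

    # Example loaders branch on examples_db.backend ("presto" here), while
    # Superset's own metadata stays in main_db.
    print(examples_db.backend, main_db.backend)
```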
1 change: 1 addition & 0 deletions tests/base_tests.py
@@ -49,6 +49,7 @@ class SupersetTestCase(TestCase):
"sqlite": "main",
"mysql": "superset",
"postgresql": "public",
"presto": "default",
}

    maxDiff = -1
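The new "presto": "default" entry gives Presto-backed tests a default schema for qualifying example tables. A hedged illustration of how such a mapping can be used (the helper below is hypothetical, not taken from the test suite):

```python
# Hypothetical helper: qualify an example table with a backend's default schema,
# mirroring the mapping added to SupersetTestCase above.
DEFAULT_SCHEMA_BY_BACKEND = {
    "sqlite": "main",
    "mysql": "superset",
    "postgresql": "public",
    "presto": "default",
}


def qualified_table(table: str, backend: str) -> str:
    return f"{DEFAULT_SCHEMA_BY_BACKEND[backend]}.{table}"


assert qualified_table("birth_names", "presto") == "default.birth_names"
```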