Skip to content

Commit

Permalink
[thumbnails] API and celery task for dashboards and charts (#8947)
Browse files Browse the repository at this point in the history
  • Loading branch information
dpgaspar authored Apr 15, 2020
1 parent 1ccda92 commit d81f720
Show file tree
Hide file tree
Showing 21 changed files with 1,141 additions and 13 deletions.
68 changes: 68 additions & 0 deletions docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,74 @@ section in `config.py`:
This will cache all the charts in the top 5 most popular dashboards every hour.
For other strategies, check the `superset/tasks/cache.py` file.

Caching Thumbnails
------------------

This is an optional feature that can be turned on by activating its feature flag in the config:

.. code-block:: python

    FEATURE_FLAGS = {
        "THUMBNAILS": True,
        "THUMBNAILS_SQLA_LISTENERS": True,
    }

For this feature you will need a cache system and celery workers. All thumbnails are stored in the cache and are processed
asynchronously by the workers.

An example config where images are stored on S3 could be:

.. code-block:: python

    from flask import Flask
    from s3cache.s3cache import S3Cache

    ...

    class CeleryConfig(object):
        BROKER_URL = "redis://localhost:6379/0"
        CELERY_IMPORTS = ("superset.sql_lab", "superset.tasks", "superset.tasks.thumbnails")
        CELERY_RESULT_BACKEND = "redis://localhost:6379/0"
        CELERYD_PREFETCH_MULTIPLIER = 10
        CELERY_ACKS_LATE = True

    CELERY_CONFIG = CeleryConfig

    def init_thumbnail_cache(app: Flask) -> S3Cache:
        return S3Cache("bucket_name", 'thumbs_cache/')

    THUMBNAIL_CACHE_CONFIG = init_thumbnail_cache
    # Async selenium thumbnail task will use the following user
    THUMBNAIL_SELENIUM_USER = "Admin"

Using the above example, cache keys for dashboards will be `superset_thumb__dashboard__{ID}`

You can override the base URL for selenium using:

.. code-block:: python

    WEBDRIVER_BASEURL = "https://superset.company.com"

Additional selenium web driver config can be set using `WEBDRIVER_CONFIGURATION`

You can implement a custom function to authenticate selenium; the default uses the flask-login session cookie.
An example of a custom function signature:

.. code-block:: python

    def auth_driver(driver: WebDriver, user: "User") -> WebDriver:
        pass

Then on config:

.. code-block:: python

    WEBDRIVER_AUTH_FUNC = auth_driver

Deeper SQLAlchemy integration
-----------------------------
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,4 @@ redis==3.2.1
requests==2.22.0
statsd==3.3.0
tox==3.11.1
pillow==7.0.0
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def get_git_sha():
"hana": ["hdbcli==2.4.162", "sqlalchemy_hana==0.4.0"],
"dremio": ["sqlalchemy_dremio>=1.1.0"],
"cockroachdb": ["cockroachdb==0.3.3"],
"thumbnails": ["Pillow>=7.0.0, <8.0.0"],
},
python_requires="~=3.6",
author="Apache Software Foundation",
Expand Down
1 change: 1 addition & 0 deletions superset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@
lambda: results_backend_manager.should_use_msgpack
)
tables_cache = LocalProxy(lambda: cache_manager.tables_cache)
thumbnail_cache = LocalProxy(lambda: cache_manager.thumbnail_cache)
83 changes: 76 additions & 7 deletions superset/charts/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,17 @@
# specific language governing permissions and limitations
# under the License.
import logging
from typing import Any
from typing import Any, Dict

import simplejson
from flask import g, make_response, request, Response
from flask import g, make_response, redirect, request, Response, url_for
from flask_appbuilder.api import expose, protect, rison, safe
from flask_appbuilder.models.sqla.interface import SQLAInterface
from flask_babel import ngettext
from werkzeug.wrappers import Response as WerkzeugResponse
from werkzeug.wsgi import FileWrapper

from superset import is_feature_enabled, thumbnail_cache
from superset.charts.commands.bulk_delete import BulkDeleteChartCommand
from superset.charts.commands.create import CreateChartCommand
from superset.charts.commands.delete import DeleteChartCommand
Expand All @@ -41,13 +44,16 @@
ChartPostSchema,
ChartPutSchema,
get_delete_ids_schema,
thumbnail_query_schema,
)
from superset.common.query_context import QueryContext
from superset.constants import RouteMethod
from superset.exceptions import SupersetSecurityException
from superset.extensions import event_logger, security_manager
from superset.models.slice import Slice
from superset.tasks.thumbnails import cache_chart_thumbnail
from superset.utils.core import json_int_dttm_ser
from superset.utils.screenshots import ChartScreenshot
from superset.views.base_api import BaseSupersetModelRestApi, RelatedFieldFilter
from superset.views.filters import FilterRelatedOwners

Expand Down Expand Up @@ -131,6 +137,11 @@ class ChartRestApi(BaseSupersetModelRestApi):
}
allowed_rel_fields = {"owners"}

def __init__(self) -> None:
    """Expose the ``thumbnail`` route when the THUMBNAILS feature flag is on."""
    thumbnails_enabled = is_feature_enabled("THUMBNAILS")
    if thumbnails_enabled:
        # Rebind to a new set instead of mutating the existing one in place.
        routes = self.include_route_methods.union({"thumbnail"})
        self.include_route_methods = routes
    super().__init__()

@expose("/", methods=["POST"])
@protect()
@safe
Expand Down Expand Up @@ -440,13 +451,9 @@ def data(self) -> Response:
type: object
400:
$ref: '#/components/responses/400'
401:
$ref: '#/components/responses/401'
404:
$ref: '#/components/responses/404'
500:
$ref: '#/components/responses/500'
"""
"""
if not request.is_json:
return self.response_400(message="Request is not JSON")
try:
Expand All @@ -464,3 +471,65 @@ def data(self) -> Response:
resp = make_response(response_data, 200)
resp.headers["Content-Type"] = "application/json; charset=utf-8"
return resp

@expose("/<pk>/thumbnail/<digest>/", methods=["GET"])
@protect()
@rison(thumbnail_query_schema)
@safe
def thumbnail(
    self, pk: int, digest: str, **kwargs: Dict[str, bool]
) -> WerkzeugResponse:
    """Get Chart thumbnail
    ---
    get:
      description: Compute or get already computed chart thumbnail from cache
      parameters:
      - in: path
        schema:
          type: integer
        name: pk
      - in: path
        schema:
          type: string
        name: digest
      responses:
        200:
          description: Chart thumbnail image
          content:
            image/*:
              schema:
                type: string
                format: binary
        202:
          description: Thumbnail is not cached yet (or a refresh was forced),
            an async task was fired to compute it
          content:
            application/json:
              schema:
                type: object
                properties:
                  message:
                    type: string
        302:
          description: Redirects to the current digest
        400:
          $ref: '#/components/responses/400'
        401:
          $ref: '#/components/responses/401'
        404:
          $ref: '#/components/responses/404'
        500:
          $ref: '#/components/responses/500'
    """
    chart = self.datamodel.get(pk, self._base_filters)
    if not chart:
        return self.response_404()
    # A forced refresh skips the cache lookup entirely; otherwise try to
    # fetch the already computed screenshot from the thumbnail cache.
    force = kwargs["rison"].get("force", False)
    screenshot = (
        None if force else ChartScreenshot(pk).get_from_cache(cache=thumbnail_cache)
    )
    # Nothing usable in cache: ask celery to compute it and answer right away
    if not screenshot:
        cache_chart_thumbnail.delay(chart.id, force=True)
        return self.response(202, message="OK Async")
    # If the requested digest differs from the chart's current digest,
    # redirect the client to the URL carrying the current digest.
    if chart.digest != digest:
        return redirect(
            url_for(
                f"{self.__class__.__name__}.thumbnail", pk=pk, digest=chart.digest
            )
        )
    return Response(
        FileWrapper(screenshot), mimetype="image/png", direct_passthrough=True
    )
4 changes: 4 additions & 0 deletions superset/charts/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
from superset.utils import core as utils

# JSON schema for a list of ids to delete: an array of integers
get_delete_ids_schema = {"type": "array", "items": {"type": "integer"}}
# JSON schema for thumbnail query arguments: an object with an optional
# boolean "force" property
thumbnail_query_schema = {
"type": "object",
"properties": {"force": {"type": "boolean"}},
}


def validate_json(value: Union[bytes, bytearray, str]) -> None:
Expand Down
73 changes: 73 additions & 0 deletions superset/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from datetime import datetime
from subprocess import Popen
from sys import stdout
from typing import Tuple, Type, Union

import click
import yaml
Expand Down Expand Up @@ -454,6 +455,78 @@ def flower(port, address):
Popen(cmd, shell=True).wait()


@superset.command()
@with_appcontext
@click.option(
    "--asynchronous",
    "-a",
    is_flag=True,
    default=False,
    help="Trigger commands to run remotely on a worker",
)
@click.option(
    "--dashboards_only",
    "-d",
    is_flag=True,
    default=False,
    help="Only process dashboards",
)
@click.option(
    "--charts_only", "-c", is_flag=True, default=False, help="Only process charts"
)
@click.option(
    "--force",
    "-f",
    is_flag=True,
    default=False,
    help="Force refresh, even if previously cached",
)
@click.option(
    "--model_id",
    "-i",
    multiple=True,
    type=int,
    help="Only process the given model id (can be passed multiple times)",
)
def compute_thumbnails(
    asynchronous: bool,
    dashboards_only: bool,
    charts_only: bool,
    force: bool,
    model_id: Tuple[int, ...],
) -> None:
    """Compute thumbnails"""
    # NOTE: the docstring above is the command's help text, so parameter
    # details are kept in comments rather than in the docstring.
    #
    # asynchronous:    send each computation to a worker through ``.delay``
    #                  instead of running it in this process
    # dashboards_only: skip charts
    # charts_only:     skip dashboards
    # force:           forwarded as ``force=`` to the thumbnail tasks
    # model_id:        ids to restrict processing to; empty means every model.
    #                  The same ids are applied to both dashboards and charts.
    from superset.models.dashboard import Dashboard
    from superset.models.slice import Slice
    from superset.tasks.thumbnails import (
        cache_chart_thumbnail,
        cache_dashboard_thumbnail,
    )

    def compute_generic_thumbnail(
        friendly_type: str,
        model_cls: Union[Type[Dashboard], Type[Slice]],
        model_id: Tuple[int, ...],
        compute_func,
    ) -> None:
        """Compute (or trigger) the thumbnail of every selected ``model_cls`` row."""
        query = db.session.query(model_cls)
        if model_id:
            query = query.filter(model_cls.id.in_(model_id))
        models = query.all()
        count = len(models)
        # The dispatch mode is the same for every model, so pick it once
        if asynchronous:
            func = compute_func.delay
            action = "Triggering"
        else:
            func = compute_func
            action = "Processing"
        for position, model in enumerate(models, start=1):
            msg = f'{action} {friendly_type} "{model}" ({position}/{count})'
            click.secho(msg, fg="green")
            func(model.id, force=force)

    if not charts_only:
        compute_generic_thumbnail(
            "dashboard", Dashboard, model_id, cache_dashboard_thumbnail
        )
    if not dashboards_only:
        compute_generic_thumbnail("chart", Slice, model_id, cache_chart_thumbnail)


@superset.command()
@with_appcontext
def load_test_users():
Expand Down
7 changes: 7 additions & 0 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,8 @@ def _try_json_readsha(filepath, length): # pylint: disable=unused-argument
"ENABLE_EXPLORE_JSON_CSRF_PROTECTION": False,
"KV_STORE": False,
"PRESTO_EXPAND_DATA": False,
# Exposes API endpoint to compute thumbnails
"THUMBNAILS": False,
"REDUCE_DASHBOARD_BOOTSTRAP_PAYLOAD": False,
"SHARE_QUERIES_VIA_KV_STORE": False,
"SIP_38_VIZ_REARCHITECTURE": False,
Expand Down Expand Up @@ -312,6 +314,11 @@ def _try_json_readsha(filepath, length): # pylint: disable=unused-argument
# return feature_flags_dict
GET_FEATURE_FLAGS_FUNC: Optional[Callable[[Dict[str, bool]], Dict[str, bool]]] = None

# ---------------------------------------------------
# Thumbnail config (behind feature flag)
# ---------------------------------------------------
# User the async selenium thumbnail task will use
THUMBNAIL_SELENIUM_USER = "Admin"
# Cache configuration for thumbnails.
# NOTE(review): presumably the "null" cache type means nothing is actually
# cached until this is overridden - confirm against the cache backend docs.
THUMBNAIL_CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "null"}

# ---------------------------------------------------
# Image and file configuration
Expand Down
Loading

0 comments on commit d81f720

Please sign in to comment.