Backend for thumbnail computation + caching + endpoints
This PR is the backend subset of WIP PR #6601. It includes:

* a refactor of the Selenium abstractions used for email schedules,
  generalized to also work with thumbnails
* a new dependency on PIL, the common library for image processing in
  Python, used here to resize Selenium screenshots into thumbnails
* a CLI utility to compute thumbnails in bulk
* the addition of an extra caching backend for thumbnails; in most cases
  it should point to the same backend as the one used for chart JSON
  caching
* two new endpoints to retrieve dashboard and chart thumbnails (see the
  request sketch after this list)
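As a rough illustration of the new endpoints: the `thumbnail_url` properties added in `superset/models/core.py` below expose URLs of the form `/thumb/chart/<id>/<digest>/` and `/thumb/dashboard/<id>/<digest>/`, so a client can fetch a cached PNG with a plain GET. This is a minimal sketch, not code from the PR; the base URL, the session-cookie auth, and the sample id/digest are assumptions.

```python
# Minimal sketch of hitting the new thumbnail endpoints; everything below
# (base URL, cookie auth, sample id/digest) is an assumption for illustration.
import requests

BASE_URL = "http://localhost:8088"                       # assumed local Superset
COOKIES = {"session": "<authenticated session cookie>"}  # assumed cookie auth


def fetch_chart_thumbnail(chart_id: int, digest: str, force: bool = False) -> bytes:
    """GET /thumb/chart/<id>/<digest>/, optionally forcing a re-render."""
    url = f"{BASE_URL}/thumb/chart/{chart_id}/{digest}/"
    resp = requests.get(url, cookies=COOKIES, params={"force": str(force).lower()})
    resp.raise_for_status()
    return resp.content  # PNG bytes


if __name__ == "__main__":
    with open("chart_42_thumb.png", "wb") as f:
        f.write(fetch_chart_thumbnail(42, "abc123"))
```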
mistercrunch committed Aug 9, 2019
1 parent 6df2a71 commit cef4673
Showing 17 changed files with 542 additions and 33 deletions.
5 changes: 5 additions & 0 deletions superset/__init__.py
@@ -108,6 +108,7 @@ def get_manifest():
# Setup the cache prior to registering the blueprints.
cache = setup_cache(app, conf.get("CACHE_CONFIG"))
tables_cache = setup_cache(app, conf.get("TABLE_NAMES_CACHE_CONFIG"))
thumbnail_cache = setup_cache(app, conf.get("THUMBNAIL_CACHE_CONFIG"))

for bp in conf.get("BLUEPRINTS"):
try:
@@ -120,6 +121,10 @@ def get_manifest():
if conf.get("SILENCE_FAB"):
logging.getLogger("flask_appbuilder").setLevel(logging.ERROR)

logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("selenium").setLevel(logging.ERROR)
logging.getLogger("PIL").setLevel(logging.ERROR)

if app.debug:
app.logger.setLevel(logging.DEBUG) # pylint: disable=no-member
else:
4 changes: 2 additions & 2 deletions superset/assets/stylesheets/less/cosmo/variables.less
@@ -29,8 +29,8 @@
@gray-darker: lighten(@gray-base, 13.5%);
@gray-dark: lighten(@gray-base, 20%);
@gray: lighten(@gray-base, 33.5%);
@gray-light: lighten(@gray-base, 70%);
@gray-lighter: lighten(@gray-base, 95%);
@gray-light: lighten(@gray-base, 80%);
@gray-lighter: lighten(@gray-base, 90%);

@brand-primary: #00A699;
@brand-success: #4AC15F;
69 changes: 69 additions & 0 deletions superset/cli.py
@@ -423,6 +423,75 @@ def load_test_users():
load_test_users_run()


@app.cli.command()
@click.option(
"--asynchronous",
"-a",
is_flag=True,
default=False,
help="Trigger commands to run remotely on a worker",
)
@click.option(
"--dashboards_only",
"-d",
is_flag=True,
default=False,
help="Only process dashboards",
)
@click.option(
"--charts_only", "-c", is_flag=True, default=False, help="Only process charts"
)
@click.option(
"--force",
"-f",
is_flag=True,
default=False,
help="Force refresh, even if previously cached",
)
@click.option("--id", "-i", multiple=True)
def compute_thumbnails(asynchronous, dashboards_only, charts_only, force, id):
"""Compute thumbnails"""
from superset.models import core as models
from superset.tasks.thumbnails import (
cache_chart_thumbnail,
cache_dashboard_thumbnail,
)

if not charts_only:
query = db.session.query(models.Dashboard)
if id:
query = query.filter(models.Dashboard.id.in_(id))
dashboards = query.all()
count = len(dashboards)
for i, dash in enumerate(dashboards):
if asynchronous:
func = cache_dashboard_thumbnail.delay
action = "Triggering"
else:
func = cache_dashboard_thumbnail
action = "Processing"
msg = f'{action} dashboard "{dash.dashboard_title}" ({i+1}/{count})'
click.secho(msg, fg="green")
func(dash.id, force=force)

if not dashboards_only:
query = db.session.query(models.Slice)
if id:
query = query.filter(models.Slice.id.in_(id))
slices = query.all()
count = len(slices)
for i, slc in enumerate(slices):
if asynchronous:
func = cache_chart_thumbnail.delay
action = "Triggering"
else:
func = cache_chart_thumbnail
action = "Processing"
msg = f'{action} chart "{slc.slice_name}" ({i+1}/{count})'
click.secho(msg, fg="green")
func(slc.id, force=force)


def load_test_users_run():
"""
Loads admin, alpha, and gamma user for testing purposes
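The command above dispatches to `cache_chart_thumbnail` / `cache_dashboard_thumbnail` in `superset.tasks.thumbnails`, whose diff is not part of this excerpt. The sketch below only illustrates the screenshot → PIL resize → cache flow described in the PR; the helper names, cache keys, window size, and wait logic are assumptions, not the committed implementation.

```python
# Hedged sketch of a thumbnail task: Selenium screenshot -> PIL resize -> cache.
# Auth handling (logging the headless browser in) is omitted for brevity.
import time
from io import BytesIO

from PIL import Image
from selenium import webdriver

from superset import app, thumbnail_cache  # thumbnail_cache is added in this PR
from superset.tasks.celery_app import app as celery_app


def screenshot_url(url, window=(1600, 1200)):
    """Render the page in headless Chrome and return a PNG of the window."""
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    driver = webdriver.Chrome(options=options)
    try:
        driver.set_window_size(*window)
        driver.get(url)
        time.sleep(5)  # crude wait for charts to render; real code should poll
        return driver.get_screenshot_as_png()
    finally:
        driver.quit()


def resize_to_thumbnail(png_bytes, width=400):
    """Downscale a full-window screenshot to a fixed-width PNG thumbnail."""
    img = Image.open(BytesIO(png_bytes))
    ratio = width / img.size[0]
    img.thumbnail((width, int(img.size[1] * ratio)))  # in place, keeps aspect ratio
    out = BytesIO()
    img.save(out, "PNG")
    return out.getvalue()


@celery_app.task
def cache_dashboard_thumbnail(dashboard_id, force=False):
    """Compute and cache a dashboard thumbnail (cache key scheme assumed)."""
    cache_key = f"thumb__dashboard__{dashboard_id}"
    if not force and thumbnail_cache.get(cache_key):
        return  # already cached
    base = app.config.get("WEBDRIVER_BASEURL", "http://0.0.0.0:8080/")
    png = screenshot_url(f"{base}superset/dashboard/{dashboard_id}/")
    thumbnail_cache.set(cache_key, resize_to_thumbnail(png))
```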
2 changes: 2 additions & 0 deletions superset/config.py
@@ -380,6 +380,8 @@
# you'll want to use a proper broker as specified here:
# http://docs.celeryproject.org/en/latest/getting-started/brokers/index.html

CELERYD_LOG_LEVEL = "DEBUG"


class CeleryConfig(object):
BROKER_URL = "sqla+sqlite:///celerydb.sqlite"
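`THUMBNAIL_CACHE_CONFIG`, which is read in `superset/__init__.py` above, is not visible in this config.py excerpt. Below is a hedged example of what a deployment's `superset_config.py` could set, reusing the same Redis backend as chart JSON caching as the PR description suggests; the key names follow Flask-Caching conventions, and the defaults shipped by this commit are not shown here.

```python
# superset_config.py -- illustrative only; assumes a local Redis instance.
CACHE_CONFIG = {
    "CACHE_TYPE": "redis",
    "CACHE_DEFAULT_TIMEOUT": 60 * 60 * 24,  # one day
    "CACHE_KEY_PREFIX": "superset_results_",
    "CACHE_REDIS_URL": "redis://localhost:6379/0",
}

# Point thumbnails at the same backend, but keep their keys separate.
THUMBNAIL_CACHE_CONFIG = {
    **CACHE_CONFIG,
    "CACHE_KEY_PREFIX": "superset_thumbnails_",
}
```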
10 changes: 10 additions & 0 deletions superset/connectors/base/models.py
@@ -157,6 +157,16 @@ def short_data(self):
def select_star(self):
pass

@property
def data_summary(self):
return {
"datasource_name": self.datasource_name,
"type": self.type,
"schema": self.schema,
"id": self.id,
"explore_url": self.explore_url,
}

@property
def data(self):
"""Data representation of the datasource sent to the frontend"""
49 changes: 49 additions & 0 deletions superset/models/core.py
@@ -58,6 +58,7 @@
from superset.models.helpers import AuditMixinNullable, ImportMixin
from superset.models.tags import ChartUpdater, DashboardUpdater, FavStarUpdater
from superset.models.user_attributes import UserAttribute
from superset.tasks.thumbnails import cache_dashboard_thumbnail
from superset.utils import cache as cache_util, core as utils
from superset.viz import viz_types
from urllib import parse # noqa
@@ -187,6 +188,26 @@ def cls_model(self):
def datasource(self):
return self.get_datasource

@property
def thumbnail_url(self):
# SHA here is to force bypassing the browser cache when chart has changed
sha = utils.md5_hex(self.params, 6)
return f"/thumb/chart/{self.id}/{sha}/"

@property
def thumbnail_img(self):
return Markup(f'<img width="75" src="{self.thumbnail_url}">')

@property
def thumbnail_link(self):
return Markup(
f"""
<a href="{self.thumbnail_url}?force=true">
{self.thumbnail_img}
</a>
"""
)

def clone(self):
return Slice(
slice_name=self.slice_name,
@@ -711,6 +732,34 @@ def export_dashboards(cls, dashboard_ids):
indent=4,
)

@property
def thumbnail_url(self):
# SHA here is to force bypassing the browser cache when chart has changed
sha = utils.md5_hex(self.position_json, 6)
return f"/thumb/dashboard/{self.id}/{sha}/"

@property
def thumbnail_img(self):
return Markup(f'<img width="150" src="{self.thumbnail_url}">')

@property
def thumbnail_link(self):
return Markup(
f"""
<a href="{self.thumbnail_url}?force=true">
{self.thumbnail_img}
</a>
"""
)


def event_after_dashboard_changed(mapper, connection, target):
cache_dashboard_thumbnail.delay(target.id, force=True)


sqla.event.listen(Dashboard, "before_insert", event_after_dashboard_changed)
sqla.event.listen(Dashboard, "before_update", event_after_dashboard_changed)


class Database(Model, AuditMixinNullable, ImportMixin):

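The `thumbnail_url` properties above append a short digest of `params` / `position_json` so browsers re-fetch thumbnails once the underlying object changes. `utils.md5_hex` itself is not shown in this excerpt; a presumably equivalent helper:

```python
import hashlib


def md5_hex(value, length=6):
    """First `length` hex characters of the MD5 of `value` (assumed behavior)."""
    return hashlib.md5((value or "").encode("utf-8")).hexdigest()[:length]


# e.g. /thumb/chart/42/1a2b3c/ -- the digest changes whenever the params change
digest = md5_hex('{"viz_type": "table"}')
```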
7 changes: 6 additions & 1 deletion superset/tasks/cache.py
@@ -25,7 +25,6 @@
from sqlalchemy import and_, func

from superset import app, db
from superset.models.core import Dashboard, Log, Slice
from superset.models.tags import Tag, TaggedObject
from superset.tasks.celery_app import app as celery_app
from superset.utils.core import parse_human_datetime
@@ -132,6 +131,8 @@ class DummyStrategy(Strategy):

def get_urls(self):
session = db.create_scoped_session()
from superset.models.core import Slice

charts = session.query(Slice).all()

return [get_url(chart) for chart in charts]
@@ -166,6 +167,8 @@ def get_urls(self):
urls = []
session = db.create_scoped_session()

from superset.models.core import Dashboard, Log

records = (
session.query(Log.dashboard_id, func.count(Log.dashboard_id))
.filter(and_(Log.dashboard_id.isnot(None), Log.dttm >= self.since))
@@ -223,6 +226,8 @@ def get_urls(self):
)
.all()
)
from superset.models.core import Dashboard, Slice

dash_ids = [tagged_object.object_id for tagged_object in tagged_objects]
tagged_dashboards = session.query(Dashboard).filter(Dashboard.id.in_(dash_ids))
for dashboard in tagged_dashboards:
54 changes: 31 additions & 23 deletions superset/tasks/schedules.py
@@ -37,7 +37,6 @@
import simplejson as json
from werkzeug.utils import parse_cookie

# Superset framework imports
from superset import app, db, security_manager
from superset.models.schedules import (
EmailDeliveryType,
@@ -46,7 +45,7 @@
SliceEmailReportFormat,
)
from superset.tasks.celery_app import app as celery_app
from superset.utils.core import get_email_address_list, send_email_smtp
from superset.utils import core as utils

# Globals
config = app.config
@@ -66,13 +65,13 @@ def _get_recipients(schedule):
to = schedule.recipients
yield (to, bcc)
else:
for to in get_email_address_list(schedule.recipients):
for to in utils.get_email_address_list(schedule.recipients):
yield (to, bcc)


def _deliver_email(schedule, subject, email):
for (to, bcc) in _get_recipients(schedule):
send_email_smtp(
utils.send_email_smtp(
to,
subject,
email.body,
@@ -85,16 +84,19 @@ def _deliver_email(schedule, subject, email):
)


def _generate_mail_content(schedule, screenshot, name, url):
if schedule.delivery_type == EmailDeliveryType.attachment:
def _generate_mail_content(delivery_type, screenshot, name, url):
config = app.config
if delivery_type == EmailDeliveryType.attachment:
images = None
data = {"screenshot.png": screenshot}
body = __(
'<b><a href="%(url)s">Explore in Superset</a></b><p></p>',
name=name,
url=url,
)
elif schedule.delivery_type == EmailDeliveryType.inline:
else:
# Implicit: delivery_type == EmailDeliveryType.inline:

# Get the domain from the 'From' address ..
# and make a message id without the < > in the ends
domain = parseaddr(config.get("SMTP_MAIL_FROM"))[1].split("@")[1]
@@ -239,13 +241,10 @@ def deliver_dashboard(schedule):
prefix=config.get("EMAIL_REPORTS_SUBJECT_PREFIX"),
title=dashboard.dashboard_title,
)
_deliver_email(_get_recipients(schedule), subject, email)

_deliver_email(schedule, subject, email)


def _get_slice_data(schedule):
slc = schedule.slice

def _get_slice_data(slc, delivery_type):
slice_url = _get_url_path(
"Superset.explore_json", csv="true", form_data=json.dumps({"slice_id": slc.id})
)
@@ -266,7 +265,7 @@ def _get_slice_data(schedule):
# TODO: Move to the csv module
rows = [r.split(b",") for r in response.content.splitlines()]

if schedule.delivery_type == EmailDeliveryType.inline:
if delivery_type == EmailDeliveryType.inline:
data = None

# Parse the csv file and generate HTML
@@ -280,7 +279,7 @@ def _get_slice_data(schedule):
link=url,
)

elif schedule.delivery_type == EmailDeliveryType.attachment:
elif delivery_type == EmailDeliveryType.attachment:
data = {__("%(name)s.csv", name=slc.slice_name): response.content}
body = __(
'<b><a href="%(url)s">Explore in Superset</a></b><p></p>',
@@ -326,24 +325,25 @@ def _get_slice_visualization(schedule):
return _generate_mail_content(schedule, screenshot, slc.slice_name, slice_url)


def deliver_slice(schedule):
def deliver_slice(slc, recipients, email_format, delivery_type):
"""
Given a slice and delivery parameters, deliver the slice as an email report
"""
if schedule.email_format == SliceEmailReportFormat.data:
email = _get_slice_data(schedule)
elif schedule.email_format == SliceEmailReportFormat.visualization:
email = _get_slice_visualization(schedule)
config = app.config
if email_format == SliceEmailReportFormat.data:
email = _get_slice_data(slc, delivery_type)
elif email_format == SliceEmailReportFormat.visualization:
email = _get_slice_visualization(slc, delivery_type)
else:
raise RuntimeError("Unknown email report format")

subject = __(
"%(prefix)s %(title)s",
prefix=config.get("EMAIL_REPORTS_SUBJECT_PREFIX"),
title=schedule.slice.slice_name,
title=slc.slice_name,
)

_deliver_email(schedule, subject, email)
_deliver_email(recipients, subject, email)


@celery_app.task(name="email_reports.send", bind=True, soft_time_limit=300)
@@ -362,9 +362,16 @@ def schedule_email_report(task, report_type, schedule_id, recipients=None):
schedule.recipients = recipients

if report_type == ScheduleType.dashboard.value:
deliver_dashboard(schedule)
deliver_dashboard(
schedule.dashboard, _get_recipients(schedule), schedule.delivery_type
)
elif report_type == ScheduleType.slice.value:
deliver_slice(schedule)
deliver_slice(
schedule.slice,
_get_recipients(schedule),
schedule.email_format,
schedule.delivery_type,
)
else:
raise RuntimeError("Unknown report type")

@@ -412,6 +419,7 @@ def schedule_window(report_type, start_at, stop_at, resolution):
@celery_app.task(name="email_reports.schedule_hourly")
def schedule_hourly():
""" Celery beat job meant to be invoked hourly """
config = app.config

if not config.get("ENABLE_SCHEDULED_EMAIL_REPORTS"):
logging.info("Scheduled email reports not enabled in config")