Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add performance metrics commands #657

Merged
merged 11 commits into from
Mar 18, 2024
6 changes: 6 additions & 0 deletions .github/workflows/integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ jobs:
tutor local do dump-data-to-clickhouse --options "--object course_overviews"
make extract_translations
tutor local do import-assets
- name: Performance metrics
run: tutor local do performance-metrics
- name: Tutor stop
run: tutor local stop

Expand Down Expand Up @@ -136,6 +138,8 @@ jobs:
tutor dev do dump-data-to-clickhouse --options "--object course_overviews"
make extract_translations
tutor dev do import-assets
- name: Performance metrics
run: tutor dev do performance-metrics
- name: Tutor stop
run: tutor dev stop

Expand Down Expand Up @@ -218,6 +222,8 @@ jobs:
tutor k8s do dump-data-to-clickhouse --options "--object course_overviews"
make extract_translations
tutor k8s do import-assets
- name: Performance metrics
run: tutor k8s do performance-metrics
- name: Check failure logs
if: failure()
run: |
Expand Down
3 changes: 1 addition & 2 deletions tutoraspects/asset_command_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,12 +157,11 @@ class ChartAsset(Asset):

path = "charts"
omitted_vars = [
"query_context",
"params.dashboards",
"params.datasource",
"params.slice_id",
]
raw_vars = ["sqlExpression"]
raw_vars = ["sqlExpression", "query_context"]


class DashboardAsset(Asset):
Expand Down
17 changes: 17 additions & 0 deletions tutoraspects/commands_v0.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,23 @@ def alembic(context, command) -> None:
runner.run_job("aspects", command)


# Ex: "tutor local do performance-metrics "
# NOTE: click uses the function docstring below as the command's help text.
@click.command(context_settings={"ignore_unknown_options": True})
@click.pass_obj
def performance_metrics(context) -> None:
    """
    Job to measure performance metrics of charts and its queries in Superset and ClickHouse.
    """
    # Load the configuration from the context root and build a job runner for it.
    config = tutor_config.load(context.root)
    runner = context.job_runner(config)

    # Shell pipeline handed to the job; "&&" stops at the first failing step,
    # so 'Done!' is only echoed when the metrics script exits successfully.
    command = """echo 'Performance...' &&
python /app/pythonpath/performance_metrics.py &&
echo 'Done!';
"""
    # Run the pipeline as a job in the "superset" service.
    runner.run_job("superset", command)


# Ex: "tutor local do import_assets "
@click.command(context_settings={"ignore_unknown_options": True})
@click.pass_obj
Expand Down
17 changes: 17 additions & 0 deletions tutoraspects/commands_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,22 @@ def import_assets() -> list[tuple[str, str]]:
]


# Ex: "tutor local do performance-metrics "
# NOTE: click uses the function docstring below as the command's help text.
@click.command(context_settings={"ignore_unknown_options": True})
def performance_metrics() -> list[tuple[str, str]]:
    """
    Job to measure performance metrics of charts and its queries in Superset and ClickHouse.
    """
    # Adjacent literals are concatenated into one shell command line; "&&"
    # stops at the first failing step.
    shell_command = (
        "echo 'Performance...' && "
        "python /app/pythonpath/performance_metrics.py &&"
        "echo 'Done!';"
    )
    # A single (service, command) pair: the script runs in the "superset" service.
    service = "superset"
    return [(service, shell_command)]


# Ex: "tutor local do dump_data_to_clickhouse "
@click.command(context_settings={"ignore_unknown_options": True})
@click.option(
Expand Down Expand Up @@ -311,6 +327,7 @@ def check_superset_assets():
dump_data_to_clickhouse,
transform_tracking_logs,
import_assets,
performance_metrics,
)

COMMANDS = (aspects,)
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,18 @@ def get_preferences(self, username):
openedx_apis = current_app.config["OPENEDX_API_URLS"]
url = openedx_apis["get_preference"].format(username=username)
oauth_remote = self.oauth_remotes.get("openedxsso")
response = oauth_remote.get(url, token=self.get_oauth_token()).json()
locale_preference = response.get("pref-lang", "en").replace("-", "_")
locale_preference = "en"
try:
response = oauth_remote.get(url, token=self.get_oauth_token()).json()
locale_preference = response.get("pref-lang", "en").replace("-", "_")
except Exception as e:
return locale_preference

if locale_preference not in current_app.config["DASHBOARD_LOCALES"]:
log.warning(
f"Language {locale_preference} is not supported by Superset"
)
locale_preference = "en"
log.warning(
f"Language {locale_preference} is not supported by Superset"
)
return locale_preference

return locale_preference

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
from superset.app import create_app

# Build the Superset Flask application and push an application context at
# module import time, before the remaining imports are executed.
# NOTE(review): presumably the superset imports below (db, security_manager,
# models) need an active app context to be usable - confirm before reordering.
app = create_app()
app.app_context().push()


import json
import logging
import time
import uuid
from datetime import datetime
from unittest.mock import patch

import sqlparse
from flask import g
from superset import security_manager
from superset.charts.data.commands.get_data_command import ChartDataCommand
from superset.charts.schemas import ChartDataQueryContextSchema
from superset.extensions import db
from superset.models.dashboard import Dashboard
from superset.models.slice import Slice

# Dedicated logger for this script's progress messages and final report.
logger = logging.getLogger("performance_metrics")

# Identifier of this run: a version string plus the first six characters of a
# random UUID. It is used as the mocked ClickHouse client name so the queries
# of this run can be found again in the query log.
# NOTE(review): "{{ASPECTS_VERSION}}" looks like a template placeholder that is
# substituted before the script is executed - confirm.
ASPECTS_VERSION = "{{ASPECTS_VERSION}}"
UUID = str(uuid.uuid4())[:6]
RUN_ID = "-".join(("aspects", ASPECTS_VERSION, UUID))

# Per-chart header of the textual report.
report_format = "{i}. {slice}\nSuperset time: {superset_time} (s).\n"

# Per-query section of the textual report; a blank line separates queries.
query_format = (
    "\n".join(
        [
            "Query duration: {query_duration_ms} (s).",
            "Result rows: {result_rows}",
            "Memory Usage (MB): {memory_usage_mb}",
            "Row count (superset) {rowcount:}",
            "Filters: {filters}",
        ]
    )
    + "\n\n"
)


def performance_metrics():
    """Measure the performance of every chart on the embeddable dashboards.

    Each chart of each matching dashboard is executed through
    ``measure_chart``. Charts without a stored query context are skipped with
    a warning, because there is nothing to execute for them.

    Returns:
        list: one result mapping per measured chart, as returned by
        ``measure_chart``, with the ``data`` payload removed from every query.
    """
    # Mock the client name so the queries issued by this run can be identified
    # in the clickhouse system.query_log table by the http_user_agent field.
    with patch("clickhouse_connect.common.build_client_name") as mock_build_client_name:
        mock_build_client_name.return_value = RUN_ID
        # NOTE(review): presumably a template placeholder that is replaced by a
        # collection of dashboard slugs before the script runs - confirm.
        embeddable_dashboards = {{SUPERSET_EMBEDDABLE_DASHBOARDS}}
        dashboards = (
            db.session.query(Dashboard)
            .filter(Dashboard.slug.in_(embeddable_dashboards))
            .all()
        )
        report = []
        for dashboard in dashboards:
            logger.info(f"Dashboard: {dashboard.slug}")
            for chart in dashboard.slices:
                if not chart.query_context:
                    # json.loads() in measure_chart would fail on an empty
                    # query context and abort the whole run.
                    logger.warning(f"Skipping slice without query context: {chart}")
                    continue
                result = measure_chart(chart)
                for query in result["queries"]:
                    # Remove the data from the query to avoid memory issues on large datasets.
                    query.pop("data", None)
                report.append(result)
        return report


def measure_chart(slice, extra_filters=None):
    """
    Measure the performance of a chart and return the results.

    Args:
        slice: chart whose ``query_context`` (a JSON string) and
            ``datasource_id`` are used to build the data request.
        extra_filters: optional list of filter dicts appended to the
            ``filters`` of every query in the chart's query context.

    Returns:
        The result mapping produced by ``ChartDataCommand.run()``, extended
        with ``time_elapsed`` (elapsed seconds as a float) and ``slice``.
    """
    logger.info(f"Fetching slice data: {slice}")
    query_context = json.loads(slice.query_context)
    # Always request a full, uncached JSON result for the chart's own datasource.
    query_context.update(
        {
            "result_format": "json",
            "result_type": "full",
            "force": True,
            "datasource": {
                "type": "table",
                "id": slice.datasource_id,
            },
        }
    )

    if extra_filters:
        # Filters live on each entry of "queries"; the query context has no
        # top-level "filters" key to extend.
        for query in query_context.get("queries", []):
            query.setdefault("filters", []).extend(extra_filters)

    g.user = security_manager.find_user(username="{{SUPERSET_ADMIN_USERNAME}}")
    query_context = ChartDataQueryContextSchema().load(query_context)
    command = ChartDataCommand(query_context)

    # Use a monotonic clock: wall-clock adjustments must not skew the duration.
    start_time = time.perf_counter()
    result = command.run()
    result["time_elapsed"] = time.perf_counter() - start_time
    result["slice"] = slice
    return result


def get_query_log_from_clickhouse(report):
    """
    Get the query log from clickhouse and print the results.

    The chart with the hard-coded uuid below is executed with an additional
    ``http_user_agent == RUN_ID`` filter, and its rows are matched by SQL text
    against the queries recorded in ``report``. Queries without a matching
    row are reported with ``None`` values instead of aborting the report.

    Args:
        report: list of result mappings as returned by ``performance_metrics``.
    """
    chart_uuid = "bb13bb31-c797-4ed3-a7f9-7825cc6dc482"

    log_chart = db.session.query(Slice).filter(Slice.uuid == chart_uuid).one()

    # Restrict the chart's first query to the rows produced by this run.
    query_context = json.loads(log_chart.query_context)
    query_context["queries"][0]["filters"].append(
        {"col": "http_user_agent", "op": "==", "val": RUN_ID}
    )
    log_chart.query_context = json.dumps(query_context)

    log_result = measure_chart(log_chart)

    # Index the returned rows by their normalized SQL text.
    clickhouse_queries = {}
    for query in log_result["queries"]:
        for row in query["data"]:
            parsed_sql = str(sqlparse.parse(row.pop("query"))[0])
            clickhouse_queries[parsed_sql] = row

    # Sort report by slowest queries
    report = sorted(report, key=lambda x: x["time_elapsed"], reverse=True)

    report_parts = [f"\nSuperset Reports: {RUN_ID}\n\n"]
    for position, entry in enumerate(report, start=1):
        report_parts.append(
            report_format.format(
                i=position, slice=entry["slice"], superset_time=entry["time_elapsed"]
            )
        )
        for query in entry["queries"]:
            # Rebuild the SQL text in the form used as lookup key: no
            # semicolons, followed by the "FORMAT Native" suffix.
            parsed_sql = (
                str(sqlparse.parse(query["query"])[0]).replace(";", "")
                + "\n FORMAT Native"
            )
            clickhouse_report = clickhouse_queries.get(parsed_sql, {})
            # The query may be missing from the log; avoid dividing None.
            duration_ms = clickhouse_report.get("query_duration_ms")
            duration_s = None if duration_ms is None else duration_ms / 1000
            report_parts.append(
                query_format.format(
                    query_duration_ms=duration_s,
                    memory_usage_mb=clickhouse_report.get("memory_usage_mb"),
                    result_rows=clickhouse_report.get("result_rows"),
                    rowcount=query["rowcount"],
                    filters=query["applied_filters"],
                )
            )
    logger.info("".join(report_parts))


def _main():
    """Run the chart measurements, then print the ClickHouse query report."""
    logger.info(f"Running performance metrics. RUN ID: {RUN_ID}")
    measurements = performance_metrics()
    # Clickhouse query log takes some seconds to log queries.
    logger.info("Waiting for clickhouse log...")
    time.sleep(10)
    get_query_log_from_clickhouse(measurements)


if __name__ == "__main__":
    _main()
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,16 @@ dataset_uuid: d777bd95-2110-46db-a1c4-8358be81a85a
description: The distribution of grades for a course, out of 100%. Grades are grouped
in ranges of 10%.
params:
adhoc_filters: []
adhoc_filters:
- clause: WHERE
comparator: No filter
expressionType: SIMPLE
operator: TEMPORAL_RANGE
subject: emission_time
bottom_margin: auto
color_scheme: supersetColors
columns: []
extra_form_data: {}
granularity_sqla: emission_time
groupby:
- grade_bucket
metrics:
Expand All @@ -29,6 +33,10 @@ params:
- null
y_axis_format: SMART_NUMBER
y_axis_label: Number Of Students
query_context: '{"datasource":{"id":236,"type":"table"},"force":false,"queries":[{"filters":[{"col":"emission_time","op":"TEMPORAL_RANGE","val":"No
filter"}],"extras":{"having":"","where":""},"applied_time_extras":{},"columns":["grade_bucket"],"metrics":["students"],"annotation_layers":[],"row_limit":10000,"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{}}],"form_data":{"datasource":"236__table","viz_type":"dist_bar","slice_id":298,"metrics":["students"],"adhoc_filters":[{"clause":"WHERE","subject":"emission_time","operator":"TEMPORAL_RANGE","comparator":"No
filter","expressionType":"SIMPLE"}],"groupby":["grade_bucket"],"columns":[],"row_limit":10000,"order_desc":true,"color_scheme":"supersetColors","show_legend":false,"rich_tooltip":true,"order_bars":true,"y_axis_format":"SMART_NUMBER","y_axis_label":"Number
Of Students","y_axis_bounds":[null,null],"x_axis_label":"Course Grade (out of 100%)","bottom_margin":"auto","x_ticks_layout":"auto","extra_form_data":{},"dashboards":[148],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
slice_name: Course Grade Distribution
uuid: f9adbc85-1f50-4c04-ace3-31ba7390de5e
version: 1.0.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,17 @@ params:
operatorId: EQUALS
sqlExpression: null
subject: enrollment_status
- clause: WHERE
comparator: No filter
expressionType: SIMPLE
operator: TEMPORAL_RANGE
subject: enrollment_status_date
color_picker:
a: 1
b: 135
g: 122
r: 0
extra_form_data: {}
granularity_sqla: enrollment_status_date
header_font_size: 0.4
metric: count
rolling_type: None
Expand All @@ -34,9 +38,12 @@ params:
subheader_font_size: 0.15
time_format: smart_date
time_grain_sqla: P1D
time_range: No filter
viz_type: big_number
x_axis: enrollment_status_date
y_axis_format: SMART_NUMBER
query_context: '{"datasource":{"id":188,"type":"table"},"force":false,"queries":[{"filters":[{"col":"enrollment_status","op":"==","val":"registered"},{"col":"enrollment_status_date","op":"TEMPORAL_RANGE","val":"No
filter"}],"extras":{"having":"","where":""},"applied_time_extras":{},"columns":[{"timeGrain":"P1D","columnType":"BASE_AXIS","sqlExpression":"enrollment_status_date","label":"enrollment_status_date","expressionType":"SQL"}],"metrics":["count"],"annotation_layers":[],"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{},"post_processing":[{"operation":"pivot","options":{"index":["enrollment_status_date"],"columns":[],"aggregates":{"count":{"operator":"mean"}},"drop_missing_columns":true}},{"operation":"flatten"}]}],"form_data":{"datasource":"188__table","viz_type":"big_number","slice_id":344,"x_axis":"enrollment_status_date","time_grain_sqla":"P1D","metric":"count","adhoc_filters":[{"clause":"WHERE","comparator":"registered","expressionType":"SIMPLE","filterOptionName":"filter_hcnm4t7piq6_hfbtt65nqqs","isExtra":false,"isNew":false,"operator":"==","operatorId":"EQUALS","sqlExpression":null,"subject":"enrollment_status"},{"clause":"WHERE","subject":"enrollment_status_date","operator":"TEMPORAL_RANGE","comparator":"No
filter","expressionType":"SIMPLE"}],"show_trend_line":true,"start_y_axis_at_zero":true,"color_picker":{"a":1,"b":135,"g":122,"r":0},"header_font_size":0.4,"subheader_font_size":0.15,"y_axis_format":"SMART_NUMBER","time_format":"smart_date","rolling_type":"None","extra_form_data":{},"dashboards":[148],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
slice_name: Currently Enrolled Learners Per Day
uuid: ed2fe731-6544-422f-bc55-42f399f48b2c
version: 1.0.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ certified_by: null
dataset_uuid: 39d1e786-c0c8-4c56-81c8-56fb0df88001
description: null
params:
adhoc_filters: []
adhoc_filters:
- clause: WHERE
comparator: No filter
expressionType: SIMPLE
operator: TEMPORAL_RANGE
subject: emission_time
extra_form_data: {}
granularity_sqla: emission_time
header_font_size: 0.4
metric:
aggregate: COUNT_DISTINCT
Expand Down Expand Up @@ -36,9 +40,11 @@ params:
sqlExpression: null
subheader_font_size: 0.15
time_format: smart_date
time_range: No filter
viz_type: big_number_total
y_axis_format: SMART_NUMBER
query_context: '{"datasource":{"id":242,"type":"table"},"force":false,"queries":[{"filters":[{"col":"emission_time","op":"TEMPORAL_RANGE","val":"No
filter"}],"extras":{"having":"","where":""},"applied_time_extras":{},"columns":[],"metrics":[{"aggregate":"COUNT_DISTINCT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"actor_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":445,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"String","type_generic":1,"verbose_name":null,"warning_markdown":null},"datasourceWarning":false,"expressionType":"SIMPLE","hasCustomLabel":false,"label":"COUNT_DISTINCT(actor_id)","optionName":"metric_5y4uvwa13v4_f12i3twecs6","sqlExpression":null}],"annotation_layers":[],"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{}}],"form_data":{"datasource":"242__table","viz_type":"big_number_total","slice_id":395,"metric":{"aggregate":"COUNT_DISTINCT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"actor_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":445,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"String","type_generic":1,"verbose_name":null,"warning_markdown":null},"datasourceWarning":false,"expressionType":"SIMPLE","hasCustomLabel":false,"label":"COUNT_DISTINCT(actor_id)","optionName":"metric_5y4uvwa13v4_f12i3twecs6","sqlExpression":null},"adhoc_filters":[{"clause":"WHERE","subject":"emission_time","operator":"TEMPORAL_RANGE","comparator":"No
filter","expressionType":"SIMPLE"}],"header_font_size":0.4,"subheader_font_size":0.15,"y_axis_format":"SMART_NUMBER","time_format":"smart_date","extra_form_data":{},"dashboards":[148],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
slice_name: Distinct forum users
uuid: feb323ad-c819-49ca-a336-584bd9ff1a2e
version: 1.0.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ params:
- null
y_axis_format: SMART_NUMBER
y_axis_label: Students
query_context: '{"datasource":{"id":209,"type":"table"},"force":false,"queries":[{"filters":[{"col":"success","op":"==","val":"true"}],"extras":{"having":"","where":""},"applied_time_extras":{},"columns":["attempts"],"metrics":["students"],"annotation_layers":[],"row_limit":10000,"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{}}],"form_data":{"datasource":"209__table","viz_type":"dist_bar","slice_id":331,"metrics":["students"],"adhoc_filters":[{"clause":"WHERE","comparator":"true","expressionType":"SIMPLE","filterOptionName":"filter_0fpmws3t1h6a_md2ud9xse7m","isExtra":false,"isNew":false,"operator":"==","operatorId":"EQUALS","sqlExpression":null,"subject":"success"}],"groupby":["attempts"],"columns":[],"row_limit":10000,"order_desc":true,"color_scheme":"supersetColors","show_legend":false,"rich_tooltip":true,"order_bars":true,"y_axis_format":"SMART_NUMBER","y_axis_label":"Students","y_axis_bounds":[null,null],"x_axis_label":"Number
Of Attempts To Find Correct Answer","bottom_margin":"auto","x_ticks_layout":"auto","extra_form_data":{},"dashboards":[148],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
slice_name: Distribution Of Attempts
uuid: db90930f-f16e-4c32-8050-0e4abae28f4c
version: 1.0.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ params:
- null
y_axis_format: SMART_NUMBER
y_axis_label: Students
query_context: '{"datasource":{"id":191,"type":"table"},"force":false,"queries":[{"filters":[],"extras":{"having":"","where":""},"applied_time_extras":{},"columns":["total_hints"],"metrics":["students"],"annotation_layers":[],"row_limit":10000,"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{}}],"form_data":{"datasource":"191__table","viz_type":"dist_bar","slice_id":307,"metrics":["students"],"adhoc_filters":[],"groupby":["total_hints"],"columns":[],"row_limit":10000,"order_desc":true,"color_scheme":"supersetColors","show_legend":false,"rich_tooltip":true,"y_axis_format":"SMART_NUMBER","y_axis_label":"Students","y_axis_bounds":[null,null],"x_axis_label":"Hints
/ Answer Displayed Before Correct Answer Chosen","bottom_margin":"auto","x_ticks_layout":"auto","extra_form_data":{},"dashboards":[148],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
slice_name: Distribution Of Hints Per Correct Answer
uuid: ee94be4c-6fdd-4295-b43c-40890d6c549d
version: 1.0.0
Expand Down
Loading