From 98f75440d41aa0c7d5abfc826a2a3a4136a57849 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Mon, 30 Sep 2024 12:23:49 -0500 Subject: [PATCH 01/15] feat(assets): format sql templates fore import --- .../aspects/apps/superset/pythonpath/create_assets.py | 6 +++++- .../aspects/build/aspects-superset/requirements.txt | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets.py index f72727249..2ae24294f 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets.py @@ -10,6 +10,8 @@ import yaml from copy import deepcopy from pathlib import Path +from sqlfmt.api import format_string +from sqlfmt.mode import Mode from superset import security_manager from superset.extensions import db @@ -108,6 +110,9 @@ def write_asset_to_file(asset, asset_name, folder, file_name, roles): asset["sqlalchemy_uri"] = DATABASES.get(asset["database_name"]) if folder in ["charts", "dashboards", "datasets"]: for locale in DASHBOARD_LOCALES: + if folder == "datasets": + asset["sql"] = format_string(asset["sql"], mode=Mode(dialect_name="clickhouse")) + updated_asset = generate_translated_asset( asset, asset_name, folder, locale, roles ) @@ -147,7 +152,6 @@ def generate_translated_asset(asset, asset_name, folder, language, roles): copy = deepcopy(asset) copy["uuid"] = str(get_localized_uuid(copy["uuid"], language)) copy[asset_name] = get_translation(copy[asset_name], language) - if folder == "dashboards": copy["slug"] = f"{copy['slug']}-{language}" copy["description"] = get_translation(copy["description"], language) diff --git a/tutoraspects/templates/aspects/build/aspects-superset/requirements.txt b/tutoraspects/templates/aspects/build/aspects-superset/requirements.txt index 3c2b63a0e..c3f55c10d 100644 --- a/tutoraspects/templates/aspects/build/aspects-superset/requirements.txt +++ b/tutoraspects/templates/aspects/build/aspects-superset/requirements.txt @@ -4,3 +4,4 @@ openedx-atlas ruamel-yaml==0.18.6 sentry-sdk[flask] urllib3>=1.26.15,<2 +shandy-sqlfmt[jinjafmt]==0.21.2 From 2777db22fb1cba9a0bdd9f4fba82c62f74c65016 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Fri, 27 Sep 2024 09:45:21 -0500 Subject: [PATCH 02/15] feat(metrics): sort metrics by clickhouse query time --- .../pythonpath/performance_metrics.py | 61 ++++++++++--------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py index c361b9e85..a6116f928 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py @@ -42,7 +42,10 @@ "Result rows: {result_rows}\n" "Memory Usage (MB): {memory_usage_mb}\n" "Row count (superset) {rowcount:}\n" - "Filters: {filters}\n\n" + "Filters: {filters}\n" + "SQL:\n" + "{sql}\n\n\n" + ) @click.command() @@ -123,7 +126,7 @@ def performance_metrics(course_key, dashboard_slug, slice_name, print_sql, return report logger.info("Waiting for clickhouse log...") - time.sleep(20) + time.sleep(30) get_query_log_from_clickhouse(report, query_contexts, print_sql, fail_on_error) return report @@ -184,10 +187,12 @@ def measure_chart(slice, query_context, fail_on_error): query_context = ChartDataQueryContextSchema().load(query_context) command = ChartDataCommand(query_context) - start_time = datetime.now() try: + start_time = datetime.now() result = command.run() - + end_time = datetime.now() + result["time_elapsed"] = (end_time - start_time).total_seconds() + result["slice"] = slice for query in result["queries"]: if "error" in query and query["error"]: raise query["error"] @@ -197,11 +202,6 @@ def measure_chart(slice, query_context, fail_on_error): raise e return - end_time = datetime.now() - - result["time_elapsed"] = (end_time - start_time).total_seconds() - result["slice"] = slice - return result @@ -231,39 +231,44 @@ def get_query_log_from_clickhouse(report, query_contexts, print_sql, fail_on_err logger.info("ClickHouse SQL: ") logger.info(parsed_sql) + + for k, chart_result in enumerate(report): + for query in chart_result["queries"]: + parsed_sql = ( + str(sqlparse.parse(query["query"])[0]).replace(";", "") + + "\n FORMAT Native" + ) + chart_result['sql'] = parsed_sql + clickhouse_report = clickhouse_queries.get(parsed_sql, {}) + chart_result.update( + clickhouse_report + ) + chart_result.update({ + "query_duration_ms": chart_result.get("query_duration_ms", 0) + }) + # Sort report by slowest queries - report = sorted(report, key=lambda x: x["time_elapsed"], reverse=True) + report = sorted(report, key=lambda x: x["query_duration_ms"], reverse=True) report_str = f"\nSuperset Reports: {RUN_ID}\n\n" - for i, chart_result in enumerate(report): + for k, chart_result in enumerate(report): report_str += ( report_format.format( - i=(i + 1), + i=(k + 1), dashboard=chart_result["dashboard"], slice=chart_result["slice"], superset_time=chart_result["time_elapsed"] ) ) - for i, query in enumerate(chart_result["queries"]): - parsed_sql = ( - str(sqlparse.parse(query["query"])[0]).replace(";", "") - + "\n FORMAT Native" - ) - - if print_sql: - logger.info("Superset SQL: ") - logger.info(parsed_sql) - - clickhouse_report = clickhouse_queries.get(parsed_sql, {}) + for query in chart_result["queries"]: report_str += ( query_format.format( - query_duration_ms=clickhouse_report.get( - "query_duration_ms", 0 - ) / 1000, - memory_usage_mb=clickhouse_report.get("memory_usage_mb"), - result_rows=clickhouse_report.get("result_rows"), + query_duration_ms=chart_result.get('query_duration_ms') / 1000, + memory_usage_mb=chart_result.get("memory_usage_mb"), + result_rows=chart_result.get("result_rows"), rowcount=query["rowcount"], filters=query["applied_filters"], + sql=chart_result['sql'] if print_sql else '', ) ) logger.info(report_str) From c71a90e390b92c6c57ce63f0f59c2fdf53ff09ec Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Fri, 27 Sep 2024 12:41:28 -0500 Subject: [PATCH 03/15] fix(metrics): use course name filter --- tutoraspects/commands_v1.py | 8 ++++---- .../apps/superset/pythonpath/performance_metrics.py | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tutoraspects/commands_v1.py b/tutoraspects/commands_v1.py index dffdbb23e..2723b6f58 100644 --- a/tutoraspects/commands_v1.py +++ b/tutoraspects/commands_v1.py @@ -149,9 +149,9 @@ def init_clickhouse() -> list[tuple[str, str]]: # Ex: "tutor local do performance-metrics " @click.command(context_settings={"ignore_unknown_options": True}) @click.option( - "--course_key", + "--course_name", default="", - help="A course_key to apply as a filter, you must include the 'course-v1:'.", + help="A course_name to apply as a filter.", ) @click.option( "--dashboard_slug", default="", help="Only run charts for the given dashboard." @@ -169,12 +169,12 @@ def init_clickhouse() -> list[tuple[str, str]]: "--fail_on_error", is_flag=True, default=False, help="Allow errors to fail the run." ) def performance_metrics( - course_key, dashboard_slug, slice_name, print_sql, fail_on_error + course_name, dashboard_slug, slice_name, print_sql, fail_on_error ) -> (list)[tuple[str, str]]: """ Job to measure performance metrics of charts and its queries in Superset and ClickHouse. """ - options = f"--course_key {course_key}" if course_key else "" + options = f"--course_name '{course_name}'" if course_name else "" options += f" --dashboard_slug {dashboard_slug}" if dashboard_slug else "" options += f' --slice_name "{slice_name}"' if slice_name else "" options += " --print_sql" if print_sql else "" diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py index a6116f928..a1a88733a 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py @@ -50,9 +50,9 @@ @click.command() @click.option( - "--course_key", + "--course_name", default="", - help="A course_key to apply as a filter, you must include the 'course-v1:'.") + help="A course_name to apply as a filter, you must include the 'course-v1:'.") @click.option( "--dashboard_slug", default="", @@ -71,7 +71,7 @@ @click.option( "--fail_on_error", is_flag=True, default=False, help="Allow errors to fail the run." ) -def performance_metrics(course_key, dashboard_slug, slice_name, print_sql, +def performance_metrics(course_name, dashboard_slug, slice_name, print_sql, fail_on_error): """ Measure the performance of the dashboard. @@ -79,8 +79,8 @@ def performance_metrics(course_key, dashboard_slug, slice_name, print_sql, # Mock the client name to identify the queries in the clickhouse system.query_log # table by by the http_user_agent field. extra_filters = [] - if course_key: - extra_filters += [{"col": "course_key", "op": "==", "val": course_key}] + if course_name: + extra_filters += [{"col": "course_name", "op": "IN", "val": course_name}] with patch("clickhouse_connect.common.build_client_name") as mock_build_client_name: mock_build_client_name.return_value = RUN_ID From a42888262cc121e9fb55eab3d147163a15d042d6 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Fri, 27 Sep 2024 12:41:53 -0500 Subject: [PATCH 04/15] fix(metrics): set global form data to allow prewhere filters --- .../apps/superset/pythonpath/performance_metrics.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py index a1a88733a..bd983f784 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py @@ -126,7 +126,7 @@ def performance_metrics(course_name, dashboard_slug, slice_name, print_sql, return report logger.info("Waiting for clickhouse log...") - time.sleep(30) + time.sleep(20) get_query_log_from_clickhouse(report, query_contexts, print_sql, fail_on_error) return report @@ -170,6 +170,10 @@ def get_slice_query_context(slice, query_contexts, extra_filters=None): } ) + query_context["form_data"]["extra_form_data"] = { + "filters": extra_filters + } + if extra_filters: for query in query_context["queries"]: query["filters"] += extra_filters @@ -177,16 +181,17 @@ def get_slice_query_context(slice, query_contexts, extra_filters=None): return query_context -def measure_chart(slice, query_context, fail_on_error): +def measure_chart(slice, query_context_dict, fail_on_error): """ Measure the performance of a chart and return the results. """ logger.info(f"Fetching slice data: {slice}") g.user = security_manager.find_user(username="{{SUPERSET_ADMIN_USERNAME}}") - query_context = ChartDataQueryContextSchema().load(query_context) + query_context = ChartDataQueryContextSchema().load(query_context_dict) command = ChartDataCommand(query_context) - + command.validate() + g.form_data = query_context.form_data try: start_time = datetime.now() result = command.run() From 6753a448d55d63cc00bd400fe40f5c0ddcb38117 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Fri, 27 Sep 2024 12:48:29 -0500 Subject: [PATCH 05/15] chore(metrics): remove unnecessary code --- .../aspects/apps/superset/pythonpath/performance_metrics.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py index bd983f784..7ac99b070 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py @@ -174,10 +174,6 @@ def get_slice_query_context(slice, query_contexts, extra_filters=None): "filters": extra_filters } - if extra_filters: - for query in query_context["queries"]: - query["filters"] += extra_filters - return query_context @@ -273,7 +269,7 @@ def get_query_log_from_clickhouse(report, query_contexts, print_sql, fail_on_err result_rows=chart_result.get("result_rows"), rowcount=query["rowcount"], filters=query["applied_filters"], - sql=chart_result['sql'] if print_sql else '', + sql=chart_result['sql'] if print_sql else '', ) ) logger.info(report_str) From d26555bc6b3d4a32a2e51d402f9e93484c621ec8 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Fri, 27 Sep 2024 13:05:00 -0500 Subject: [PATCH 06/15] chore(metrics): remove unnecesary logging --- .../aspects/apps/superset/pythonpath/performance_metrics.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py index 7ac99b070..c190dc831 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py @@ -228,10 +228,6 @@ def get_query_log_from_clickhouse(report, query_contexts, print_sql, fail_on_err parsed_sql = str(sqlparse.parse(row.pop("query"))[0]) clickhouse_queries[parsed_sql] = row - if print_sql: - logger.info("ClickHouse SQL: ") - logger.info(parsed_sql) - for k, chart_result in enumerate(report): for query in chart_result["queries"]: From 4cb45adf3f3a623c6e6ff20d3f2f3f800ff6dccd Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Fri, 27 Sep 2024 13:12:48 -0500 Subject: [PATCH 07/15] fix(metrics): restore extra filter per query --- .../aspects/apps/superset/pythonpath/performance_metrics.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py index c190dc831..ba681a3a7 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py @@ -174,6 +174,10 @@ def get_slice_query_context(slice, query_contexts, extra_filters=None): "filters": extra_filters } + if extra_filters: + for query in query_context["queries"]: + query["filters"] += extra_filters + return query_context From f5a7533e7fdaefa7ee5ddad51205a55a0a27320b Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Mon, 30 Sep 2024 10:06:22 -0500 Subject: [PATCH 08/15] feat(metrics): add organization filter option --- tutoraspects/commands_v1.py | 11 +++++++++-- .../apps/superset/pythonpath/performance_metrics.py | 8 +++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/tutoraspects/commands_v1.py b/tutoraspects/commands_v1.py index 2723b6f58..9dbf55c6d 100644 --- a/tutoraspects/commands_v1.py +++ b/tutoraspects/commands_v1.py @@ -148,6 +148,11 @@ def init_clickhouse() -> list[tuple[str, str]]: # Ex: "tutor local do performance-metrics " @click.command(context_settings={"ignore_unknown_options": True}) +@click.option( + "--org", + default="", + help="An organization to apply as a filter.", +) @click.option( "--course_name", default="", @@ -169,12 +174,14 @@ def init_clickhouse() -> list[tuple[str, str]]: "--fail_on_error", is_flag=True, default=False, help="Allow errors to fail the run." ) def performance_metrics( - course_name, dashboard_slug, slice_name, print_sql, fail_on_error + org, course_name, dashboard_slug, slice_name, print_sql, fail_on_error ) -> (list)[tuple[str, str]]: """ Job to measure performance metrics of charts and its queries in Superset and ClickHouse. """ - options = f"--course_name '{course_name}'" if course_name else "" + options = "" + options += f"--org '{org}' " if org else "" + options += f"--course_name '{course_name}' " if course_name else "" options += f" --dashboard_slug {dashboard_slug}" if dashboard_slug else "" options += f' --slice_name "{slice_name}"' if slice_name else "" options += " --print_sql" if print_sql else "" diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py index ba681a3a7..3d709fce7 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py @@ -49,6 +49,10 @@ ) @click.command() +@click.option( + "--org", + default="", + help="An organization to apply as a filter.") @click.option( "--course_name", default="", @@ -71,7 +75,7 @@ @click.option( "--fail_on_error", is_flag=True, default=False, help="Allow errors to fail the run." ) -def performance_metrics(course_name, dashboard_slug, slice_name, print_sql, +def performance_metrics(org, course_name, dashboard_slug, slice_name, print_sql, fail_on_error): """ Measure the performance of the dashboard. @@ -81,6 +85,8 @@ def performance_metrics(course_name, dashboard_slug, slice_name, print_sql, extra_filters = [] if course_name: extra_filters += [{"col": "course_name", "op": "IN", "val": course_name}] + if org: + extra_filters += [{"col": "org", "op": "IN", "val": org}] with patch("clickhouse_connect.common.build_client_name") as mock_build_client_name: mock_build_client_name.return_value = RUN_ID From 85f881f37cc61862f6d90b516c17fbc3fe008bca Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Mon, 30 Sep 2024 10:11:08 -0500 Subject: [PATCH 09/15] chore: run black and isort on performance metrics --- .../pythonpath/performance_metrics.py | 89 ++++++++----------- 1 file changed, 38 insertions(+), 51 deletions(-) diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py index 3d709fce7..e21577d85 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py @@ -7,9 +7,6 @@ across Superset installations. """ -from create_assets import BASE_DIR, ASSET_FOLDER_MAPPING, app - -import json import logging import os import time @@ -20,10 +17,12 @@ import click import sqlparse import yaml +from create_assets import app + from flask import g from superset import security_manager -from superset.commands.chart.data.get_data_command import ChartDataCommand from superset.charts.schemas import ChartDataQueryContextSchema +from superset.commands.chart.data.get_data_command import ChartDataCommand from superset.extensions import db from superset.models.dashboard import Dashboard from superset.models.slice import Slice @@ -45,38 +44,34 @@ "Filters: {filters}\n" "SQL:\n" "{sql}\n\n\n" - ) + @click.command() -@click.option( - "--org", - default="", - help="An organization to apply as a filter.") +@click.option("--org", default="", help="An organization to apply as a filter.") @click.option( "--course_name", default="", - help="A course_name to apply as a filter, you must include the 'course-v1:'.") + help="A course_name to apply as a filter, you must include the 'course-v1:'.", +) @click.option( - "--dashboard_slug", - default="", - help="Only run charts for the given dashboard.") + "--dashboard_slug", default="", help="Only run charts for the given dashboard." +) @click.option( "--slice_name", default="", help="Only run charts for the given slice name, if the name appears in more than " - "one dashboard it will be run for each.") + "one dashboard it will be run for each.", +) @click.option( - "--print_sql", - is_flag=True, - default=False, - help="Whether to print the SQL run." + "--print_sql", is_flag=True, default=False, help="Whether to print the SQL run." ) @click.option( "--fail_on_error", is_flag=True, default=False, help="Allow errors to fail the run." ) -def performance_metrics(org, course_name, dashboard_slug, slice_name, print_sql, - fail_on_error): +def performance_metrics( + org, course_name, dashboard_slug, slice_name, print_sql, fail_on_error +): """ Measure the performance of the dashboard. """ @@ -90,7 +85,9 @@ def performance_metrics(org, course_name, dashboard_slug, slice_name, print_sql, with patch("clickhouse_connect.common.build_client_name") as mock_build_client_name: mock_build_client_name.return_value = RUN_ID - target_dashboards = [dashboard_slug] if dashboard_slug else {{SUPERSET_EMBEDDABLE_DASHBOARDS}} + target_dashboards = ( + [dashboard_slug] if dashboard_slug else {{SUPERSET_EMBEDDABLE_DASHBOARDS}} + ) dashboards = ( db.session.query(Dashboard) @@ -107,14 +104,13 @@ def performance_metrics(org, course_name, dashboard_slug, slice_name, print_sql, logger.info(f"Dashboard: {dashboard.slug}") for slice in dashboard.slices: if slice_name and not slice_name == slice.slice_name: - logger.info(f"{slice.slice_name} doesn't match {slice_name}, " - f"skipping.") + logger.info( + f"{slice.slice_name} doesn't match {slice_name}, " f"skipping." + ) continue query_context = get_slice_query_context( - slice, - query_contexts, - extra_filters + slice, query_contexts, extra_filters ) result = measure_chart(slice, query_context, fail_on_error) if not result: @@ -176,9 +172,7 @@ def get_slice_query_context(slice, query_contexts, extra_filters=None): } ) - query_context["form_data"]["extra_form_data"] = { - "filters": extra_filters - } + query_context["form_data"]["extra_form_data"] = {"filters": extra_filters} if extra_filters: for query in query_context["queries"]: @@ -238,45 +232,38 @@ def get_query_log_from_clickhouse(report, query_contexts, print_sql, fail_on_err parsed_sql = str(sqlparse.parse(row.pop("query"))[0]) clickhouse_queries[parsed_sql] = row - for k, chart_result in enumerate(report): for query in chart_result["queries"]: parsed_sql = ( str(sqlparse.parse(query["query"])[0]).replace(";", "") + "\n FORMAT Native" ) - chart_result['sql'] = parsed_sql + chart_result["sql"] = parsed_sql clickhouse_report = clickhouse_queries.get(parsed_sql, {}) + chart_result.update(clickhouse_report) chart_result.update( - clickhouse_report + {"query_duration_ms": chart_result.get("query_duration_ms", 0)} ) - chart_result.update({ - "query_duration_ms": chart_result.get("query_duration_ms", 0) - }) # Sort report by slowest queries report = sorted(report, key=lambda x: x["query_duration_ms"], reverse=True) report_str = f"\nSuperset Reports: {RUN_ID}\n\n" for k, chart_result in enumerate(report): - report_str += ( - report_format.format( - i=(k + 1), - dashboard=chart_result["dashboard"], - slice=chart_result["slice"], - superset_time=chart_result["time_elapsed"] - ) + report_str += report_format.format( + i=(k + 1), + dashboard=chart_result["dashboard"], + slice=chart_result["slice"], + superset_time=chart_result["time_elapsed"], ) for query in chart_result["queries"]: - report_str += ( - query_format.format( - query_duration_ms=chart_result.get('query_duration_ms') / 1000, - memory_usage_mb=chart_result.get("memory_usage_mb"), - result_rows=chart_result.get("result_rows"), - rowcount=query["rowcount"], - filters=query["applied_filters"], - sql=chart_result['sql'] if print_sql else '', - ) + report_str += query_format.format( + query_duration_ms=chart_result.get("query_duration_ms") / 1000, + memory_usage_mb=chart_result.get("memory_usage_mb"), + result_rows=chart_result.get("result_rows"), + rowcount=query["rowcount"], + filters=query["applied_filters"], + sql=chart_result["sql"] if print_sql else "", ) logger.info(report_str) From 858f3c269f1783b0691635b3cfdfbfba80e1348c Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Mon, 30 Sep 2024 10:26:58 -0500 Subject: [PATCH 10/15] chore: quality fixes --- tutoraspects/commands_v1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutoraspects/commands_v1.py b/tutoraspects/commands_v1.py index 9dbf55c6d..9d5e32cfc 100644 --- a/tutoraspects/commands_v1.py +++ b/tutoraspects/commands_v1.py @@ -173,7 +173,7 @@ def init_clickhouse() -> list[tuple[str, str]]: @click.option( "--fail_on_error", is_flag=True, default=False, help="Allow errors to fail the run." ) -def performance_metrics( +def performance_metrics( # pylint: disable=too-many-arguments org, course_name, dashboard_slug, slice_name, print_sql, fail_on_error ) -> (list)[tuple[str, str]]: """ From 20b4f531e21d1cddd9517ecaabee1ed6aa282866 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Mon, 30 Sep 2024 11:32:20 -0500 Subject: [PATCH 11/15] chore: quality fixes --- tutoraspects/commands_v1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutoraspects/commands_v1.py b/tutoraspects/commands_v1.py index 9d5e32cfc..03623b9ca 100644 --- a/tutoraspects/commands_v1.py +++ b/tutoraspects/commands_v1.py @@ -173,7 +173,7 @@ def init_clickhouse() -> list[tuple[str, str]]: @click.option( "--fail_on_error", is_flag=True, default=False, help="Allow errors to fail the run." ) -def performance_metrics( # pylint: disable=too-many-arguments +def performance_metrics( # pylint: disable=too-many-arguments,too-many-positional-arguments org, course_name, dashboard_slug, slice_name, print_sql, fail_on_error ) -> (list)[tuple[str, str]]: """ From 4e531d482df45a440dfc51a5ef2b8456649e0b86 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Mon, 30 Sep 2024 11:17:41 -0500 Subject: [PATCH 12/15] perf(sql): use org and course_key filters to use primary key indexes --- .../queries/active_last_7_days.sql | 5 +- .../queries/at_risk_learner_filter.sql | 4 +- .../queries/at_risk_problem_results.sql | 1 + .../queries/dim_at_risk_learners.sql | 1 + .../queries/dim_course_problems.sql | 20 --- .../queries/dim_course_videos.sql | 20 --- .../queries/enrollment_status.sql | 1 + .../fact_at_risk_navigation_completion.sql | 1 + .../fact_at_risk_pageview_engagement.sql | 1 + .../fact_at_risk_problem_engagement.sql | 4 +- .../queries/fact_at_risk_video_engagement.sql | 1 + .../queries/fact_at_risk_video_plays.sql | 1 + .../queries/fact_at_risk_video_watches.sql | 1 + .../fact_at_risk_watched_video_segments.sql | 1 + .../queries/fact_forum_interactions.sql | 3 - .../fact_learner_problem_course_summary.sql | 136 ------------------ .../queries/fact_learner_problem_summary.sql | 125 ---------------- .../queries/fact_learner_summary.sql | 1 + .../queries/fact_page_engagement.sql | 1 + .../queries/fact_problem_engagement.sql | 1 + .../queries/fact_problem_grades.sql | 42 ------ .../queries/fact_problem_responses.sql | 30 ---- .../queries/fact_transcript_usage.sql | 25 ---- .../queries/fact_video_engagement.sql | 1 + .../queries/fact_watched_video_segments.sql | 1 + .../queries/hints_per_success.sql | 22 --- .../queries/int_problem_responses.sql | 21 --- .../queries/int_problem_results.sql | 2 + .../openedx-assets/queries/posts_per_user.sql | 9 -- .../queries/problem_coursewide_avg.sql | 1 + 30 files changed, 27 insertions(+), 456 deletions(-) delete mode 100644 tutoraspects/templates/openedx-assets/queries/dim_course_problems.sql delete mode 100644 tutoraspects/templates/openedx-assets/queries/dim_course_videos.sql delete mode 100644 tutoraspects/templates/openedx-assets/queries/fact_forum_interactions.sql delete mode 100644 tutoraspects/templates/openedx-assets/queries/fact_learner_problem_course_summary.sql delete mode 100644 tutoraspects/templates/openedx-assets/queries/fact_learner_problem_summary.sql delete mode 100644 tutoraspects/templates/openedx-assets/queries/fact_problem_grades.sql delete mode 100644 tutoraspects/templates/openedx-assets/queries/fact_problem_responses.sql delete mode 100644 tutoraspects/templates/openedx-assets/queries/fact_transcript_usage.sql delete mode 100644 tutoraspects/templates/openedx-assets/queries/hints_per_success.sql delete mode 100644 tutoraspects/templates/openedx-assets/queries/int_problem_responses.sql delete mode 100644 tutoraspects/templates/openedx-assets/queries/posts_per_user.sql diff --git a/tutoraspects/templates/openedx-assets/queries/active_last_7_days.sql b/tutoraspects/templates/openedx-assets/queries/active_last_7_days.sql index 5b6860455..294836a1a 100644 --- a/tutoraspects/templates/openedx-assets/queries/active_last_7_days.sql +++ b/tutoraspects/templates/openedx-assets/queries/active_last_7_days.sql @@ -2,10 +2,13 @@ with recent_activity as ( select course_key, COUNT(DISTINCT actor_id) as active_last_7_days from {{ ASPECTS_XAPI_DATABASE }}.navigation_events - where emission_time >= NOW() - INTERVAL 7 DAY + where + emission_time >= NOW() - INTERVAL 7 DAY + {% include 'openedx-assets/queries/common_filters.sql' %} group by course_key ) select fss.*, COALESCE(ra.active_last_7_days, 0) as active_within_last_7_days from {{ DBT_PROFILE_TARGET_DATABASE }}.fact_student_status fss left join recent_activity ra on fss.course_key = ra.course_key +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/at_risk_learner_filter.sql b/tutoraspects/templates/openedx-assets/queries/at_risk_learner_filter.sql index 09db324af..c051cbc5b 100644 --- a/tutoraspects/templates/openedx-assets/queries/at_risk_learner_filter.sql +++ b/tutoraspects/templates/openedx-assets/queries/at_risk_learner_filter.sql @@ -12,4 +12,6 @@ with select org, course_key, learners.actor_id as actor_id from {{ DBT_PROFILE_TARGET_DATABASE }}.fact_student_status learners join page_visits using (org, course_key, actor_id) -where approving_state = 'failed' and enrollment_status = 'registered' +where + approving_state = 'failed' and enrollment_status = 'registered' + {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/at_risk_problem_results.sql b/tutoraspects/templates/openedx-assets/queries/at_risk_problem_results.sql index a8bd54b33..254df792f 100644 --- a/tutoraspects/templates/openedx-assets/queries/at_risk_problem_results.sql +++ b/tutoraspects/templates/openedx-assets/queries/at_risk_problem_results.sql @@ -4,3 +4,4 @@ join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} ) as at_risk_learners using (org, course_key, actor_id) +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/dim_at_risk_learners.sql b/tutoraspects/templates/openedx-assets/queries/dim_at_risk_learners.sql index 94a43a71f..db4c2e23a 100644 --- a/tutoraspects/templates/openedx-assets/queries/dim_at_risk_learners.sql +++ b/tutoraspects/templates/openedx-assets/queries/dim_at_risk_learners.sql @@ -29,3 +29,4 @@ where approving_state = 'failed' and enrollment_status = 'registered' and page_visits.last_visited < subtractDays(now(), 7) + {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/dim_course_problems.sql b/tutoraspects/templates/openedx-assets/queries/dim_course_problems.sql deleted file mode 100644 index 1d014dba5..000000000 --- a/tutoraspects/templates/openedx-assets/queries/dim_course_problems.sql +++ /dev/null @@ -1,20 +0,0 @@ -select - org, - course_name, - course_key, - course_run, - block_id as problem_id, - block_name as problem_name, - display_name_with_location as problem_name_with_location -from {{ DBT_PROFILE_TARGET_DATABASE }}.dim_course_blocks -where - problem_id like '%problem+block%' - {% raw -%} - {% if filter_values("org") != [] %} - and org in {{ filter_values("org") | where_in }} - {% endif %} - {% if filter_values("problem_name_with_location") != [] %} - and problem_name_with_location - in {{ filter_values("problem_name_with_location") | where_in }} - {% endif %} - {%- endraw %} diff --git a/tutoraspects/templates/openedx-assets/queries/dim_course_videos.sql b/tutoraspects/templates/openedx-assets/queries/dim_course_videos.sql deleted file mode 100644 index 16a12a10a..000000000 --- a/tutoraspects/templates/openedx-assets/queries/dim_course_videos.sql +++ /dev/null @@ -1,20 +0,0 @@ -select - org, - course_name, - course_key, - course_run, - block_id as video_id, - block_name as video_name, - display_name_with_location as video_name_with_location -from {{ DBT_PROFILE_TARGET_DATABASE }}.dim_course_blocks -where - video_id like '%video+block%' - {% raw -%} - {% if filter_values("org") != [] %} - and org in {{ filter_values("org") | where_in }} - {% endif %} - {% if filter_values("video_name_with_location") != [] %} - and video_name_with_location - in {{ filter_values("video_name_with_location") | where_in }} - {% endif %} - {%- endraw %} diff --git a/tutoraspects/templates/openedx-assets/queries/enrollment_status.sql b/tutoraspects/templates/openedx-assets/queries/enrollment_status.sql index 6db19984b..2b79f7bcc 100644 --- a/tutoraspects/templates/openedx-assets/queries/enrollment_status.sql +++ b/tutoraspects/templates/openedx-assets/queries/enrollment_status.sql @@ -12,3 +12,4 @@ left join {{ ASPECTS_EVENT_SINK_DATABASE }}.course_names cn on fes.org = cn.org and fes.course_key = cn.course_key +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_navigation_completion.sql b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_navigation_completion.sql index ea2894572..f7544ff45 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_navigation_completion.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_navigation_completion.sql @@ -4,3 +4,4 @@ join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} ) as at_risk_learners using (org, course_key, actor_id) +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_pageview_engagement.sql b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_pageview_engagement.sql index c65b6828b..3c37ec1d7 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_pageview_engagement.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_pageview_engagement.sql @@ -4,3 +4,4 @@ join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} ) as at_risk_learners using (org, course_key, actor_id) +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_problem_engagement.sql b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_problem_engagement.sql index bc1630cda..d53cd62f6 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_problem_engagement.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_problem_engagement.sql @@ -1,4 +1,6 @@ -{% include 'openedx-assets/queries/fact_problem_engagement.sql' %} +with fact_problem_engagement as + ({% include 'openedx-assets/queries/fact_problem_engagement.sql' %}) +select fact_problem_engagement.* from fact_problem_engagement pe join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_engagement.sql b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_engagement.sql index b5a9b793a..2160ab141 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_engagement.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_engagement.sql @@ -4,3 +4,4 @@ join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} ) as at_risk_learners using (org, course_key, actor_id) +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_plays.sql b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_plays.sql index b22e43c4b..1cd8491fd 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_plays.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_plays.sql @@ -6,3 +6,4 @@ join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} ) as at_risk_learners using (org, course_key, actor_id) +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_watches.sql b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_watches.sql index 86efff57d..477d232fe 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_watches.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_watches.sql @@ -6,3 +6,4 @@ join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} ) as at_risk_learners using (org, course_key, actor_id) +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_watched_video_segments.sql b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_watched_video_segments.sql index df054f3a9..b74dfb745 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_watched_video_segments.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_watched_video_segments.sql @@ -6,3 +6,4 @@ join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} ) as at_risk_learners using (org, course_key, actor_id) +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_forum_interactions.sql b/tutoraspects/templates/openedx-assets/queries/fact_forum_interactions.sql deleted file mode 100644 index 4570c0aa0..000000000 --- a/tutoraspects/templates/openedx-assets/queries/fact_forum_interactions.sql +++ /dev/null @@ -1,3 +0,0 @@ -select * -from {{ DBT_PROFILE_TARGET_DATABASE }}.fact_forum_interactions -where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_learner_problem_course_summary.sql b/tutoraspects/templates/openedx-assets/queries/fact_learner_problem_course_summary.sql deleted file mode 100644 index 29ff1a79c..000000000 --- a/tutoraspects/templates/openedx-assets/queries/fact_learner_problem_course_summary.sql +++ /dev/null @@ -1,136 +0,0 @@ -with - problem_responses as ( - {% include 'openedx-assets/queries/int_problem_responses.sql' %} - ), - outcomes as ( - select - emission_time, - org, - course_key, - problem_id, - actor_id, - success, - first_value(success) over ( - partition by course_key, problem_id, actor_id order by success DESC - ) as was_successful - from problem_responses - ), - successful_responses as ( - select - org, - course_key, - problem_id, - actor_id, - min(emission_time) as first_success_at - from outcomes - where was_successful = true and success = true - group by org, course_key, problem_id, actor_id - ), - unsuccessful_responses as ( - select - org, - course_key, - problem_id, - actor_id, - max(emission_time) as last_response_at - from outcomes - where was_successful = false - group by org, course_key, problem_id, actor_id - ), - final_responses as ( - select org, course_key, problem_id, actor_id, first_success_at as emission_time - from successful_responses - union all - select org, course_key, problem_id, actor_id, last_response_at as emission_time - from unsuccessful_responses - ), - int_problem_results as ( - select - emission_time, - org, - course_key, - course_name, - course_run, - problem_id, - problem_name, - problem_name_with_location, - actor_id, - responses, - success, - attempts - from problem_responses - inner join - final_responses using (org, course_key, problem_id, actor_id, emission_time) - ), - summary as ( - select - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id, - success, - attempts, - 0 as num_hints_displayed, - 0 as num_answers_displayed - from int_problem_results - where - 1 = 1 - {% raw %} - {% if from_dttm %} and emission_time > '{{ from_dttm }}' {% endif %} - {% if to_dttm %} and emission_time < '{{ to_dttm }}' {% endif %} - {% endraw %} - union all - select - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id, - NULL as success, - NULL as attempts, - caseWithExpression(help_type, 'hint', 1, 0) as num_hints_displayed, - caseWithExpression(help_type, 'answer', 1, 0) as num_answers_displayed - from {{ DBT_PROFILE_TARGET_DATABASE }}.int_problem_hints - where - 1 = 1 - {% raw %} - {% if from_dttm %} and emission_time > '{{ from_dttm }}' {% endif %} - {% if to_dttm %} and emission_time < '{{ to_dttm }}' {% endif %} - {% endraw %} - {% include 'openedx-assets/queries/common_filters.sql' %} - ) - -select - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id, - coalesce(any(success), false) as success, - coalesce(any(attempts), 0) as attempts, - sum(num_hints_displayed) as num_hints_displayed, - sum(num_answers_displayed) as num_answers_displayed -from summary -where - {% raw %} - {% if get_filters("course_name", remove_filter=True) == [] %} 1 = 1 - {% elif filter_values("course_name") != [] %} - course_name in {{ filter_values("course_name") | where_in }} - {% else %} 1 = 0 - {% endif %} - {% endraw %} -group by - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id diff --git a/tutoraspects/templates/openedx-assets/queries/fact_learner_problem_summary.sql b/tutoraspects/templates/openedx-assets/queries/fact_learner_problem_summary.sql deleted file mode 100644 index e08113378..000000000 --- a/tutoraspects/templates/openedx-assets/queries/fact_learner_problem_summary.sql +++ /dev/null @@ -1,125 +0,0 @@ -with - problem_responses as ( - {% include 'openedx-assets/queries/int_problem_responses.sql' %} - ), - outcomes as ( - select - emission_time, - org, - course_key, - problem_id, - actor_id, - success, - first_value(success) over ( - partition by course_key, problem_id, actor_id order by success ASC - ) as was_successful - from problem_responses - ), - successful_responses as ( - select - org, - course_key, - problem_id, - actor_id, - min(emission_time) as first_success_at - from outcomes - where was_successful = true and success = true - group by org, course_key, problem_id, actor_id - ), - unsuccessful_responses as ( - select - org, - course_key, - problem_id, - actor_id, - max(emission_time) as last_response_at - from outcomes - where was_successful = false - group by org, course_key, problem_id, actor_id - ), - final_responses as ( - select org, course_key, problem_id, actor_id, first_success_at as emission_time - from successful_responses - union all - select org, course_key, problem_id, actor_id, last_response_at as emission_time - from unsuccessful_responses - ), - int_problem_results as ( - select - emission_time, - org, - course_key, - course_name, - course_run, - problem_id, - problem_name, - problem_name_with_location, - actor_id, - responses, - success, - attempts - from problem_responses - inner join - final_responses using (org, course_key, problem_id, actor_id, emission_time) - ), - summary_base as ( - select - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id, - success, - attempts, - 0 as num_hints_displayed, - 0 as num_answers_displayed - from int_problem_results - union all - select - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id, - NULL as success, - NULL as attempts, - caseWithExpression(help_type, 'hint', 1, 0) as num_hints_displayed, - caseWithExpression(help_type, 'answer', 1, 0) as num_answers_displayed - from {{ DBT_PROFILE_TARGET_DATABASE }}.int_problem_hints - where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} - ) - -select - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id, - coalesce(any(success), false) as success, - coalesce(any(attempts), 0) as attempts, - sum(num_hints_displayed) as num_hints_displayed, - sum(num_answers_displayed) as num_answers_displayed -from summary_base -where - {% raw %} - {% if get_filters("problem_name_with_location", remove_filter=True) == [] %} 1 = 1 - {% elif filter_values("problem_name_with_location") != [] %} - problem_name_with_location - in {{ filter_values("problem_name_with_location") | where_in }} - {% else %} 1 = 0 - {% endif %} - {% endraw %} -group by - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id diff --git a/tutoraspects/templates/openedx-assets/queries/fact_learner_summary.sql b/tutoraspects/templates/openedx-assets/queries/fact_learner_summary.sql index 398b2e3b8..ead770402 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_learner_summary.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_learner_summary.sql @@ -66,3 +66,4 @@ left join on fss.org = let.org and fss.course_key = let.course_key and fss.actor_id = let.actor_id +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_page_engagement.sql b/tutoraspects/templates/openedx-assets/queries/fact_page_engagement.sql index ae3a4b63d..ba8980542 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_page_engagement.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_page_engagement.sql @@ -50,3 +50,4 @@ join left outer join {{ DBT_PROFILE_TARGET_DATABASE }}.dim_user_pii users on toUUID(pv.actor_id) = users.external_user_id +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_problem_engagement.sql b/tutoraspects/templates/openedx-assets/queries/fact_problem_engagement.sql index ebe8d97f0..8c21465b0 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_problem_engagement.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_problem_engagement.sql @@ -51,3 +51,4 @@ join left outer join {{ DBT_PROFILE_TARGET_DATABASE }}.dim_user_pii users on toUUID(pe.actor_id) = users.external_user_id +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_problem_grades.sql b/tutoraspects/templates/openedx-assets/queries/fact_problem_grades.sql deleted file mode 100644 index 64dae4984..000000000 --- a/tutoraspects/templates/openedx-assets/queries/fact_problem_grades.sql +++ /dev/null @@ -1,42 +0,0 @@ -with - grades as ( - select * - from {{ DBT_PROFILE_TARGET_DATABASE }}.fact_grades - where - grade_type = 'problem' - - {% raw %} - {% if get_filters("problem_name_with_location", remove_filter=True) == [] %} - {% elif filter_values("problem_name_with_location") != [] %} - and entity_name_with_location - in {{ - filter_values( - "problem_name_with_location", remove_filter=True - ) | where_in - }} - {% else %} and 1 = 0 - {% endif %} - {% endraw %} - - {% include 'openedx-assets/queries/common_filters.sql' %} - ), - most_recent_grades as ( - select org, course_key, entity_id, actor_id, max(emission_time) as emission_time - from grades - group by org, course_key, entity_id, actor_id - ) - -select - grades.emission_time as emission_time, - grades.org as org, - grades.course_key as course_key, - grades.course_name as course_name, - grades.course_run as course_run, - grades.entity_name as entity_name, - grades.entity_name_with_location as entity_name_with_location, - grades.actor_id as actor_id, - grades.grade_type as grade_type, - grades.scaled_score as scaled_score, - grades.grade_bucket as grade_bucket -from grades -join most_recent_grades using (org, course_key, entity_id, actor_id, emission_time) diff --git a/tutoraspects/templates/openedx-assets/queries/fact_problem_responses.sql b/tutoraspects/templates/openedx-assets/queries/fact_problem_responses.sql deleted file mode 100644 index fe273177b..000000000 --- a/tutoraspects/templates/openedx-assets/queries/fact_problem_responses.sql +++ /dev/null @@ -1,30 +0,0 @@ -with - problem_responses as ( - {% include 'openedx-assets/queries/int_problem_responses.sql' %} - ) - -select - emission_time, - org, - course_key, - course_name, - course_run, - problem_id, - problem_name, - problem_name_with_location, - actor_id, - attempts, - success, - arrayJoin( - if(JSONArrayLength(responses) > 0, JSONExtractArrayRaw(responses), [responses]) - ) as responses -from problem_responses -where - {% raw %} - {% if get_filters("problem_name_with_location", remove_filter=True) == [] %} 1 = 1 - {% elif filter_values("problem_name_with_location") != [] %} - problem_name_with_location - in {{ filter_values("problem_name_with_location") | where_in }} - {% else %} 1 = 0 - {% endif %} - {% endraw %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_transcript_usage.sql b/tutoraspects/templates/openedx-assets/queries/fact_transcript_usage.sql deleted file mode 100644 index 57a2ccbea..000000000 --- a/tutoraspects/templates/openedx-assets/queries/fact_transcript_usage.sql +++ /dev/null @@ -1,25 +0,0 @@ -with - transcripts as ( - select * - from {{ DBT_PROFILE_TARGET_DATABASE }}.fact_transcript_usage - where - {% raw %} - {% if get_filters("course_name", remove_filter=True) == [] %} 1 = 1 - {% elif filter_values("course_name") != [] %} - course_name in {{ filter_values("course_name") | where_in }} - {% else %} 1 = 0 - {% endif %} - {% endraw %} - {% include 'openedx-assets/queries/common_filters.sql' %} - ) - -select - emission_time, - org, - course_key, - course_name, - course_run, - video_name, - video_name_with_location, - actor_id -from transcripts diff --git a/tutoraspects/templates/openedx-assets/queries/fact_video_engagement.sql b/tutoraspects/templates/openedx-assets/queries/fact_video_engagement.sql index 300d30e55..19a48674e 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_video_engagement.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_video_engagement.sql @@ -50,3 +50,4 @@ join left outer join {{ DBT_PROFILE_TARGET_DATABASE }}.dim_user_pii users on toUUID(ve.actor_id) = users.external_user_id +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_watched_video_segments.sql b/tutoraspects/templates/openedx-assets/queries/fact_watched_video_segments.sql index ccb109a9c..328ab6da0 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_watched_video_segments.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_watched_video_segments.sql @@ -77,6 +77,7 @@ with segments.course_key = blocks.course_key and segments.video_id = blocks.block_id ) + where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} ) select diff --git a/tutoraspects/templates/openedx-assets/queries/hints_per_success.sql b/tutoraspects/templates/openedx-assets/queries/hints_per_success.sql deleted file mode 100644 index cf797bbfd..000000000 --- a/tutoraspects/templates/openedx-assets/queries/hints_per_success.sql +++ /dev/null @@ -1,22 +0,0 @@ -with - summary as ({% include 'openedx-assets/queries/fact_learner_problem_summary.sql' %}) - -select - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id, - sum(num_hints_displayed) + sum(num_answers_displayed) as total_hints -from summary -where success = 1 -group by - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id diff --git a/tutoraspects/templates/openedx-assets/queries/int_problem_responses.sql b/tutoraspects/templates/openedx-assets/queries/int_problem_responses.sql deleted file mode 100644 index f0e3c9e48..000000000 --- a/tutoraspects/templates/openedx-assets/queries/int_problem_responses.sql +++ /dev/null @@ -1,21 +0,0 @@ -with - problem_responses_base as ( - select * - from {{ DBT_PROFILE_TARGET_DATABASE }}.fact_problem_responses - where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} - ) - -select - emission_time, - org, - course_key, - course_name, - course_run, - problem_id, - problem_name, - problem_name_with_location, - actor_id, - attempts, - success, - responses -from problem_responses_base diff --git a/tutoraspects/templates/openedx-assets/queries/int_problem_results.sql b/tutoraspects/templates/openedx-assets/queries/int_problem_results.sql index d71e21cf8..ca4ffc839 100644 --- a/tutoraspects/templates/openedx-assets/queries/int_problem_results.sql +++ b/tutoraspects/templates/openedx-assets/queries/int_problem_results.sql @@ -49,6 +49,7 @@ with events.interaction_type as interaction_type from {{ ASPECTS_XAPI_DATABASE }}.problem_events events join responses using (org, course_key, problem_id, actor_id, emission_time) + where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} ) select @@ -87,3 +88,4 @@ join left outer join {{ ASPECTS_EVENT_SINK_DATABASE }}.user_pii users on full_responses.actor_id = users.external_user_id::String + {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/posts_per_user.sql b/tutoraspects/templates/openedx-assets/queries/posts_per_user.sql deleted file mode 100644 index 548b724b0..000000000 --- a/tutoraspects/templates/openedx-assets/queries/posts_per_user.sql +++ /dev/null @@ -1,9 +0,0 @@ -select org, course_key, course_name, course_run, actor_id, count(*) as num_posts -from {{ DBT_PROFILE_TARGET_DATABASE }}.fact_forum_interactions -where - 1 = 1 - {% raw %} - {% if from_dttm %} and emission_time > '{{ from_dttm }}' {% endif %} - {% if to_dttm %} and emission_time < '{{ to_dttm }}' {% endif %} - {% endraw %} -group by org, course_key, course_name, course_run, actor_id diff --git a/tutoraspects/templates/openedx-assets/queries/problem_coursewide_avg.sql b/tutoraspects/templates/openedx-assets/queries/problem_coursewide_avg.sql index be9fe83a3..1c61e4ccd 100644 --- a/tutoraspects/templates/openedx-assets/queries/problem_coursewide_avg.sql +++ b/tutoraspects/templates/openedx-assets/queries/problem_coursewide_avg.sql @@ -127,3 +127,4 @@ join on full_responses.org = coursewide_attempts.org and full_responses.course_key = coursewide_attempts.course_key and full_responses.problem_id = coursewide_attempts.problem_id +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} From 715c55ff3cfa5e4f021e35da15009302364e4689 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Mon, 30 Sep 2024 12:53:18 -0500 Subject: [PATCH 13/15] fix(sql): add where 1=1 for int_problem_results --- .../templates/openedx-assets/queries/int_problem_results.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tutoraspects/templates/openedx-assets/queries/int_problem_results.sql b/tutoraspects/templates/openedx-assets/queries/int_problem_results.sql index ca4ffc839..2df242a30 100644 --- a/tutoraspects/templates/openedx-assets/queries/int_problem_results.sql +++ b/tutoraspects/templates/openedx-assets/queries/int_problem_results.sql @@ -88,4 +88,5 @@ join left outer join {{ ASPECTS_EVENT_SINK_DATABASE }}.user_pii users on full_responses.actor_id = users.external_user_id::String +where 1=1 {% include 'openedx-assets/queries/common_filters.sql' %} From c9467c9ef904480a5f533fb1723b886507a7a4a6 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Mon, 30 Sep 2024 13:15:24 -0500 Subject: [PATCH 14/15] chore(sql): format sql files --- .../queries/fact_at_risk_problem_engagement.sql | 9 ++++++--- .../openedx-assets/queries/int_problem_results.sql | 3 +-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_problem_engagement.sql b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_problem_engagement.sql index d53cd62f6..fea9d4161 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_problem_engagement.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_problem_engagement.sql @@ -1,6 +1,9 @@ -with fact_problem_engagement as - ({% include 'openedx-assets/queries/fact_problem_engagement.sql' %}) -select fact_problem_engagement.* from fact_problem_engagement pe +with + fact_problem_engagement as ( + {% include 'openedx-assets/queries/fact_problem_engagement.sql' %} + ) +select fact_problem_engagement.* +from fact_problem_engagement pe join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/int_problem_results.sql b/tutoraspects/templates/openedx-assets/queries/int_problem_results.sql index 2df242a30..a2f0a39b8 100644 --- a/tutoraspects/templates/openedx-assets/queries/int_problem_results.sql +++ b/tutoraspects/templates/openedx-assets/queries/int_problem_results.sql @@ -88,5 +88,4 @@ join left outer join {{ ASPECTS_EVENT_SINK_DATABASE }}.user_pii users on full_responses.actor_id = users.external_user_id::String -where 1=1 - {% include 'openedx-assets/queries/common_filters.sql' %} +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} From 0dc8aa593dd0c0f67913dcd55393a3faeca7454f Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Mon, 30 Sep 2024 14:20:57 -0500 Subject: [PATCH 15/15] chore(sql): remove unused files --- .../queries/fact_course_grades.sql | 35 ---------- .../queries/fact_enrollments_by_day.sql | 68 ------------------- 2 files changed, 103 deletions(-) delete mode 100644 tutoraspects/templates/openedx-assets/queries/fact_course_grades.sql delete mode 100644 tutoraspects/templates/openedx-assets/queries/fact_enrollments_by_day.sql diff --git a/tutoraspects/templates/openedx-assets/queries/fact_course_grades.sql b/tutoraspects/templates/openedx-assets/queries/fact_course_grades.sql deleted file mode 100644 index 08c489444..000000000 --- a/tutoraspects/templates/openedx-assets/queries/fact_course_grades.sql +++ /dev/null @@ -1,35 +0,0 @@ -with - grades as ( - select * - from {{ DBT_PROFILE_TARGET_DATABASE }}.fact_grades - where - grade_type = 'course' - {% raw %} - {% if get_filters("course_name", remove_filter=True) == [] %} - {% elif filter_values("course_name") != [] %} - and entity_name - in {{ filter_values("course_name", remove_filter=True) | where_in }} - {% else %} and 1 = 0 - {% endif %} - {% endraw %} - {% include 'openedx-assets/queries/common_filters.sql' %} - ), - most_recent_grades as ( - select org, course_key, entity_id, actor_id, max(emission_time) as emission_time - from grades - group by org, course_key, entity_id, actor_id - ) - -select - grades.emission_time as emission_time, - grades.org as org, - grades.course_key as course_key, - grades.course_name as course_name, - grades.course_run as course_run, - grades.entity_name as entity_name, - grades.actor_id as actor_id, - grades.grade_type as grade_type, - grades.scaled_score as scaled_score, - grades.grade_bucket as grade_bucket -from grades -join most_recent_grades using (org, course_key, entity_id, actor_id, emission_time) diff --git a/tutoraspects/templates/openedx-assets/queries/fact_enrollments_by_day.sql b/tutoraspects/templates/openedx-assets/queries/fact_enrollments_by_day.sql deleted file mode 100644 index c41f9e9d2..000000000 --- a/tutoraspects/templates/openedx-assets/queries/fact_enrollments_by_day.sql +++ /dev/null @@ -1,68 +0,0 @@ -with - enrollments as ({% include 'openedx-assets/queries/fact_enrollments.sql' %}), - enrollments_ranked as ( - select - emission_time, - org, - course_key, - course_name, - course_run, - actor_id, - enrollment_mode, - enrollment_status, - rank() over ( - partition by date(emission_time), org, course_name, course_run, actor_id - order by emission_time desc - ) as event_rank - from enrollments - ), - enrollment_windows as ( - select - org, - course_key, - course_name, - course_run, - actor_id, - enrollment_status, - enrollment_mode, - emission_time as window_start_at, - lagInFrame(emission_time, 1, now() + interval '1' day) over ( - partition by org, course_name, course_run, actor_id - order by emission_time desc - ) as window_end_at - from enrollments_ranked - where event_rank = 1 - ), - enrollment_window_dates as ( - select - org, - course_key, - course_name, - course_run, - actor_id, - enrollment_status, - enrollment_mode, - date_trunc('day', window_start_at) as window_start_date, - date_trunc('day', window_end_at) as window_end_date - from enrollment_windows - ) -select - date( - fromUnixTimestamp( - arrayJoin( - range( - toUnixTimestamp(window_start_date), - toUnixTimestamp(window_end_date), - 86400 - ) - ) - ) - ) as enrollment_status_date, - org, - course_key, - course_name, - course_run, - actor_id, - enrollment_status, - enrollment_mode -from enrollment_window_dates