diff --git a/tutoraspects/commands_v1.py b/tutoraspects/commands_v1.py index 0278427ed..fdde965e2 100644 --- a/tutoraspects/commands_v1.py +++ b/tutoraspects/commands_v1.py @@ -148,9 +148,14 @@ def init_clickhouse() -> list[tuple[str, str]]: # Ex: "tutor local do performance-metrics " @click.command(context_settings={"ignore_unknown_options": True}) @click.option( - "--course_key", + "--org", default="", - help="A course_key to apply as a filter, you must include the 'course-v1:'.", + help="An organization to apply as a filter.", +) +@click.option( + "--course_name", + default="", + help="A course_name to apply as a filter.", ) @click.option( "--dashboard_slug", default="", help="Only run charts for the given dashboard." @@ -167,13 +172,15 @@ def init_clickhouse() -> list[tuple[str, str]]: @click.option( "--fail_on_error", is_flag=True, default=False, help="Allow errors to fail the run." ) -def performance_metrics( - course_key, dashboard_slug, slice_name, print_sql, fail_on_error +def performance_metrics( # pylint: disable=too-many-arguments,too-many-positional-arguments + org, course_name, dashboard_slug, slice_name, print_sql, fail_on_error ) -> (list)[tuple[str, str]]: """ Job to measure performance metrics of charts and its queries in Superset and ClickHouse. """ - options = f"--course_key {course_key}" if course_key else "" + options = "" + options += f"--org '{org}' " if org else "" + options += f"--course_name '{course_name}' " if course_name else "" options += f" --dashboard_slug {dashboard_slug}" if dashboard_slug else "" options += f' --slice_name "{slice_name}"' if slice_name else "" options += " --print_sql" if print_sql else "" diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets.py index f82a57c8c..f5a334355 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets.py @@ -10,6 +10,8 @@ import yaml from copy import deepcopy from pathlib import Path +from sqlfmt.api import format_string +from sqlfmt.mode import Mode from collections import defaultdict from superset import security_manager @@ -128,6 +130,9 @@ def write_asset_to_file(asset, asset_name, folder, file_name, roles, translated_ asset["sqlalchemy_uri"] = DATABASES.get(asset["database_name"]) if folder in ["charts", "dashboards", "datasets"]: for locale in DASHBOARD_LOCALES: + if folder == "datasets": + asset["sql"] = format_string(asset["sql"], mode=Mode(dialect_name="clickhouse")) + updated_asset = generate_translated_asset( asset, asset_name, folder, locale, roles, translated_asset_uuids ) @@ -171,7 +176,6 @@ def generate_translated_asset(asset, asset_name, folder, language, roles, transl # Save parent & translated uuids in yaml file translated_asset_uuids[parent_uuid].add(copy['uuid']) - if folder == "dashboards": copy["slug"] = f"{copy['slug']}-{language}" copy["description"] = get_translation(copy["description"], language) diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py index c361b9e85..e21577d85 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py @@ -7,9 +7,6 @@ across Superset installations. """ -from create_assets import BASE_DIR, ASSET_FOLDER_MAPPING, app - -import json import logging import os import time @@ -20,10 +17,12 @@ import click import sqlparse import yaml +from create_assets import app + from flask import g from superset import security_manager -from superset.commands.chart.data.get_data_command import ChartDataCommand from superset.charts.schemas import ChartDataQueryContextSchema +from superset.commands.chart.data.get_data_command import ChartDataCommand from superset.extensions import db from superset.models.dashboard import Dashboard from superset.models.slice import Slice @@ -42,46 +41,53 @@ "Result rows: {result_rows}\n" "Memory Usage (MB): {memory_usage_mb}\n" "Row count (superset) {rowcount:}\n" - "Filters: {filters}\n\n" + "Filters: {filters}\n" + "SQL:\n" + "{sql}\n\n\n" ) + @click.command() +@click.option("--org", default="", help="An organization to apply as a filter.") @click.option( - "--course_key", + "--course_name", default="", - help="A course_key to apply as a filter, you must include the 'course-v1:'.") + help="A course_name to apply as a filter, you must include the 'course-v1:'.", +) @click.option( - "--dashboard_slug", - default="", - help="Only run charts for the given dashboard.") + "--dashboard_slug", default="", help="Only run charts for the given dashboard." +) @click.option( "--slice_name", default="", help="Only run charts for the given slice name, if the name appears in more than " - "one dashboard it will be run for each.") + "one dashboard it will be run for each.", +) @click.option( - "--print_sql", - is_flag=True, - default=False, - help="Whether to print the SQL run." + "--print_sql", is_flag=True, default=False, help="Whether to print the SQL run." ) @click.option( "--fail_on_error", is_flag=True, default=False, help="Allow errors to fail the run." ) -def performance_metrics(course_key, dashboard_slug, slice_name, print_sql, - fail_on_error): +def performance_metrics( + org, course_name, dashboard_slug, slice_name, print_sql, fail_on_error +): """ Measure the performance of the dashboard. """ # Mock the client name to identify the queries in the clickhouse system.query_log # table by by the http_user_agent field. extra_filters = [] - if course_key: - extra_filters += [{"col": "course_key", "op": "==", "val": course_key}] + if course_name: + extra_filters += [{"col": "course_name", "op": "IN", "val": course_name}] + if org: + extra_filters += [{"col": "org", "op": "IN", "val": org}] with patch("clickhouse_connect.common.build_client_name") as mock_build_client_name: mock_build_client_name.return_value = RUN_ID - target_dashboards = [dashboard_slug] if dashboard_slug else {{SUPERSET_EMBEDDABLE_DASHBOARDS}} + target_dashboards = ( + [dashboard_slug] if dashboard_slug else {{SUPERSET_EMBEDDABLE_DASHBOARDS}} + ) dashboards = ( db.session.query(Dashboard) @@ -98,14 +104,13 @@ def performance_metrics(course_key, dashboard_slug, slice_name, print_sql, logger.info(f"Dashboard: {dashboard.slug}") for slice in dashboard.slices: if slice_name and not slice_name == slice.slice_name: - logger.info(f"{slice.slice_name} doesn't match {slice_name}, " - f"skipping.") + logger.info( + f"{slice.slice_name} doesn't match {slice_name}, " f"skipping." + ) continue query_context = get_slice_query_context( - slice, - query_contexts, - extra_filters + slice, query_contexts, extra_filters ) result = measure_chart(slice, query_context, fail_on_error) if not result: @@ -167,6 +172,8 @@ def get_slice_query_context(slice, query_contexts, extra_filters=None): } ) + query_context["form_data"]["extra_form_data"] = {"filters": extra_filters} + if extra_filters: for query in query_context["queries"]: query["filters"] += extra_filters @@ -174,20 +181,23 @@ def get_slice_query_context(slice, query_contexts, extra_filters=None): return query_context -def measure_chart(slice, query_context, fail_on_error): +def measure_chart(slice, query_context_dict, fail_on_error): """ Measure the performance of a chart and return the results. """ logger.info(f"Fetching slice data: {slice}") g.user = security_manager.find_user(username="{{SUPERSET_ADMIN_USERNAME}}") - query_context = ChartDataQueryContextSchema().load(query_context) + query_context = ChartDataQueryContextSchema().load(query_context_dict) command = ChartDataCommand(query_context) - - start_time = datetime.now() + command.validate() + g.form_data = query_context.form_data try: + start_time = datetime.now() result = command.run() - + end_time = datetime.now() + result["time_elapsed"] = (end_time - start_time).total_seconds() + result["slice"] = slice for query in result["queries"]: if "error" in query and query["error"]: raise query["error"] @@ -197,11 +207,6 @@ def measure_chart(slice, query_context, fail_on_error): raise e return - end_time = datetime.now() - - result["time_elapsed"] = (end_time - start_time).total_seconds() - result["slice"] = slice - return result @@ -227,44 +232,38 @@ def get_query_log_from_clickhouse(report, query_contexts, print_sql, fail_on_err parsed_sql = str(sqlparse.parse(row.pop("query"))[0]) clickhouse_queries[parsed_sql] = row - if print_sql: - logger.info("ClickHouse SQL: ") - logger.info(parsed_sql) - - # Sort report by slowest queries - report = sorted(report, key=lambda x: x["time_elapsed"], reverse=True) - - report_str = f"\nSuperset Reports: {RUN_ID}\n\n" - for i, chart_result in enumerate(report): - report_str += ( - report_format.format( - i=(i + 1), - dashboard=chart_result["dashboard"], - slice=chart_result["slice"], - superset_time=chart_result["time_elapsed"] - ) - ) - for i, query in enumerate(chart_result["queries"]): + for k, chart_result in enumerate(report): + for query in chart_result["queries"]: parsed_sql = ( str(sqlparse.parse(query["query"])[0]).replace(";", "") + "\n FORMAT Native" ) + chart_result["sql"] = parsed_sql + clickhouse_report = clickhouse_queries.get(parsed_sql, {}) + chart_result.update(clickhouse_report) + chart_result.update( + {"query_duration_ms": chart_result.get("query_duration_ms", 0)} + ) - if print_sql: - logger.info("Superset SQL: ") - logger.info(parsed_sql) + # Sort report by slowest queries + report = sorted(report, key=lambda x: x["query_duration_ms"], reverse=True) - clickhouse_report = clickhouse_queries.get(parsed_sql, {}) - report_str += ( - query_format.format( - query_duration_ms=clickhouse_report.get( - "query_duration_ms", 0 - ) / 1000, - memory_usage_mb=clickhouse_report.get("memory_usage_mb"), - result_rows=clickhouse_report.get("result_rows"), - rowcount=query["rowcount"], - filters=query["applied_filters"], - ) + report_str = f"\nSuperset Reports: {RUN_ID}\n\n" + for k, chart_result in enumerate(report): + report_str += report_format.format( + i=(k + 1), + dashboard=chart_result["dashboard"], + slice=chart_result["slice"], + superset_time=chart_result["time_elapsed"], + ) + for query in chart_result["queries"]: + report_str += query_format.format( + query_duration_ms=chart_result.get("query_duration_ms") / 1000, + memory_usage_mb=chart_result.get("memory_usage_mb"), + result_rows=chart_result.get("result_rows"), + rowcount=query["rowcount"], + filters=query["applied_filters"], + sql=chart_result["sql"] if print_sql else "", ) logger.info(report_str) diff --git a/tutoraspects/templates/aspects/build/aspects-superset/requirements.txt b/tutoraspects/templates/aspects/build/aspects-superset/requirements.txt index 3c2b63a0e..c3f55c10d 100644 --- a/tutoraspects/templates/aspects/build/aspects-superset/requirements.txt +++ b/tutoraspects/templates/aspects/build/aspects-superset/requirements.txt @@ -4,3 +4,4 @@ openedx-atlas ruamel-yaml==0.18.6 sentry-sdk[flask] urllib3>=1.26.15,<2 +shandy-sqlfmt[jinjafmt]==0.21.2 diff --git a/tutoraspects/templates/openedx-assets/queries/active_last_7_days.sql b/tutoraspects/templates/openedx-assets/queries/active_last_7_days.sql index 5b6860455..294836a1a 100644 --- a/tutoraspects/templates/openedx-assets/queries/active_last_7_days.sql +++ b/tutoraspects/templates/openedx-assets/queries/active_last_7_days.sql @@ -2,10 +2,13 @@ with recent_activity as ( select course_key, COUNT(DISTINCT actor_id) as active_last_7_days from {{ ASPECTS_XAPI_DATABASE }}.navigation_events - where emission_time >= NOW() - INTERVAL 7 DAY + where + emission_time >= NOW() - INTERVAL 7 DAY + {% include 'openedx-assets/queries/common_filters.sql' %} group by course_key ) select fss.*, COALESCE(ra.active_last_7_days, 0) as active_within_last_7_days from {{ DBT_PROFILE_TARGET_DATABASE }}.fact_student_status fss left join recent_activity ra on fss.course_key = ra.course_key +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/at_risk_learner_filter.sql b/tutoraspects/templates/openedx-assets/queries/at_risk_learner_filter.sql index 09db324af..c051cbc5b 100644 --- a/tutoraspects/templates/openedx-assets/queries/at_risk_learner_filter.sql +++ b/tutoraspects/templates/openedx-assets/queries/at_risk_learner_filter.sql @@ -12,4 +12,6 @@ with select org, course_key, learners.actor_id as actor_id from {{ DBT_PROFILE_TARGET_DATABASE }}.fact_student_status learners join page_visits using (org, course_key, actor_id) -where approving_state = 'failed' and enrollment_status = 'registered' +where + approving_state = 'failed' and enrollment_status = 'registered' + {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/at_risk_problem_results.sql b/tutoraspects/templates/openedx-assets/queries/at_risk_problem_results.sql index a8bd54b33..254df792f 100644 --- a/tutoraspects/templates/openedx-assets/queries/at_risk_problem_results.sql +++ b/tutoraspects/templates/openedx-assets/queries/at_risk_problem_results.sql @@ -4,3 +4,4 @@ join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} ) as at_risk_learners using (org, course_key, actor_id) +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/dim_at_risk_learners.sql b/tutoraspects/templates/openedx-assets/queries/dim_at_risk_learners.sql index 94a43a71f..db4c2e23a 100644 --- a/tutoraspects/templates/openedx-assets/queries/dim_at_risk_learners.sql +++ b/tutoraspects/templates/openedx-assets/queries/dim_at_risk_learners.sql @@ -29,3 +29,4 @@ where approving_state = 'failed' and enrollment_status = 'registered' and page_visits.last_visited < subtractDays(now(), 7) + {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/dim_course_problems.sql b/tutoraspects/templates/openedx-assets/queries/dim_course_problems.sql deleted file mode 100644 index 1d014dba5..000000000 --- a/tutoraspects/templates/openedx-assets/queries/dim_course_problems.sql +++ /dev/null @@ -1,20 +0,0 @@ -select - org, - course_name, - course_key, - course_run, - block_id as problem_id, - block_name as problem_name, - display_name_with_location as problem_name_with_location -from {{ DBT_PROFILE_TARGET_DATABASE }}.dim_course_blocks -where - problem_id like '%problem+block%' - {% raw -%} - {% if filter_values("org") != [] %} - and org in {{ filter_values("org") | where_in }} - {% endif %} - {% if filter_values("problem_name_with_location") != [] %} - and problem_name_with_location - in {{ filter_values("problem_name_with_location") | where_in }} - {% endif %} - {%- endraw %} diff --git a/tutoraspects/templates/openedx-assets/queries/dim_course_videos.sql b/tutoraspects/templates/openedx-assets/queries/dim_course_videos.sql deleted file mode 100644 index 16a12a10a..000000000 --- a/tutoraspects/templates/openedx-assets/queries/dim_course_videos.sql +++ /dev/null @@ -1,20 +0,0 @@ -select - org, - course_name, - course_key, - course_run, - block_id as video_id, - block_name as video_name, - display_name_with_location as video_name_with_location -from {{ DBT_PROFILE_TARGET_DATABASE }}.dim_course_blocks -where - video_id like '%video+block%' - {% raw -%} - {% if filter_values("org") != [] %} - and org in {{ filter_values("org") | where_in }} - {% endif %} - {% if filter_values("video_name_with_location") != [] %} - and video_name_with_location - in {{ filter_values("video_name_with_location") | where_in }} - {% endif %} - {%- endraw %} diff --git a/tutoraspects/templates/openedx-assets/queries/enrollment_status.sql b/tutoraspects/templates/openedx-assets/queries/enrollment_status.sql index 6db19984b..2b79f7bcc 100644 --- a/tutoraspects/templates/openedx-assets/queries/enrollment_status.sql +++ b/tutoraspects/templates/openedx-assets/queries/enrollment_status.sql @@ -12,3 +12,4 @@ left join {{ ASPECTS_EVENT_SINK_DATABASE }}.course_names cn on fes.org = cn.org and fes.course_key = cn.course_key +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_navigation_completion.sql b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_navigation_completion.sql index ea2894572..f7544ff45 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_navigation_completion.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_navigation_completion.sql @@ -4,3 +4,4 @@ join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} ) as at_risk_learners using (org, course_key, actor_id) +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_pageview_engagement.sql b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_pageview_engagement.sql index c65b6828b..3c37ec1d7 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_pageview_engagement.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_pageview_engagement.sql @@ -4,3 +4,4 @@ join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} ) as at_risk_learners using (org, course_key, actor_id) +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_problem_engagement.sql b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_problem_engagement.sql index bc1630cda..fea9d4161 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_problem_engagement.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_problem_engagement.sql @@ -1,4 +1,9 @@ -{% include 'openedx-assets/queries/fact_problem_engagement.sql' %} +with + fact_problem_engagement as ( + {% include 'openedx-assets/queries/fact_problem_engagement.sql' %} + ) +select fact_problem_engagement.* +from fact_problem_engagement pe join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_engagement.sql b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_engagement.sql index b5a9b793a..2160ab141 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_engagement.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_engagement.sql @@ -4,3 +4,4 @@ join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} ) as at_risk_learners using (org, course_key, actor_id) +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_plays.sql b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_plays.sql index b22e43c4b..1cd8491fd 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_plays.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_plays.sql @@ -6,3 +6,4 @@ join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} ) as at_risk_learners using (org, course_key, actor_id) +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_watches.sql b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_watches.sql index 86efff57d..477d232fe 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_watches.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_video_watches.sql @@ -6,3 +6,4 @@ join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} ) as at_risk_learners using (org, course_key, actor_id) +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_watched_video_segments.sql b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_watched_video_segments.sql index df054f3a9..b74dfb745 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_at_risk_watched_video_segments.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_at_risk_watched_video_segments.sql @@ -6,3 +6,4 @@ join ( {% include 'openedx-assets/queries/at_risk_learner_filter.sql' %} ) as at_risk_learners using (org, course_key, actor_id) +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_course_grades.sql b/tutoraspects/templates/openedx-assets/queries/fact_course_grades.sql deleted file mode 100644 index 08c489444..000000000 --- a/tutoraspects/templates/openedx-assets/queries/fact_course_grades.sql +++ /dev/null @@ -1,35 +0,0 @@ -with - grades as ( - select * - from {{ DBT_PROFILE_TARGET_DATABASE }}.fact_grades - where - grade_type = 'course' - {% raw %} - {% if get_filters("course_name", remove_filter=True) == [] %} - {% elif filter_values("course_name") != [] %} - and entity_name - in {{ filter_values("course_name", remove_filter=True) | where_in }} - {% else %} and 1 = 0 - {% endif %} - {% endraw %} - {% include 'openedx-assets/queries/common_filters.sql' %} - ), - most_recent_grades as ( - select org, course_key, entity_id, actor_id, max(emission_time) as emission_time - from grades - group by org, course_key, entity_id, actor_id - ) - -select - grades.emission_time as emission_time, - grades.org as org, - grades.course_key as course_key, - grades.course_name as course_name, - grades.course_run as course_run, - grades.entity_name as entity_name, - grades.actor_id as actor_id, - grades.grade_type as grade_type, - grades.scaled_score as scaled_score, - grades.grade_bucket as grade_bucket -from grades -join most_recent_grades using (org, course_key, entity_id, actor_id, emission_time) diff --git a/tutoraspects/templates/openedx-assets/queries/fact_enrollments_by_day.sql b/tutoraspects/templates/openedx-assets/queries/fact_enrollments_by_day.sql deleted file mode 100644 index c41f9e9d2..000000000 --- a/tutoraspects/templates/openedx-assets/queries/fact_enrollments_by_day.sql +++ /dev/null @@ -1,68 +0,0 @@ -with - enrollments as ({% include 'openedx-assets/queries/fact_enrollments.sql' %}), - enrollments_ranked as ( - select - emission_time, - org, - course_key, - course_name, - course_run, - actor_id, - enrollment_mode, - enrollment_status, - rank() over ( - partition by date(emission_time), org, course_name, course_run, actor_id - order by emission_time desc - ) as event_rank - from enrollments - ), - enrollment_windows as ( - select - org, - course_key, - course_name, - course_run, - actor_id, - enrollment_status, - enrollment_mode, - emission_time as window_start_at, - lagInFrame(emission_time, 1, now() + interval '1' day) over ( - partition by org, course_name, course_run, actor_id - order by emission_time desc - ) as window_end_at - from enrollments_ranked - where event_rank = 1 - ), - enrollment_window_dates as ( - select - org, - course_key, - course_name, - course_run, - actor_id, - enrollment_status, - enrollment_mode, - date_trunc('day', window_start_at) as window_start_date, - date_trunc('day', window_end_at) as window_end_date - from enrollment_windows - ) -select - date( - fromUnixTimestamp( - arrayJoin( - range( - toUnixTimestamp(window_start_date), - toUnixTimestamp(window_end_date), - 86400 - ) - ) - ) - ) as enrollment_status_date, - org, - course_key, - course_name, - course_run, - actor_id, - enrollment_status, - enrollment_mode -from enrollment_window_dates diff --git a/tutoraspects/templates/openedx-assets/queries/fact_forum_interactions.sql b/tutoraspects/templates/openedx-assets/queries/fact_forum_interactions.sql deleted file mode 100644 index 4570c0aa0..000000000 --- a/tutoraspects/templates/openedx-assets/queries/fact_forum_interactions.sql +++ /dev/null @@ -1,3 +0,0 @@ -select * -from {{ DBT_PROFILE_TARGET_DATABASE }}.fact_forum_interactions -where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_learner_problem_course_summary.sql b/tutoraspects/templates/openedx-assets/queries/fact_learner_problem_course_summary.sql deleted file mode 100644 index 29ff1a79c..000000000 --- a/tutoraspects/templates/openedx-assets/queries/fact_learner_problem_course_summary.sql +++ /dev/null @@ -1,136 +0,0 @@ -with - problem_responses as ( - {% include 'openedx-assets/queries/int_problem_responses.sql' %} - ), - outcomes as ( - select - emission_time, - org, - course_key, - problem_id, - actor_id, - success, - first_value(success) over ( - partition by course_key, problem_id, actor_id order by success DESC - ) as was_successful - from problem_responses - ), - successful_responses as ( - select - org, - course_key, - problem_id, - actor_id, - min(emission_time) as first_success_at - from outcomes - where was_successful = true and success = true - group by org, course_key, problem_id, actor_id - ), - unsuccessful_responses as ( - select - org, - course_key, - problem_id, - actor_id, - max(emission_time) as last_response_at - from outcomes - where was_successful = false - group by org, course_key, problem_id, actor_id - ), - final_responses as ( - select org, course_key, problem_id, actor_id, first_success_at as emission_time - from successful_responses - union all - select org, course_key, problem_id, actor_id, last_response_at as emission_time - from unsuccessful_responses - ), - int_problem_results as ( - select - emission_time, - org, - course_key, - course_name, - course_run, - problem_id, - problem_name, - problem_name_with_location, - actor_id, - responses, - success, - attempts - from problem_responses - inner join - final_responses using (org, course_key, problem_id, actor_id, emission_time) - ), - summary as ( - select - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id, - success, - attempts, - 0 as num_hints_displayed, - 0 as num_answers_displayed - from int_problem_results - where - 1 = 1 - {% raw %} - {% if from_dttm %} and emission_time > '{{ from_dttm }}' {% endif %} - {% if to_dttm %} and emission_time < '{{ to_dttm }}' {% endif %} - {% endraw %} - union all - select - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id, - NULL as success, - NULL as attempts, - caseWithExpression(help_type, 'hint', 1, 0) as num_hints_displayed, - caseWithExpression(help_type, 'answer', 1, 0) as num_answers_displayed - from {{ DBT_PROFILE_TARGET_DATABASE }}.int_problem_hints - where - 1 = 1 - {% raw %} - {% if from_dttm %} and emission_time > '{{ from_dttm }}' {% endif %} - {% if to_dttm %} and emission_time < '{{ to_dttm }}' {% endif %} - {% endraw %} - {% include 'openedx-assets/queries/common_filters.sql' %} - ) - -select - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id, - coalesce(any(success), false) as success, - coalesce(any(attempts), 0) as attempts, - sum(num_hints_displayed) as num_hints_displayed, - sum(num_answers_displayed) as num_answers_displayed -from summary -where - {% raw %} - {% if get_filters("course_name", remove_filter=True) == [] %} 1 = 1 - {% elif filter_values("course_name") != [] %} - course_name in {{ filter_values("course_name") | where_in }} - {% else %} 1 = 0 - {% endif %} - {% endraw %} -group by - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id diff --git a/tutoraspects/templates/openedx-assets/queries/fact_learner_problem_summary.sql b/tutoraspects/templates/openedx-assets/queries/fact_learner_problem_summary.sql deleted file mode 100644 index e08113378..000000000 --- a/tutoraspects/templates/openedx-assets/queries/fact_learner_problem_summary.sql +++ /dev/null @@ -1,125 +0,0 @@ -with - problem_responses as ( - {% include 'openedx-assets/queries/int_problem_responses.sql' %} - ), - outcomes as ( - select - emission_time, - org, - course_key, - problem_id, - actor_id, - success, - first_value(success) over ( - partition by course_key, problem_id, actor_id order by success ASC - ) as was_successful - from problem_responses - ), - successful_responses as ( - select - org, - course_key, - problem_id, - actor_id, - min(emission_time) as first_success_at - from outcomes - where was_successful = true and success = true - group by org, course_key, problem_id, actor_id - ), - unsuccessful_responses as ( - select - org, - course_key, - problem_id, - actor_id, - max(emission_time) as last_response_at - from outcomes - where was_successful = false - group by org, course_key, problem_id, actor_id - ), - final_responses as ( - select org, course_key, problem_id, actor_id, first_success_at as emission_time - from successful_responses - union all - select org, course_key, problem_id, actor_id, last_response_at as emission_time - from unsuccessful_responses - ), - int_problem_results as ( - select - emission_time, - org, - course_key, - course_name, - course_run, - problem_id, - problem_name, - problem_name_with_location, - actor_id, - responses, - success, - attempts - from problem_responses - inner join - final_responses using (org, course_key, problem_id, actor_id, emission_time) - ), - summary_base as ( - select - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id, - success, - attempts, - 0 as num_hints_displayed, - 0 as num_answers_displayed - from int_problem_results - union all - select - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id, - NULL as success, - NULL as attempts, - caseWithExpression(help_type, 'hint', 1, 0) as num_hints_displayed, - caseWithExpression(help_type, 'answer', 1, 0) as num_answers_displayed - from {{ DBT_PROFILE_TARGET_DATABASE }}.int_problem_hints - where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} - ) - -select - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id, - coalesce(any(success), false) as success, - coalesce(any(attempts), 0) as attempts, - sum(num_hints_displayed) as num_hints_displayed, - sum(num_answers_displayed) as num_answers_displayed -from summary_base -where - {% raw %} - {% if get_filters("problem_name_with_location", remove_filter=True) == [] %} 1 = 1 - {% elif filter_values("problem_name_with_location") != [] %} - problem_name_with_location - in {{ filter_values("problem_name_with_location") | where_in }} - {% else %} 1 = 0 - {% endif %} - {% endraw %} -group by - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id diff --git a/tutoraspects/templates/openedx-assets/queries/fact_learner_summary.sql b/tutoraspects/templates/openedx-assets/queries/fact_learner_summary.sql index 398b2e3b8..ead770402 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_learner_summary.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_learner_summary.sql @@ -66,3 +66,4 @@ left join on fss.org = let.org and fss.course_key = let.course_key and fss.actor_id = let.actor_id +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_page_engagement.sql b/tutoraspects/templates/openedx-assets/queries/fact_page_engagement.sql index ae3a4b63d..ba8980542 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_page_engagement.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_page_engagement.sql @@ -50,3 +50,4 @@ join left outer join {{ DBT_PROFILE_TARGET_DATABASE }}.dim_user_pii users on toUUID(pv.actor_id) = users.external_user_id +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_problem_engagement.sql b/tutoraspects/templates/openedx-assets/queries/fact_problem_engagement.sql index ebe8d97f0..8c21465b0 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_problem_engagement.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_problem_engagement.sql @@ -51,3 +51,4 @@ join left outer join {{ DBT_PROFILE_TARGET_DATABASE }}.dim_user_pii users on toUUID(pe.actor_id) = users.external_user_id +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_problem_grades.sql b/tutoraspects/templates/openedx-assets/queries/fact_problem_grades.sql deleted file mode 100644 index 64dae4984..000000000 --- a/tutoraspects/templates/openedx-assets/queries/fact_problem_grades.sql +++ /dev/null @@ -1,42 +0,0 @@ -with - grades as ( - select * - from {{ DBT_PROFILE_TARGET_DATABASE }}.fact_grades - where - grade_type = 'problem' - - {% raw %} - {% if get_filters("problem_name_with_location", remove_filter=True) == [] %} - {% elif filter_values("problem_name_with_location") != [] %} - and entity_name_with_location - in {{ - filter_values( - "problem_name_with_location", remove_filter=True - ) | where_in - }} - {% else %} and 1 = 0 - {% endif %} - {% endraw %} - - {% include 'openedx-assets/queries/common_filters.sql' %} - ), - most_recent_grades as ( - select org, course_key, entity_id, actor_id, max(emission_time) as emission_time - from grades - group by org, course_key, entity_id, actor_id - ) - -select - grades.emission_time as emission_time, - grades.org as org, - grades.course_key as course_key, - grades.course_name as course_name, - grades.course_run as course_run, - grades.entity_name as entity_name, - grades.entity_name_with_location as entity_name_with_location, - grades.actor_id as actor_id, - grades.grade_type as grade_type, - grades.scaled_score as scaled_score, - grades.grade_bucket as grade_bucket -from grades -join most_recent_grades using (org, course_key, entity_id, actor_id, emission_time) diff --git a/tutoraspects/templates/openedx-assets/queries/fact_problem_responses.sql b/tutoraspects/templates/openedx-assets/queries/fact_problem_responses.sql deleted file mode 100644 index fe273177b..000000000 --- a/tutoraspects/templates/openedx-assets/queries/fact_problem_responses.sql +++ /dev/null @@ -1,30 +0,0 @@ -with - problem_responses as ( - {% include 'openedx-assets/queries/int_problem_responses.sql' %} - ) - -select - emission_time, - org, - course_key, - course_name, - course_run, - problem_id, - problem_name, - problem_name_with_location, - actor_id, - attempts, - success, - arrayJoin( - if(JSONArrayLength(responses) > 0, JSONExtractArrayRaw(responses), [responses]) - ) as responses -from problem_responses -where - {% raw %} - {% if get_filters("problem_name_with_location", remove_filter=True) == [] %} 1 = 1 - {% elif filter_values("problem_name_with_location") != [] %} - problem_name_with_location - in {{ filter_values("problem_name_with_location") | where_in }} - {% else %} 1 = 0 - {% endif %} - {% endraw %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_transcript_usage.sql b/tutoraspects/templates/openedx-assets/queries/fact_transcript_usage.sql deleted file mode 100644 index 57a2ccbea..000000000 --- a/tutoraspects/templates/openedx-assets/queries/fact_transcript_usage.sql +++ /dev/null @@ -1,25 +0,0 @@ -with - transcripts as ( - select * - from {{ DBT_PROFILE_TARGET_DATABASE }}.fact_transcript_usage - where - {% raw %} - {% if get_filters("course_name", remove_filter=True) == [] %} 1 = 1 - {% elif filter_values("course_name") != [] %} - course_name in {{ filter_values("course_name") | where_in }} - {% else %} 1 = 0 - {% endif %} - {% endraw %} - {% include 'openedx-assets/queries/common_filters.sql' %} - ) - -select - emission_time, - org, - course_key, - course_name, - course_run, - video_name, - video_name_with_location, - actor_id -from transcripts diff --git a/tutoraspects/templates/openedx-assets/queries/fact_video_engagement.sql b/tutoraspects/templates/openedx-assets/queries/fact_video_engagement.sql index 300d30e55..19a48674e 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_video_engagement.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_video_engagement.sql @@ -50,3 +50,4 @@ join left outer join {{ DBT_PROFILE_TARGET_DATABASE }}.dim_user_pii users on toUUID(ve.actor_id) = users.external_user_id +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/fact_watched_video_segments.sql b/tutoraspects/templates/openedx-assets/queries/fact_watched_video_segments.sql index ccb109a9c..328ab6da0 100644 --- a/tutoraspects/templates/openedx-assets/queries/fact_watched_video_segments.sql +++ b/tutoraspects/templates/openedx-assets/queries/fact_watched_video_segments.sql @@ -77,6 +77,7 @@ with segments.course_key = blocks.course_key and segments.video_id = blocks.block_id ) + where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} ) select diff --git a/tutoraspects/templates/openedx-assets/queries/hints_per_success.sql b/tutoraspects/templates/openedx-assets/queries/hints_per_success.sql deleted file mode 100644 index cf797bbfd..000000000 --- a/tutoraspects/templates/openedx-assets/queries/hints_per_success.sql +++ /dev/null @@ -1,22 +0,0 @@ -with - summary as ({% include 'openedx-assets/queries/fact_learner_problem_summary.sql' %}) - -select - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id, - sum(num_hints_displayed) + sum(num_answers_displayed) as total_hints -from summary -where success = 1 -group by - org, - course_key, - course_name, - course_run, - problem_name, - problem_name_with_location, - actor_id diff --git a/tutoraspects/templates/openedx-assets/queries/int_problem_responses.sql b/tutoraspects/templates/openedx-assets/queries/int_problem_responses.sql deleted file mode 100644 index f0e3c9e48..000000000 --- a/tutoraspects/templates/openedx-assets/queries/int_problem_responses.sql +++ /dev/null @@ -1,21 +0,0 @@ -with - problem_responses_base as ( - select * - from {{ DBT_PROFILE_TARGET_DATABASE }}.fact_problem_responses - where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} - ) - -select - emission_time, - org, - course_key, - course_name, - course_run, - problem_id, - problem_name, - problem_name_with_location, - actor_id, - attempts, - success, - responses -from problem_responses_base diff --git a/tutoraspects/templates/openedx-assets/queries/int_problem_results.sql b/tutoraspects/templates/openedx-assets/queries/int_problem_results.sql index d71e21cf8..a2f0a39b8 100644 --- a/tutoraspects/templates/openedx-assets/queries/int_problem_results.sql +++ b/tutoraspects/templates/openedx-assets/queries/int_problem_results.sql @@ -49,6 +49,7 @@ with events.interaction_type as interaction_type from {{ ASPECTS_XAPI_DATABASE }}.problem_events events join responses using (org, course_key, problem_id, actor_id, emission_time) + where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} ) select @@ -87,3 +88,4 @@ join left outer join {{ ASPECTS_EVENT_SINK_DATABASE }}.user_pii users on full_responses.actor_id = users.external_user_id::String +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %} diff --git a/tutoraspects/templates/openedx-assets/queries/posts_per_user.sql b/tutoraspects/templates/openedx-assets/queries/posts_per_user.sql deleted file mode 100644 index 548b724b0..000000000 --- a/tutoraspects/templates/openedx-assets/queries/posts_per_user.sql +++ /dev/null @@ -1,9 +0,0 @@ -select org, course_key, course_name, course_run, actor_id, count(*) as num_posts -from {{ DBT_PROFILE_TARGET_DATABASE }}.fact_forum_interactions -where - 1 = 1 - {% raw %} - {% if from_dttm %} and emission_time > '{{ from_dttm }}' {% endif %} - {% if to_dttm %} and emission_time < '{{ to_dttm }}' {% endif %} - {% endraw %} -group by org, course_key, course_name, course_run, actor_id diff --git a/tutoraspects/templates/openedx-assets/queries/problem_coursewide_avg.sql b/tutoraspects/templates/openedx-assets/queries/problem_coursewide_avg.sql index be9fe83a3..1c61e4ccd 100644 --- a/tutoraspects/templates/openedx-assets/queries/problem_coursewide_avg.sql +++ b/tutoraspects/templates/openedx-assets/queries/problem_coursewide_avg.sql @@ -127,3 +127,4 @@ join on full_responses.org = coursewide_attempts.org and full_responses.course_key = coursewide_attempts.course_key and full_responses.problem_id = coursewide_attempts.problem_id +where 1 = 1 {% include 'openedx-assets/queries/common_filters.sql' %}