From 089b2d0433a9b79a3cff3e987774caa62923ed4e Mon Sep 17 00:00:00 2001 From: Brian Mesick Date: Mon, 24 Apr 2023 16:22:59 -0400 Subject: [PATCH] feat: Add event listener for course publish Creates the edx-platform plugin plumbing, adds some new requirements, maps the appropriate Django Signal to push course structure to ClickHouse. --- README.rst | 52 +++- event_sink_clickhouse/apps.py | 39 +++ event_sink_clickhouse/settings/__init__.py | 0 event_sink_clickhouse/settings/common.py | 19 ++ event_sink_clickhouse/settings/production.py | 13 + event_sink_clickhouse/signals.py | 13 + event_sink_clickhouse/sinks/__init__.py | 0 event_sink_clickhouse/sinks/base_sink.py | 26 ++ .../sinks/course_published.py | 226 ++++++++++++++++++ event_sink_clickhouse/tasks.py | 30 +++ requirements/base.in | 5 +- requirements/base.txt | 74 +++++- requirements/ci.txt | 4 +- requirements/dev.txt | 125 +++++++++- requirements/doc.txt | 122 +++++++++- requirements/pip.txt | 2 +- requirements/quality.txt | 127 +++++++++- requirements/test.in | 1 + requirements/test.txt | 133 ++++++++++- setup.py | 8 +- test_utils/helpers.py | 210 ++++++++++++++++ tests/test_course_published.py | 117 +++++++++ tests/test_django_settings.py | 55 +++++ 23 files changed, 1363 insertions(+), 38 deletions(-) create mode 100644 event_sink_clickhouse/settings/__init__.py create mode 100644 event_sink_clickhouse/settings/common.py create mode 100644 event_sink_clickhouse/settings/production.py create mode 100644 event_sink_clickhouse/signals.py create mode 100644 event_sink_clickhouse/sinks/__init__.py create mode 100644 event_sink_clickhouse/sinks/base_sink.py create mode 100644 event_sink_clickhouse/sinks/course_published.py create mode 100644 event_sink_clickhouse/tasks.py create mode 100644 test_utils/helpers.py create mode 100644 tests/test_course_published.py create mode 100644 tests/test_django_settings.py diff --git a/README.rst b/README.rst index 3ad6808..9172454 100644 --- a/README.rst +++ b/README.rst @@ -7,17 +7,25 @@ Event Sink ClickHouse Purpose ******* -A listener for `Open edX events`_ to send them to ClickHouse. This project -acts as a plugin to the Edx Platform, listens for configured Open edX events, -and sends them to a ClickHouse database for analytics or other processing. This -is being maintained as part of the Open Analytics Reference System (OARS) -project. +This project acts as a plugin to the `Edx Platform`_, listens for +configured `Open edX events`_, and sends them to a `ClickHouse`_ database for +analytics or other processing. This is being maintained as part of the Open +Analytics Reference System (`OARS`_) project. OARS consumes the data sent to ClickHouse by this plugin as part of data enrichment for reporting, or capturing data that otherwise does not fit in xAPI. +Currently the only sink is in the CMS. It listens for the ``COURSE_PUBLISHED`` +signal and serializes a subset of the published course blocks into one table +and the relationships between blocks into another table. With those we are +able to recreate the "graph" of the course and get relevant data, such as +block names, for reporting. + .. _Open edX events: https://github.com/openedx/openedx-events +.. _Edx Platform: https://github.com/openedx/edx-platform +.. _ClickHouse: https://clickhouse.com +.. _OARS: https://docs.openedx.org/projects/openedx-oars/en/latest/index.html Getting Started *************** @@ -75,12 +83,38 @@ Every time you develop something in this repo Deploying ========= -TODO: How can a new user go about deploying this component? Is it just a few -commands? Is there a larger how-to that should be linked here? +This plugin will be deployed by default in an OARS Tutor environment. For other +deployments install the library or add it to private requirements of your +virtual environment ( ``requirements/private.txt`` ). + +#. Run ``pip install openedx-event-sink-clickhouse``. + +#. Run migrations: + +- ``python manage.py lms migrate`` + +- ``python manage.py cms migrate`` -PLACEHOLDER: For details on how to deploy this component, see the `deployment how-to`_ +#. Restart LMS service and celery workers of edx-platform. + +Configuration +=============== + +Currently all events will be listened to by default (there is only one). So +the only necessary configuration is a ClickHouse connection: + +.. code-block:: -.. _deployment how-to: https://docs.openedx.org/projects/openedx-event-sink-clickhouse/how-tos/how-to-deploy-this-component.html + EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG = { + # URL to a running ClickHouse server's HTTP interface. ex: https://foo.openedx.org:8443/ or + # http://foo.openedx.org:8123/ . Note that we only support the ClickHouse HTTP interface + # to avoid pulling in more dependencies to the platform than necessary. + "url": "http://clickhouse:8123", + "username": "changeme", + "password": "changeme", + "database": "event_sink", + "timeout_secs": 3, + } Getting Help ************ diff --git a/event_sink_clickhouse/apps.py b/event_sink_clickhouse/apps.py index 4981b8f..2d666d2 100644 --- a/event_sink_clickhouse/apps.py +++ b/event_sink_clickhouse/apps.py @@ -3,6 +3,7 @@ """ from django.apps import AppConfig +from edx_django_utils.plugins import PluginSettings, PluginSignals class EventSinkClickhouseConfig(AppConfig): @@ -11,3 +12,41 @@ class EventSinkClickhouseConfig(AppConfig): """ name = 'event_sink_clickhouse' + verbose_name = "Event Sink ClickHouse" + + plugin_app = { + PluginSettings.CONFIG: { + 'lms.djangoapp': { + 'production': {PluginSettings.RELATIVE_PATH: 'settings.production'}, + 'common': {PluginSettings.RELATIVE_PATH: 'settings.common'}, + 'devstack': {PluginSettings.RELATIVE_PATH: 'settings.devstack'}, + }, + 'cms.djangoapp': { + 'production': {PluginSettings.RELATIVE_PATH: 'settings.production'}, + 'common': {PluginSettings.RELATIVE_PATH: 'settings.common'}, + 'devstack': {PluginSettings.RELATIVE_PATH: 'settings.devstack'}, + } + }, + # Configuration setting for Plugin Signals for this app. + PluginSignals.CONFIG: { + # Configure the Plugin Signals for each Project Type, as needed. + 'cms.djangoapp': { + # List of all plugin Signal receivers for this app and project type. + PluginSignals.RECEIVERS: [{ + # The name of the app's signal receiver function. + PluginSignals.RECEIVER_FUNC_NAME: 'receive_course_publish', + + # The full path to the module where the signal is defined. + PluginSignals.SIGNAL_PATH: 'xmodule.modulestore.django.COURSE_PUBLISHED', + }], + } + }, + } + + def ready(self): + """ + Import our Celery tasks for initialization. + """ + super().ready() + + from . import tasks # pylint: disable=import-outside-toplevel, unused-import diff --git a/event_sink_clickhouse/settings/__init__.py b/event_sink_clickhouse/settings/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/event_sink_clickhouse/settings/common.py b/event_sink_clickhouse/settings/common.py new file mode 100644 index 0000000..1b6d117 --- /dev/null +++ b/event_sink_clickhouse/settings/common.py @@ -0,0 +1,19 @@ +""" +Default settings for the openedx_event_sink_clickhouse app. +""" + + +def plugin_settings(settings): + """ + Adds default settings + """ + settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG = { + # URL to a running ClickHouse server's HTTP interface. ex: https://foo.openedx.org:8443/ or + # http://foo.openedx.org:8123/ . Note that we only support the ClickHouse HTTP interface + # to avoid pulling in more dependencies to the platform than necessary. + "url": "http://clickhouse:8123", + "username": "changeme", + "password": "changeme", + "database": "event_sink", + "timeout_secs": 3, + } diff --git a/event_sink_clickhouse/settings/production.py b/event_sink_clickhouse/settings/production.py new file mode 100644 index 0000000..18fee66 --- /dev/null +++ b/event_sink_clickhouse/settings/production.py @@ -0,0 +1,13 @@ +""" +Production settings for the openedx_event_sink_clickhouse app. +""" + + +def plugin_settings(settings): + """ + Override the default app settings with production settings. + """ + settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG = settings.ENV_TOKENS.get( + 'EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG', + settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG + ) diff --git a/event_sink_clickhouse/signals.py b/event_sink_clickhouse/signals.py new file mode 100644 index 0000000..f752120 --- /dev/null +++ b/event_sink_clickhouse/signals.py @@ -0,0 +1,13 @@ +""" +Signal handler functions, mapped to specific signals in apps.py. +""" + + +def receive_course_publish(sender, course_key, **kwargs): # pylint: disable=unused-argument + """ + Receives COURSE_PUBLISHED signal and queues the dump job. + """ + # import here, because signal is registered at startup, but items in tasks are not yet able to be loaded + from .tasks import dump_course_to_clickhouse # pylint: disable=import-outside-toplevel + + dump_course_to_clickhouse.delay(str(course_key)) diff --git a/event_sink_clickhouse/sinks/__init__.py b/event_sink_clickhouse/sinks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/event_sink_clickhouse/sinks/base_sink.py b/event_sink_clickhouse/sinks/base_sink.py new file mode 100644 index 0000000..89ca440 --- /dev/null +++ b/event_sink_clickhouse/sinks/base_sink.py @@ -0,0 +1,26 @@ +""" +Base classes for event sinks +""" + +from django.conf import settings + + +class BaseSink: + """ + Base class for ClickHouse event sink, allows overwriting of default settings + """ + def __init__(self, connection_overrides, log): + self.log = log + self.ch_url = settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG["url"] + self.ch_auth = (settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG["username"], + settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG["password"]) + self.ch_database = settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG["database"] + self.ch_timeout_secs = settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG["timeout_secs"] + + # If any overrides to the ClickHouse connection + if connection_overrides: + self.ch_url = connection_overrides.get("url", self.ch_url) + self.ch_auth = (connection_overrides.get("username", self.ch_auth[0]), + connection_overrides.get("password", self.ch_auth[1])) + self.ch_database = connection_overrides.get("database", self.ch_database) + self.ch_timeout_secs = connection_overrides.get("timeout_secs", self.ch_timeout_secs) diff --git a/event_sink_clickhouse/sinks/course_published.py b/event_sink_clickhouse/sinks/course_published.py new file mode 100644 index 0000000..92c7567 --- /dev/null +++ b/event_sink_clickhouse/sinks/course_published.py @@ -0,0 +1,226 @@ +""" +Handler for the CMS COURSE_PUBLISHED event + +Does the following: +- Pulls the course structure from modulestore +- Serialize the xblocks and their parent/child relationships +- Sends them to ClickHouse in CSV format + +Note that the serialization format does not include all fields as there may be things like +LTI passwords and other secrets. We just take the fields necessary for reporting at this time. +""" + +import csv +import io + +import requests +from django.utils import timezone + +from .base_sink import BaseSink + + +class CoursePublishedSink(BaseSink): + """ + Event sink for the COURSE_PUBLISHED signal + """ + @staticmethod + def _get_detached_xblock_types(): + """ + Import and return DETACHED_XBLOCK_TYPES. + Placed here to avoid model import at startup and to facilitate mocking them in testing. + """ + # pylint: disable=import-outside-toplevel,import-error + from xmodule.modulestore.store_utilities import DETACHED_XBLOCK_TYPES + return DETACHED_XBLOCK_TYPES + + @staticmethod + def _get_modulestore(): + """ + Import and return modulestore. + Placed here to avoid model import at startup and to facilitate mocking them in testing. + """ + # pylint: disable=import-outside-toplevel,import-error + from xmodule.modulestore.django import modulestore + return modulestore() + + @staticmethod + def _get_course_overview_model(): + """ + Import and return CourseOverview. + Placed here to avoid model import at startup and to facilitate mocking them in testing. + """ + # pylint: disable=import-outside-toplevel,import-error + from openedx.core.djangoapps.content.course_overviews.models import CourseOverview + return CourseOverview + + @staticmethod + def strip_branch_and_version(location): + """ + Removes the branch and version information from a location. + Args: + location: an xblock's location. + Returns: that xblock's location without branch and version information. + """ + return location.for_branch(None) + + @staticmethod + def get_course_last_published(course_key): + """ + Get approximate last publish date for the given course. + + We use the 'modified' column in the CourseOverview table as a quick and easy + (although perhaps inexact) way of determining when a course was last + published. This works because CourseOverview rows are re-written upon + course publish. + + Args: + course_key: a CourseKey + + Returns: The datetime the course was last published at, stringified. + Uses Python's default str(...) implementation for datetimes, which + is sortable and similar to ISO 8601: + https://docs.python.org/3/library/datetime.html#datetime.date.__str__ + """ + CourseOverview = CoursePublishedSink._get_course_overview_model() + approx_last_published = CourseOverview.get_from_id(course_key).modified + return str(approx_last_published) + + @staticmethod + def serialize_item(item, index, detached_xblock_types): + """ + Args: + item: an XBlock + index: a number indicating where the item falls in the course hierarchy + + Returns: + fields: a *limited* dictionary of an XBlock's field names and values + block_type: the name of the XBlock's type (i.e. 'course' + or 'problem') + """ + course_key = item.scope_ids.usage_id.course_key + block_type = item.scope_ids.block_type + + rtn_fields = { + 'org': course_key.org, + 'course_key': str(course_key), + 'course': course_key.course, + 'run': course_key.run, + 'location': str(item.location), + 'display_name': item.display_name_with_default.replace("'", "\'"), + 'block_type': block_type, + 'detached': 1 if block_type in detached_xblock_types else 0, + 'edited_on': str(getattr(item, 'edited_on', '')), + 'time_last_dumped': str(timezone.now()), + 'order': index, + } + + return rtn_fields + + def serialize_course(self, course_id): + """ + Serializes a course into a CSV of nodes and relationships. + + Args: + course_id: CourseKey of the course we want to serialize + + Returns: + nodes: a list of dicts representing xblocks for the course + relationships: a list of dicts representing relationships between nodes + """ + modulestore = CoursePublishedSink._get_modulestore() + detached_xblock_types = CoursePublishedSink._get_detached_xblock_types() + + # create a location to node mapping we'll need later for + # writing relationships + location_to_node = {} + items = modulestore.get_items(course_id) + + # create nodes + i = 0 + for item in items: + i += 1 + fields = self.serialize_item(item, i, detached_xblock_types) + location_to_node[self.strip_branch_and_version(item.location)] = fields + + # create relationships + relationships = [] + for item in items: + for index, child in enumerate(item.get_children()): + parent_node = location_to_node.get(self.strip_branch_and_version(item.location)) + child_node = location_to_node.get(self.strip_branch_and_version(child.location)) + + if parent_node is not None and child_node is not None: + relationship = { + 'course_key': str(course_id), + 'parent_location': str(parent_node["location"]), + 'child_location': str(child_node["location"]), + 'order': index + } + relationships.append(relationship) + + nodes = list(location_to_node.values()) + return nodes, relationships + + def dump(self, course_key): + """ + Do the serialization and send to ClickHouse + """ + nodes, relationships = self.serialize_course(course_key) + + self.log.info( + "Now dumping %s to ClickHouse: %d nodes and %d relationships", + course_key, + len(nodes), + len(relationships), + ) + + course_string = str(course_key) + + try: + # Params that begin with "param_" will be used in the query replacement + # all others are ClickHouse settings. + params = { + # Fail early on bulk inserts + "input_format_allow_errors_num": 1, + "input_format_allow_errors_ratio": 0.1, + } + + # "query" is a special param for the query, it's the best way to get the FORMAT CSV in there. + params["query"] = f"INSERT INTO {self.ch_database}.course_blocks FORMAT CSV" + + output = io.StringIO() + writer = csv.writer(output, quoting=csv.QUOTE_NONNUMERIC) + + for node in nodes: + writer.writerow(node.values()) + + response = requests.post(self.ch_url, data=output.getvalue(), params=params, auth=self.ch_auth, + timeout=self.ch_timeout_secs) + self.log.info(response.headers) + self.log.info(response) + self.log.info(response.text) + response.raise_for_status() + + # Just overwriting the previous query + params["query"] = f"INSERT INTO {self.ch_database}.course_relationships FORMAT CSV" + output = io.StringIO() + writer = csv.writer(output, quoting=csv.QUOTE_NONNUMERIC) + + for relationship in relationships: + writer.writerow(relationship.values()) + + response = requests.post(self.ch_url, data=output.getvalue(), params=params, auth=self.ch_auth, + timeout=self.ch_timeout_secs) + self.log.info(response.headers) + self.log.info(response) + self.log.info(response.text) + response.raise_for_status() + + self.log.info("Completed dumping %s to ClickHouse", course_key) + + except Exception: + self.log.exception( + "Error trying to dump course %s to ClickHouse!", + course_string + ) + raise diff --git a/event_sink_clickhouse/tasks.py b/event_sink_clickhouse/tasks.py new file mode 100644 index 0000000..60d54e1 --- /dev/null +++ b/event_sink_clickhouse/tasks.py @@ -0,0 +1,30 @@ +""" +This file contains a management command for exporting course modulestore data to ClickHouse. +""" + +import logging + +from celery import shared_task +from edx_django_utils.monitoring import set_code_owner_attribute +from opaque_keys.edx.keys import CourseKey + +from .sinks.course_published import CoursePublishedSink + +log = logging.getLogger(__name__) +celery_log = logging.getLogger('edx.celery.task') + + +@shared_task +@set_code_owner_attribute +def dump_course_to_clickhouse(course_key_string, connection_overrides=None): + """ + Serialize a course and writes it to ClickHouse. + + Arguments: + course_key_string: course key for the course to be exported + connection_overrides (dict): overrides to ClickHouse connection + parameters specified in `settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG`. + """ + course_key = CourseKey.from_string(course_key_string) + sink = CoursePublishedSink(connection_overrides=connection_overrides, log=celery_log) + sink.dump(course_key) diff --git a/requirements/base.in b/requirements/base.in index a954780..c08ac0a 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,5 +1,8 @@ # Core requirements for using this application -c constraints.txt +celery # Asynchronous task execution library Django # Web application framework - +requests # HTTP request library +edx-django-utils # Django utilities, we use caching and monitoring +edx-opaque-keys # Parsing library for course and usage keys diff --git a/requirements/base.txt b/requirements/base.txt index a615544..6dc376e 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,13 +4,85 @@ # # make upgrade # +amqp==5.1.1 + # via kombu asgiref==3.6.0 # via django +billiard==3.6.4.0 + # via celery +celery==5.2.7 + # via -r requirements/base.in +certifi==2022.12.7 + # via requests +cffi==1.15.1 + # via pynacl +charset-normalizer==3.1.0 + # via requests +click==8.1.3 + # via + # celery + # click-didyoumean + # click-plugins + # click-repl + # edx-django-utils +click-didyoumean==0.3.0 + # via celery +click-plugins==1.1.1 + # via celery +click-repl==0.2.0 + # via celery django==3.2.18 # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # -r requirements/base.in + # django-crum + # edx-django-utils +django-crum==0.7.9 + # via edx-django-utils +django-waffle==3.0.0 + # via edx-django-utils +edx-django-utils==5.4.0 + # via -r requirements/base.in +edx-opaque-keys==2.3.0 + # via -r requirements/base.in +idna==3.4 + # via requests +kombu==5.2.4 + # via celery +newrelic==8.8.0 + # via edx-django-utils +pbr==5.11.1 + # via stevedore +prompt-toolkit==3.0.38 + # via click-repl +psutil==5.9.5 + # via edx-django-utils +pycparser==2.21 + # via cffi +pymongo==3.13.0 + # via edx-opaque-keys +pynacl==1.5.0 + # via edx-django-utils pytz==2023.3 - # via django + # via + # celery + # django +requests==2.29.0 + # via -r requirements/base.in +six==1.16.0 + # via click-repl sqlparse==0.4.4 # via django +stevedore==5.0.0 + # via + # edx-django-utils + # edx-opaque-keys +urllib3==1.26.15 + # via requests +vine==5.0.0 + # via + # amqp + # celery + # kombu +wcwidth==0.2.6 + # via prompt-toolkit diff --git a/requirements/ci.txt b/requirements/ci.txt index 1347919..bed1d01 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -12,7 +12,7 @@ filelock==3.12.0 # virtualenv packaging==23.1 # via tox -platformdirs==3.2.0 +platformdirs==3.5.0 # via virtualenv pluggy==1.0.0 # via tox @@ -29,5 +29,5 @@ tox==3.28.0 # tox-battery tox-battery==0.6.1 # via -r requirements/ci.in -virtualenv==20.22.0 +virtualenv==20.23.0 # via tox diff --git a/requirements/dev.txt b/requirements/dev.txt index af8de8e..bea09b2 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -4,6 +4,10 @@ # # make upgrade # +amqp==5.1.1 + # via + # -r requirements/quality.txt + # kombu asgiref==3.6.0 # via # -r requirements/quality.txt @@ -13,29 +17,64 @@ astroid==2.15.4 # -r requirements/quality.txt # pylint # pylint-celery +billiard==3.6.4.0 + # via + # -r requirements/quality.txt + # celery build==0.10.0 # via # -r requirements/pip-tools.txt # pip-tools +celery==5.2.7 + # via -r requirements/quality.txt +certifi==2022.12.7 + # via + # -r requirements/quality.txt + # requests +cffi==1.15.1 + # via + # -r requirements/quality.txt + # pynacl chardet==5.1.0 # via diff-cover +charset-normalizer==3.1.0 + # via + # -r requirements/quality.txt + # requests click==8.1.3 # via # -r requirements/pip-tools.txt # -r requirements/quality.txt + # celery + # click-didyoumean # click-log + # click-plugins + # click-repl # code-annotations + # edx-django-utils # edx-lint # pip-tools +click-didyoumean==0.3.0 + # via + # -r requirements/quality.txt + # celery click-log==0.4.0 # via # -r requirements/quality.txt # edx-lint +click-plugins==1.1.1 + # via + # -r requirements/quality.txt + # celery +click-repl==0.2.0 + # via + # -r requirements/quality.txt + # celery code-annotations==1.3.0 # via # -r requirements/quality.txt # edx-lint -coverage[toml]==7.2.3 +coverage[toml]==7.2.5 # via # -r requirements/quality.txt # pytest-cov @@ -53,11 +92,25 @@ django==3.2.18 # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # -r requirements/quality.txt + # django-crum + # edx-django-utils # edx-i18n-tools +django-crum==0.7.9 + # via + # -r requirements/quality.txt + # edx-django-utils +django-waffle==3.0.0 + # via + # -r requirements/quality.txt + # edx-django-utils +edx-django-utils==5.4.0 + # via -r requirements/quality.txt edx-i18n-tools==0.9.2 # via -r requirements/dev.in edx-lint==5.3.4 # via -r requirements/quality.txt +edx-opaque-keys==2.3.0 + # via -r requirements/quality.txt exceptiongroup==1.1.1 # via # -r requirements/quality.txt @@ -67,6 +120,10 @@ filelock==3.12.0 # -r requirements/ci.txt # tox # virtualenv +idna==3.4 + # via + # -r requirements/quality.txt + # requests iniconfig==2.0.0 # via # -r requirements/quality.txt @@ -80,6 +137,10 @@ jinja2==3.1.2 # -r requirements/quality.txt # code-annotations # diff-cover +kombu==5.2.4 + # via + # -r requirements/quality.txt + # celery lazy-object-proxy==1.9.0 # via # -r requirements/quality.txt @@ -92,6 +153,10 @@ mccabe==0.7.0 # via # -r requirements/quality.txt # pylint +newrelic==8.8.0 + # via + # -r requirements/quality.txt + # edx-django-utils packaging==23.1 # via # -r requirements/ci.txt @@ -108,7 +173,7 @@ pbr==5.11.1 # stevedore pip-tools==6.13.0 # via -r requirements/pip-tools.txt -platformdirs==3.2.0 +platformdirs==3.5.0 # via # -r requirements/ci.txt # -r requirements/quality.txt @@ -123,17 +188,29 @@ pluggy==1.0.0 # tox polib==1.2.0 # via edx-i18n-tools +prompt-toolkit==3.0.38 + # via + # -r requirements/quality.txt + # click-repl +psutil==5.9.5 + # via + # -r requirements/quality.txt + # edx-django-utils py==1.11.0 # via # -r requirements/ci.txt # tox pycodestyle==2.10.0 # via -r requirements/quality.txt +pycparser==2.21 + # via + # -r requirements/quality.txt + # cffi pydocstyle==6.3.0 # via -r requirements/quality.txt pygments==2.15.1 # via diff-cover -pylint==2.17.2 +pylint==2.17.3 # via # -r requirements/quality.txt # edx-lint @@ -153,6 +230,14 @@ pylint-plugin-utils==0.7 # -r requirements/quality.txt # pylint-celery # pylint-django +pymongo==3.13.0 + # via + # -r requirements/quality.txt + # edx-opaque-keys +pynacl==1.5.0 + # via + # -r requirements/quality.txt + # edx-django-utils pyproject-hooks==1.0.0 # via # -r requirements/pip-tools.txt @@ -173,16 +258,25 @@ python-slugify==8.0.1 pytz==2023.3 # via # -r requirements/quality.txt + # celery # django pyyaml==6.0 # via # -r requirements/quality.txt # code-annotations # edx-i18n-tools + # responses +requests==2.29.0 + # via + # -r requirements/quality.txt + # responses +responses==0.23.1 + # via -r requirements/quality.txt six==1.16.0 # via # -r requirements/ci.txt # -r requirements/quality.txt + # click-repl # edx-lint # tox snowballstemmer==2.2.0 @@ -197,6 +291,8 @@ stevedore==5.0.0 # via # -r requirements/quality.txt # code-annotations + # edx-django-utils + # edx-opaque-keys text-unidecode==1.3 # via # -r requirements/quality.txt @@ -212,7 +308,7 @@ tomli==2.0.1 # pyproject-hooks # pytest # tox -tomlkit==0.11.7 +tomlkit==0.11.8 # via # -r requirements/quality.txt # pylint @@ -223,15 +319,34 @@ tox==3.28.0 # tox-battery tox-battery==0.6.1 # via -r requirements/ci.txt +types-pyyaml==6.0.12.9 + # via + # -r requirements/quality.txt + # responses typing-extensions==4.5.0 # via # -r requirements/quality.txt # astroid # pylint -virtualenv==20.22.0 +urllib3==1.26.15 + # via + # -r requirements/quality.txt + # requests + # responses +vine==5.0.0 + # via + # -r requirements/quality.txt + # amqp + # celery + # kombu +virtualenv==20.23.0 # via # -r requirements/ci.txt # tox +wcwidth==0.2.6 + # via + # -r requirements/quality.txt + # prompt-toolkit wheel==0.40.0 # via # -r requirements/pip-tools.txt diff --git a/requirements/doc.txt b/requirements/doc.txt index 5caa34f..23fc8d3 100644 --- a/requirements/doc.txt +++ b/requirements/doc.txt @@ -8,6 +8,10 @@ accessible-pygments==0.0.4 # via pydata-sphinx-theme alabaster==0.7.13 # via sphinx +amqp==5.1.1 + # via + # -r requirements/test.txt + # kombu asgiref==3.6.0 # via # -r requirements/test.txt @@ -18,21 +22,52 @@ babel==2.12.1 # sphinx beautifulsoup4==4.12.2 # via pydata-sphinx-theme +billiard==3.6.4.0 + # via + # -r requirements/test.txt + # celery bleach==6.0.0 # via readme-renderer build==0.10.0 # via -r requirements/doc.in +celery==5.2.7 + # via -r requirements/test.txt certifi==2022.12.7 - # via requests + # via + # -r requirements/test.txt + # requests +cffi==1.15.1 + # via + # -r requirements/test.txt + # pynacl charset-normalizer==3.1.0 - # via requests + # via + # -r requirements/test.txt + # requests click==8.1.3 # via # -r requirements/test.txt + # celery + # click-didyoumean + # click-plugins + # click-repl # code-annotations + # edx-django-utils +click-didyoumean==0.3.0 + # via + # -r requirements/test.txt + # celery +click-plugins==1.1.1 + # via + # -r requirements/test.txt + # celery +click-repl==0.2.0 + # via + # -r requirements/test.txt + # celery code-annotations==1.3.0 # via -r requirements/test.txt -coverage[toml]==7.2.3 +coverage[toml]==7.2.5 # via # -r requirements/test.txt # pytest-cov @@ -40,6 +75,16 @@ django==3.2.18 # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # -r requirements/test.txt + # django-crum + # edx-django-utils +django-crum==0.7.9 + # via + # -r requirements/test.txt + # edx-django-utils +django-waffle==3.0.0 + # via + # -r requirements/test.txt + # edx-django-utils doc8==1.1.1 # via -r requirements/doc.in docutils==0.19 @@ -49,12 +94,18 @@ docutils==0.19 # readme-renderer # restructuredtext-lint # sphinx +edx-django-utils==5.4.0 + # via -r requirements/test.txt +edx-opaque-keys==2.3.0 + # via -r requirements/test.txt exceptiongroup==1.1.1 # via # -r requirements/test.txt # pytest idna==3.4 - # via requests + # via + # -r requirements/test.txt + # requests imagesize==1.4.1 # via sphinx importlib-metadata==6.6.0 @@ -77,6 +128,10 @@ jinja2==3.1.2 # sphinx keyring==23.13.1 # via twine +kombu==5.2.4 + # via + # -r requirements/test.txt + # celery markdown-it-py==2.2.0 # via rich markupsafe==2.1.2 @@ -87,6 +142,10 @@ mdurl==0.1.2 # via markdown-it-py more-itertools==9.1.0 # via jaraco-classes +newrelic==8.8.0 + # via + # -r requirements/test.txt + # edx-django-utils packaging==23.1 # via # -r requirements/test.txt @@ -104,6 +163,18 @@ pluggy==1.0.0 # via # -r requirements/test.txt # pytest +prompt-toolkit==3.0.38 + # via + # -r requirements/test.txt + # click-repl +psutil==5.9.5 + # via + # -r requirements/test.txt + # edx-django-utils +pycparser==2.21 + # via + # -r requirements/test.txt + # cffi pydata-sphinx-theme==0.13.3 # via sphinx-book-theme pygments==2.15.1 @@ -114,6 +185,14 @@ pygments==2.15.1 # readme-renderer # rich # sphinx +pymongo==3.13.0 + # via + # -r requirements/test.txt + # edx-opaque-keys +pynacl==1.5.0 + # via + # -r requirements/test.txt + # edx-django-utils pyproject-hooks==1.0.0 # via build pytest==7.3.1 @@ -133,28 +212,37 @@ pytz==2023.3 # via # -r requirements/test.txt # babel + # celery # django pyyaml==6.0 # via # -r requirements/test.txt # code-annotations + # responses readme-renderer==37.3 # via twine -requests==2.28.2 +requests==2.29.0 # via + # -r requirements/test.txt # requests-toolbelt + # responses # sphinx # twine -requests-toolbelt==0.10.1 +requests-toolbelt==1.0.0 # via twine +responses==0.23.1 + # via -r requirements/test.txt restructuredtext-lint==1.4.0 # via doc8 rfc3986==2.0.0 # via twine -rich==13.3.4 +rich==13.3.5 # via twine six==1.16.0 - # via bleach + # via + # -r requirements/test.txt + # bleach + # click-repl snowballstemmer==2.2.0 # via sphinx soupsieve==2.4.1 @@ -188,6 +276,8 @@ stevedore==5.0.0 # -r requirements/test.txt # code-annotations # doc8 + # edx-django-utils + # edx-opaque-keys text-unidecode==1.3 # via # -r requirements/test.txt @@ -202,14 +292,30 @@ tomli==2.0.1 # pytest twine==4.0.2 # via -r requirements/doc.in +types-pyyaml==6.0.12.9 + # via + # -r requirements/test.txt + # responses typing-extensions==4.5.0 # via # pydata-sphinx-theme # rich urllib3==1.26.15 # via + # -r requirements/test.txt # requests + # responses # twine +vine==5.0.0 + # via + # -r requirements/test.txt + # amqp + # celery + # kombu +wcwidth==0.2.6 + # via + # -r requirements/test.txt + # prompt-toolkit webencodings==0.5.1 # via bleach zipp==3.15.0 diff --git a/requirements/pip.txt b/requirements/pip.txt index 4767414..e6827ba 100644 --- a/requirements/pip.txt +++ b/requirements/pip.txt @@ -8,7 +8,7 @@ wheel==0.40.0 # via -r requirements/pip.in # The following packages are considered to be unsafe in a requirements file: -pip==23.1.1 +pip==23.1.2 # via -r requirements/pip.in setuptools==67.7.2 # via -r requirements/pip.in diff --git a/requirements/quality.txt b/requirements/quality.txt index 1e3309c..241350d 100644 --- a/requirements/quality.txt +++ b/requirements/quality.txt @@ -4,6 +4,10 @@ # # make upgrade # +amqp==5.1.1 + # via + # -r requirements/test.txt + # kombu asgiref==3.6.0 # via # -r requirements/test.txt @@ -12,19 +16,54 @@ astroid==2.15.4 # via # pylint # pylint-celery +billiard==3.6.4.0 + # via + # -r requirements/test.txt + # celery +celery==5.2.7 + # via -r requirements/test.txt +certifi==2022.12.7 + # via + # -r requirements/test.txt + # requests +cffi==1.15.1 + # via + # -r requirements/test.txt + # pynacl +charset-normalizer==3.1.0 + # via + # -r requirements/test.txt + # requests click==8.1.3 # via # -r requirements/test.txt + # celery + # click-didyoumean # click-log + # click-plugins + # click-repl # code-annotations + # edx-django-utils # edx-lint +click-didyoumean==0.3.0 + # via + # -r requirements/test.txt + # celery click-log==0.4.0 # via edx-lint +click-plugins==1.1.1 + # via + # -r requirements/test.txt + # celery +click-repl==0.2.0 + # via + # -r requirements/test.txt + # celery code-annotations==1.3.0 # via # -r requirements/test.txt # edx-lint -coverage[toml]==7.2.3 +coverage[toml]==7.2.5 # via # -r requirements/test.txt # pytest-cov @@ -34,12 +73,30 @@ django==3.2.18 # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # -r requirements/test.txt + # django-crum + # edx-django-utils +django-crum==0.7.9 + # via + # -r requirements/test.txt + # edx-django-utils +django-waffle==3.0.0 + # via + # -r requirements/test.txt + # edx-django-utils +edx-django-utils==5.4.0 + # via -r requirements/test.txt edx-lint==5.3.4 # via -r requirements/quality.in +edx-opaque-keys==2.3.0 + # via -r requirements/test.txt exceptiongroup==1.1.1 # via # -r requirements/test.txt # pytest +idna==3.4 + # via + # -r requirements/test.txt + # requests iniconfig==2.0.0 # via # -r requirements/test.txt @@ -52,6 +109,10 @@ jinja2==3.1.2 # via # -r requirements/test.txt # code-annotations +kombu==5.2.4 + # via + # -r requirements/test.txt + # celery lazy-object-proxy==1.9.0 # via astroid markupsafe==2.1.2 @@ -60,6 +121,10 @@ markupsafe==2.1.2 # jinja2 mccabe==0.7.0 # via pylint +newrelic==8.8.0 + # via + # -r requirements/test.txt + # edx-django-utils packaging==23.1 # via # -r requirements/test.txt @@ -68,17 +133,29 @@ pbr==5.11.1 # via # -r requirements/test.txt # stevedore -platformdirs==3.2.0 +platformdirs==3.5.0 # via pylint pluggy==1.0.0 # via # -r requirements/test.txt # pytest +prompt-toolkit==3.0.38 + # via + # -r requirements/test.txt + # click-repl +psutil==5.9.5 + # via + # -r requirements/test.txt + # edx-django-utils pycodestyle==2.10.0 # via -r requirements/quality.in +pycparser==2.21 + # via + # -r requirements/test.txt + # cffi pydocstyle==6.3.0 # via -r requirements/quality.in -pylint==2.17.2 +pylint==2.17.3 # via # edx-lint # pylint-celery @@ -92,6 +169,14 @@ pylint-plugin-utils==0.7 # via # pylint-celery # pylint-django +pymongo==3.13.0 + # via + # -r requirements/test.txt + # edx-opaque-keys +pynacl==1.5.0 + # via + # -r requirements/test.txt + # edx-django-utils pytest==7.3.1 # via # -r requirements/test.txt @@ -108,13 +193,24 @@ python-slugify==8.0.1 pytz==2023.3 # via # -r requirements/test.txt + # celery # django pyyaml==6.0 # via # -r requirements/test.txt # code-annotations + # responses +requests==2.29.0 + # via + # -r requirements/test.txt + # responses +responses==0.23.1 + # via -r requirements/test.txt six==1.16.0 - # via edx-lint + # via + # -r requirements/test.txt + # click-repl + # edx-lint snowballstemmer==2.2.0 # via pydocstyle sqlparse==0.4.4 @@ -125,6 +221,8 @@ stevedore==5.0.0 # via # -r requirements/test.txt # code-annotations + # edx-django-utils + # edx-opaque-keys text-unidecode==1.3 # via # -r requirements/test.txt @@ -135,11 +233,30 @@ tomli==2.0.1 # coverage # pylint # pytest -tomlkit==0.11.7 +tomlkit==0.11.8 # via pylint +types-pyyaml==6.0.12.9 + # via + # -r requirements/test.txt + # responses typing-extensions==4.5.0 # via # astroid # pylint +urllib3==1.26.15 + # via + # -r requirements/test.txt + # requests + # responses +vine==5.0.0 + # via + # -r requirements/test.txt + # amqp + # celery + # kombu +wcwidth==0.2.6 + # via + # -r requirements/test.txt + # prompt-toolkit wrapt==1.15.0 # via astroid diff --git a/requirements/test.in b/requirements/test.in index 6797160..bb78ea0 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -6,3 +6,4 @@ pytest-cov # pytest extension for code coverage statistics pytest-django # pytest extension for better Django support code-annotations # provides commands used by the pii_check make target. +responses # mocks for the requests library diff --git a/requirements/test.txt b/requirements/test.txt index 4dbf8d8..dd30475 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -4,33 +4,122 @@ # # make upgrade # +amqp==5.1.1 + # via + # -r requirements/base.txt + # kombu asgiref==3.6.0 # via # -r requirements/base.txt # django +billiard==3.6.4.0 + # via + # -r requirements/base.txt + # celery +celery==5.2.7 + # via -r requirements/base.txt +certifi==2022.12.7 + # via + # -r requirements/base.txt + # requests +cffi==1.15.1 + # via + # -r requirements/base.txt + # pynacl +charset-normalizer==3.1.0 + # via + # -r requirements/base.txt + # requests click==8.1.3 - # via code-annotations + # via + # -r requirements/base.txt + # celery + # click-didyoumean + # click-plugins + # click-repl + # code-annotations + # edx-django-utils +click-didyoumean==0.3.0 + # via + # -r requirements/base.txt + # celery +click-plugins==1.1.1 + # via + # -r requirements/base.txt + # celery +click-repl==0.2.0 + # via + # -r requirements/base.txt + # celery code-annotations==1.3.0 # via -r requirements/test.in -coverage[toml]==7.2.3 +coverage[toml]==7.2.5 # via pytest-cov # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # -r requirements/base.txt + # django-crum + # edx-django-utils +django-crum==0.7.9 + # via + # -r requirements/base.txt + # edx-django-utils +django-waffle==3.0.0 + # via + # -r requirements/base.txt + # edx-django-utils +edx-django-utils==5.4.0 + # via -r requirements/base.txt +edx-opaque-keys==2.3.0 + # via -r requirements/base.txt exceptiongroup==1.1.1 # via pytest +idna==3.4 + # via + # -r requirements/base.txt + # requests iniconfig==2.0.0 # via pytest jinja2==3.1.2 # via code-annotations +kombu==5.2.4 + # via + # -r requirements/base.txt + # celery markupsafe==2.1.2 # via jinja2 +newrelic==8.8.0 + # via + # -r requirements/base.txt + # edx-django-utils packaging==23.1 # via pytest pbr==5.11.1 - # via stevedore + # via + # -r requirements/base.txt + # stevedore pluggy==1.0.0 # via pytest +prompt-toolkit==3.0.38 + # via + # -r requirements/base.txt + # click-repl +psutil==5.9.5 + # via + # -r requirements/base.txt + # edx-django-utils +pycparser==2.21 + # via + # -r requirements/base.txt + # cffi +pymongo==3.13.0 + # via + # -r requirements/base.txt + # edx-opaque-keys +pynacl==1.5.0 + # via + # -r requirements/base.txt + # edx-django-utils pytest==7.3.1 # via # pytest-cov @@ -44,18 +133,52 @@ python-slugify==8.0.1 pytz==2023.3 # via # -r requirements/base.txt + # celery # django pyyaml==6.0 - # via code-annotations + # via + # code-annotations + # responses +requests==2.29.0 + # via + # -r requirements/base.txt + # responses +responses==0.23.1 + # via -r requirements/test.in +six==1.16.0 + # via + # -r requirements/base.txt + # click-repl sqlparse==0.4.4 # via # -r requirements/base.txt # django stevedore==5.0.0 - # via code-annotations + # via + # -r requirements/base.txt + # code-annotations + # edx-django-utils + # edx-opaque-keys text-unidecode==1.3 # via python-slugify tomli==2.0.1 # via # coverage # pytest +types-pyyaml==6.0.12.9 + # via responses +urllib3==1.26.15 + # via + # -r requirements/base.txt + # requests + # responses +vine==5.0.0 + # via + # -r requirements/base.txt + # amqp + # celery + # kombu +wcwidth==0.2.6 + # via + # -r requirements/base.txt + # prompt-toolkit diff --git a/setup.py b/setup.py index ea1feb7..55e68c3 100755 --- a/setup.py +++ b/setup.py @@ -112,7 +112,13 @@ def is_requirement(line): include=['event_sink_clickhouse', 'event_sink_clickhouse.*'], exclude=["*tests"], ), - + entry_points={ + "lms.djangoapp": [ + ], + "cms.djangoapp": [ + "event-sink-clickhouse = event_sink_clickhouse.apps:EventSinkClickhouseConfig", + ], + }, include_package_data=True, install_requires=load_requirements('requirements/base.in'), python_requires=">=3.8", diff --git a/test_utils/helpers.py b/test_utils/helpers.py new file mode 100644 index 0000000..b1de1fd --- /dev/null +++ b/test_utils/helpers.py @@ -0,0 +1,210 @@ +""" +Helper functions for tests +""" + +import csv +import random +import string +from datetime import datetime +from io import StringIO +from unittest.mock import MagicMock, Mock + +from opaque_keys.edx.keys import CourseKey +from opaque_keys.edx.locator import BlockUsageLocator + +from event_sink_clickhouse.sinks.course_published import CoursePublishedSink + +ORIG_IMPORT = __import__ +ORG = "testorg" +COURSE = "testcourse" +COURSE_RUN = "2023_Fall" + + +class FakeXBlock: + """ + Fakes the parameters of an XBlock that we care about. + """ + def __init__(self, identifier, detached_block=False): + self.block_type = "course_info" if detached_block else "vertical" + self.scope_ids = Mock() + self.scope_ids.usage_id.course_key = course_key_factory() + self.scope_ids.block_type = self.block_type + self.location = block_usage_locator_factory() + self.display_name_with_default = f"Display name {identifier}" + self.edited_on = datetime.now() + self.children = [] + + def get_children(self): + """ + Fakes the method of the same name from an XBlock. + """ + return self.children + + +def course_str_factory(): + """ + Return a valid course key string. + """ + course_str = f"course-v1:{ORG}+{COURSE}+{COURSE_RUN}" + return course_str + + +def course_key_factory(): + """ + Return a CourseKey object from our course key string. + """ + return CourseKey.from_string(course_str_factory()) + + +def block_usage_locator_factory(): + """ + Create a BlockUsageLocator with a random id. + """ + block_id = ''.join(random.choices(string.ascii_letters, k=10)) + return BlockUsageLocator(course_key_factory(), block_type="category", block_id=block_id, deprecated=True) + + +def mock_course_overview(): + """ + Create a fake CourseOverview object that supports just the things we care about. + """ + mock_overview = MagicMock() + mock_overview.get_from_id = MagicMock() + mock_overview.get_from_id.return_value.modified = datetime.now() + return mock_overview + + +def mock_detached_xblock_types(): + """ + Mock the return results of xmodule.modulestore.store_utilities.DETACHED_XBLOCK_TYPES + """ + # Current values as of 2023-05-01 + return {'static_tab', 'about', 'course_info'} + + +def get_clickhouse_http_params(): + """ + Get the params used in ClickHouse queries. + """ + blocks_params = { + "input_format_allow_errors_num": 1, + "input_format_allow_errors_ratio": 0.1, + "query": "INSERT INTO cool_data.course_blocks FORMAT CSV" + } + relationships_params = { + "input_format_allow_errors_num": 1, + "input_format_allow_errors_ratio": 0.1, + "query": "INSERT INTO cool_data.course_relationships FORMAT CSV" + } + + return blocks_params, relationships_params + + +def course_factory(): + """ + Return a fake course structure that exercises most of the serialization features. + """ + # Create a base block + top_block = FakeXBlock("top") + course = [top_block, ] + + # Create a few children + for i in range(3): + block = FakeXBlock(f"Child {i}") + course.append(block) + top_block.children.append(block) + + # Create grandchildren on some children + if i > 0: + sub_block = FakeXBlock(f"Grandchild {i}") + course.append(sub_block) + block.children.append(sub_block) + + # Create some detached blocks at the top level + for i in range(3): + course.append(FakeXBlock(f"Detached {i}", detached_block=True)) + + return course + + +def check_block_csv_matcher(course): + """ + Match the course structure CSV against the test course. + + This is a matcher for the "responses" library. It returns a function + that actually does the matching. + """ + def match(request): + body = request.body + lines = body.split("\n")[:-1] + + # There should be one CSV line for each block in the test course + if len(lines) != len(course): + return False, f"Body has {len(lines)} lines, course has {len(course)}" + + f = StringIO(body) + reader = csv.reader(f) + + i = 0 + try: + # The CSV should be in the same order as our course, make sure + # everything matches + for row in reader: + block = course[i] + assert row[0] == block.location.org + assert row[1] == str(block.location.course_key) + assert row[2] == block.location.course + assert row[3] == block.location.run + assert row[4] == str(course[i].location) + assert row[5] == block.display_name_with_default + assert row[6] == str(block.block_type) + i += 1 + except AssertionError as e: + return False, f"Mismatch in row {i}: {e}" + return True, "" + return match + + +def check_relationship_csv_matcher(course): + """ + Match the relationship CSV against the test course. + + This is a matcher for the "responses" library. It returns a function + that actually does the matching. + """ + # Build our own copy of the test relationships first + relationships = [] + for block in course: + course_key = str(block.location.course_key) + for _, child in enumerate(block.get_children()): + parent_node = str(CoursePublishedSink.strip_branch_and_version(block.location)) + child_node = str(CoursePublishedSink.strip_branch_and_version(child.location)) + relationships.append((course_key, parent_node, child_node)) + + def match(request): + body = request.body + lines = body.split("\n")[:-1] + + # The relationships CSV should have the same number of relationships as our test + if len(lines) != len(relationships): + return False, f"Body has {len(lines)} lines but there are {len(relationships)} relationships" + + f = StringIO(body) + reader = csv.reader(f) + + i = 0 + try: + # The CSV should be in the same order as our relationships, make sure + # everything matches + for row in reader: + print(row) + print(relationships[i]) + relation = relationships[i] + assert row[0] == relation[0] + assert row[1] == relation[1] + assert row[2] == relation[2] + i += 1 + except AssertionError as e: + return False, f"Mismatch in row {i}: {e}" + return True, "" + return match diff --git a/tests/test_course_published.py b/tests/test_course_published.py new file mode 100644 index 0000000..e7bc4c5 --- /dev/null +++ b/tests/test_course_published.py @@ -0,0 +1,117 @@ +""" +Tests for the course_published sinks. +""" +import logging +from datetime import datetime +from unittest.mock import patch + +import pytest +import responses +from responses import matchers +from responses.registries import OrderedRegistry + +from event_sink_clickhouse.sinks.course_published import CoursePublishedSink +from event_sink_clickhouse.tasks import dump_course_to_clickhouse +from test_utils.helpers import ( + check_block_csv_matcher, + check_relationship_csv_matcher, + course_factory, + course_str_factory, + get_clickhouse_http_params, + mock_course_overview, + mock_detached_xblock_types, +) + + +@responses.activate(registry=OrderedRegistry) # pylint: disable=unexpected-keyword-arg,no-value-for-parameter +@patch("event_sink_clickhouse.sinks.course_published.CoursePublishedSink._get_detached_xblock_types") +@patch("event_sink_clickhouse.sinks.course_published.CoursePublishedSink._get_modulestore") +def test_course_publish_success(mock_modulestore, mock_detached, caplog): + """ + Test of a successful end-to-end run. + """ + # Necessary to get logs from the task + caplog.set_level(logging.INFO, logger="edx.celery.task") + + # Create a fake course structure with a few fake XBlocks + course = course_factory() + mock_modulestore.return_value.get_items.return_value = course + + # Fake the "detached types" list since we can't import it here + mock_detached.return_value = mock_detached_xblock_types() + + # Use the responses library to catch the POSTs to ClickHouse + # and match them against the expected values, including CSV + # content + blocks_params, relationships_params = get_clickhouse_http_params() + + responses.post( + "https://foo.bar/", + match=[ + matchers.query_param_matcher(blocks_params), + check_block_csv_matcher(course) + ], + ) + responses.post( + "https://foo.bar/", + match=[ + matchers.query_param_matcher(relationships_params), + check_relationship_csv_matcher(course) + ], + ) + + course = course_str_factory() + dump_course_to_clickhouse(course) + + # Just to make sure we're not calling things more than we need to + assert mock_modulestore.call_count == 1 + assert mock_detached.call_count == 1 + + +@responses.activate(registry=OrderedRegistry) # pylint: disable=unexpected-keyword-arg,no-value-for-parameter +@patch("event_sink_clickhouse.sinks.course_published.CoursePublishedSink._get_detached_xblock_types") +@patch("event_sink_clickhouse.sinks.course_published.CoursePublishedSink._get_modulestore") +def test_course_publish_clickhouse_error(mock_modulestore, mock_detached, caplog): + """ + Test the case where a ClickHouse POST fails. + """ + caplog.set_level(logging.INFO, logger="edx.celery.task") + course = course_factory() + mock_modulestore.return_value.get_items.return_value = course + mock_detached.return_value = mock_detached_xblock_types() + + # This will raise an exception when we try to post to ClickHouse + responses.post( + "https://foo.bar/", + body=Exception("Bogus test exception in ClickHouse call") + ) + + course = course_str_factory() + + with pytest.raises(Exception): + dump_course_to_clickhouse(course) + + # Make sure everything was called as we expect + assert mock_modulestore.call_count == 1 + assert mock_detached.call_count == 1 + + # Make sure our log message went through. + assert f"Error trying to dump course {course} to ClickHouse!" in caplog.text + + +@patch("event_sink_clickhouse.sinks.course_published.CoursePublishedSink._get_course_overview_model") +def test_get_course_last_published(mock_overview): + """ + This function isn't in use yet, but we'll need it for the management command + """ + # Create a fake course overview, which will return a datetime object + course = mock_course_overview() + mock_overview.return_value = course + + # Request our course last published date + course_key = course_str_factory() + + # Confirm that the string date we get back is a valid date + last_published_date = CoursePublishedSink.get_course_last_published(course_key) + dt = datetime.strptime(last_published_date, "%Y-%m-%d %H:%M:%S.%f") + assert dt diff --git a/tests/test_django_settings.py b/tests/test_django_settings.py new file mode 100644 index 0000000..52ff506 --- /dev/null +++ b/tests/test_django_settings.py @@ -0,0 +1,55 @@ +""" +Test plugin settings +""" + +from django.conf import settings +from django.test import TestCase + +from event_sink_clickhouse.settings import common as common_settings +from event_sink_clickhouse.settings import production as production_setttings + + +class TestPluginSettings(TestCase): + """ + Tests plugin settings + """ + + def test_common_settings(self): + """ + Test common settings + """ + common_settings.plugin_settings(settings) + + for key in ("url", "username", "password", "database", "timeout_secs"): + assert key in settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG + + def test_production_settings(self): + """ + Test production settings + """ + test_url = "https://foo.bar" + test_username = "bob" + test_password = "secret" + test_database = "cool_data" + test_timeout = 1 + + settings.ENV_TOKENS = { + 'EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG': { + "url": test_url, + "username": test_username, + "password": test_password, + "database": test_database, + "timeout_secs": test_timeout + } + } + production_setttings.plugin_settings(settings) + + for key, val in ( + ("url", test_url), + ("username", test_username), + ("password", test_password), + ("database", test_database), + ("timeout_secs", test_timeout), + ): + assert key in settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG + assert settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG[key] == val