From ff17750df17d350235415c0fa6a54a35a941a16b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Thu, 17 Oct 2024 12:27:44 +0200 Subject: [PATCH 1/9] fix: `make test` command Unit test command was failing because manage.py was pointing to an incorrect module by default. --- manage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manage.py b/manage.py index 074a0e3b..a301aba1 100755 --- a/manage.py +++ b/manage.py @@ -7,7 +7,7 @@ import sys if __name__ == "__main__": - os.environ.setdefault("DJANGO_SETTINGS_MODULE", "edxsearch.settings") + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings") from django.core.management import execute_from_command_line From 151f865f8cdac97b045cf72be691e73f9e77e60b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Thu, 17 Oct 2024 13:15:40 +0200 Subject: [PATCH 2/9] feat: add convenient "make compile-requirements" command This command makes it possible to compile requirements without upgrading them. Note that the `make upgrade` command still works with this change. --- Makefile | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index bea3f88d..adff3746 100644 --- a/Makefile +++ b/Makefile @@ -34,23 +34,26 @@ test_with_es: clean test.start_elasticsearch coverage run --source='.' manage.py test make test.stop_elasticsearch -upgrade: export CUSTOM_COMPILE_COMMAND=make upgrade -upgrade: ## update the requirements/*.txt files with the latest packages satisfying requirements/*.in +compile-requirements: export CUSTOM_COMPILE_COMMAND=make upgrade +compile-requirements: ## Re-compile *.in requirements to *.txt (without upgrading) pip install -qr requirements/pip-tools.txt # Make sure to compile files after any other files they include! 
pip-compile --rebuild --allow-unsafe --rebuild -o requirements/pip.txt requirements/pip.in - pip-compile --rebuild --upgrade -o requirements/pip-tools.txt requirements/pip-tools.in + pip-compile --rebuild ${COMPILE_OPTS} -o requirements/pip-tools.txt requirements/pip-tools.in pip install -qr requirements/pip.txt pip install -qr requirements/pip-tools.txt - pip-compile --rebuild --upgrade -o requirements/base.txt requirements/base.in - pip-compile --rebuild --upgrade -o requirements/testing.txt requirements/testing.in - pip-compile --rebuild --upgrade -o requirements/quality.txt requirements/quality.in - pip-compile --rebuild --upgrade -o requirements/ci.txt requirements/ci.in - pip-compile --rebuild --upgrade -o requirements/dev.txt requirements/dev.in + pip-compile --rebuild ${COMPILE_OPTS} -o requirements/base.txt requirements/base.in + pip-compile --rebuild ${COMPILE_OPTS} -o requirements/testing.txt requirements/testing.in + pip-compile --rebuild ${COMPILE_OPTS} -o requirements/quality.txt requirements/quality.in + pip-compile --rebuild ${COMPILE_OPTS} -o requirements/ci.txt requirements/ci.in + pip-compile --rebuild ${COMPILE_OPTS} -o requirements/dev.txt requirements/dev.in # Let tox control the Django version for tests sed '/^[dD]jango==/d' requirements/testing.txt > requirements/testing.tmp mv requirements/testing.tmp requirements/testing.txt +upgrade: ## update the requirements/*.txt files with the latest packages satisfying requirements/*.in + $(MAKE) compile-requirements COMPILE_OPTS="--upgrade" + test: test_with_es ## run tests and generate coverage report install-local: ## installs your local edx-search into the LMS and CMS python virtualenvs From 44d24242e4202742462ecdd66aa906cda55545cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Thu, 17 Oct 2024 12:37:15 +0200 Subject: [PATCH 3/9] feat: add Meilisearch-compatible search engine The goal of this change is to introduce a search engine that is compatible with the edx-search API 
but that uses Meilisearch instead of Elasticsearch. That way, we can replace one by the other across edx-platform by simply changing a single SEARCH_ENGINE django setting. There are a couple of differences between Meilisearch and Elasticsearch: 1. Filterable attributes must be defined explicitly. 2. No support for datetime objects, which must be converted to timestamps (with an extra field to store the timezone). 3. No special characters allowed in the primary key values, such that we must hash course IDs before we can use them as primary key values. Note that this PR does not introduce any breaking change. This is an opt-in engine that anyone is free to use. There is some setup work for every search feature: see the engine module documentation for more information. See the corresponding conversation here: https://github.com/openedx/frontend-app-authoring/issues/1334#issuecomment-2401805382 --- requirements/base.in | 1 + requirements/base.txt | 29 +- requirements/dev.txt | 43 +++ requirements/quality.txt | 34 ++ requirements/testing.txt | 34 ++ search/meilisearch.py | 540 +++++++++++++++++++++++++++++++ search/tests/test_meilisearch.py | 258 +++++++++++++++ 7 files changed, 936 insertions(+), 3 deletions(-) create mode 100644 search/meilisearch.py create mode 100644 search/tests/test_meilisearch.py diff --git a/requirements/base.in b/requirements/base.in index 034b08a6..a758596f 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -15,3 +15,4 @@ Django # Web application framework elasticsearch>=7.8.0,<8.0.0 edx-toggles event-tracking +meilisearch diff --git a/requirements/base.txt b/requirements/base.txt index df520b14..c5679100 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -6,18 +6,26 @@ # amqp==5.2.0 # via kombu +annotated-types==0.7.0 + # via pydantic asgiref==3.8.1 # via django attrs==23.2.0 # via openedx-events billiard==4.2.0 # via celery +camel-converter[pydantic]==4.0.1 + # via meilisearch celery==5.4.0 # via 
event-tracking certifi==2024.7.4 - # via elasticsearch + # via + # elasticsearch + # requests cffi==1.16.0 # via pynacl +charset-normalizer==3.4.0 + # via requests click==8.1.7 # via # celery @@ -77,12 +85,16 @@ event-tracking==2.4.0 # -r requirements/base.in fastavro==1.9.5 # via openedx-events +idna==3.10 + # via requests jinja2==3.1.4 # via code-annotations kombu==5.3.7 # via celery markupsafe==2.1.5 # via jinja2 +meilisearch==0.31.5 + # via -r requirements/base.in newrelic==9.12.0 # via edx-django-utils openedx-events==9.11.0 @@ -95,6 +107,10 @@ psutil==6.0.0 # via edx-django-utils pycparser==2.22 # via cffi +pydantic==2.9.2 + # via camel-converter +pydantic-core==2.23.4 + # via pydantic pymongo==3.13.0 # via # edx-opaque-keys @@ -109,6 +125,8 @@ pytz==2024.1 # via event-tracking pyyaml==6.0.1 # via code-annotations +requests==2.32.3 + # via meilisearch six==1.16.0 # via # edx-ccx-keys @@ -124,11 +142,16 @@ stevedore==5.2.0 text-unidecode==1.3 # via python-slugify typing-extensions==4.12.2 - # via edx-opaque-keys + # via + # edx-opaque-keys + # pydantic + # pydantic-core tzdata==2024.1 # via celery urllib3==1.26.19 - # via elasticsearch + # via + # elasticsearch + # requests vine==5.1.0 # via # amqp diff --git a/requirements/dev.txt b/requirements/dev.txt index f8f96994..3a858be5 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -9,6 +9,11 @@ amqp==5.2.0 # -r requirements/quality.txt # -r requirements/testing.txt # kombu +annotated-types==0.7.0 + # via + # -r requirements/quality.txt + # -r requirements/testing.txt + # pydantic asgiref==3.8.1 # via # -r requirements/quality.txt @@ -37,6 +42,11 @@ cachetools==5.4.0 # via # -r requirements/ci.txt # tox +camel-converter[pydantic]==4.0.1 + # via + # -r requirements/quality.txt + # -r requirements/testing.txt + # meilisearch celery==5.4.0 # via # -r requirements/quality.txt @@ -47,6 +57,7 @@ certifi==2024.7.4 # -r requirements/quality.txt # -r requirements/testing.txt # elasticsearch + # requests 
cffi==1.16.0 # via # -r requirements/quality.txt @@ -56,6 +67,11 @@ chardet==5.2.0 # via # -r requirements/ci.txt # tox +charset-normalizer==3.4.0 + # via + # -r requirements/quality.txt + # -r requirements/testing.txt + # requests click==8.1.7 # via # -r requirements/pip-tools.txt @@ -185,6 +201,11 @@ filelock==3.15.4 # -r requirements/ci.txt # tox # virtualenv +idna==3.10 + # via + # -r requirements/quality.txt + # -r requirements/testing.txt + # requests iniconfig==2.0.0 # via # -r requirements/quality.txt @@ -213,6 +234,10 @@ mccabe==0.7.0 # via # -r requirements/quality.txt # pylint +meilisearch==0.31.5 + # via + # -r requirements/quality.txt + # -r requirements/testing.txt mock==5.1.0 # via # -r requirements/quality.txt @@ -275,6 +300,16 @@ pycparser==2.22 # -r requirements/quality.txt # -r requirements/testing.txt # cffi +pydantic==2.9.2 + # via + # -r requirements/quality.txt + # -r requirements/testing.txt + # camel-converter +pydantic-core==2.23.4 + # via + # -r requirements/quality.txt + # -r requirements/testing.txt + # pydantic pylint==3.2.6 # via # -r requirements/quality.txt @@ -344,6 +379,11 @@ pyyaml==6.0.1 # -r requirements/quality.txt # -r requirements/testing.txt # code-annotations +requests==2.32.3 + # via + # -r requirements/quality.txt + # -r requirements/testing.txt + # meilisearch six==1.16.0 # via # -r requirements/quality.txt @@ -380,6 +420,8 @@ typing-extensions==4.12.2 # -r requirements/quality.txt # -r requirements/testing.txt # edx-opaque-keys + # pydantic + # pydantic-core tzdata==2024.1 # via # -r requirements/quality.txt @@ -390,6 +432,7 @@ urllib3==1.26.19 # -r requirements/quality.txt # -r requirements/testing.txt # elasticsearch + # requests vine==5.1.0 # via # -r requirements/quality.txt diff --git a/requirements/quality.txt b/requirements/quality.txt index dcc955c6..52990d46 100644 --- a/requirements/quality.txt +++ b/requirements/quality.txt @@ -8,6 +8,10 @@ amqp==5.2.0 # via # -r requirements/testing.txt # kombu 
+annotated-types==0.7.0 + # via + # -r requirements/testing.txt + # pydantic asgiref==3.8.1 # via # -r requirements/testing.txt @@ -24,6 +28,10 @@ billiard==4.2.0 # via # -r requirements/testing.txt # celery +camel-converter[pydantic]==4.0.1 + # via + # -r requirements/testing.txt + # meilisearch celery==5.4.0 # via # -r requirements/testing.txt @@ -32,10 +40,15 @@ certifi==2024.7.4 # via # -r requirements/testing.txt # elasticsearch + # requests cffi==1.16.0 # via # -r requirements/testing.txt # pynacl +charset-normalizer==3.4.0 + # via + # -r requirements/testing.txt + # requests click==8.1.7 # via # -r requirements/testing.txt @@ -130,6 +143,10 @@ fastavro==1.9.5 # via # -r requirements/testing.txt # openedx-events +idna==3.10 + # via + # -r requirements/testing.txt + # requests iniconfig==2.0.0 # via # -r requirements/testing.txt @@ -150,6 +167,8 @@ markupsafe==2.1.5 # jinja2 mccabe==0.7.0 # via pylint +meilisearch==0.31.5 + # via -r requirements/testing.txt mock==5.1.0 # via -r requirements/testing.txt newrelic==9.12.0 @@ -188,6 +207,14 @@ pycparser==2.22 # via # -r requirements/testing.txt # cffi +pydantic==2.9.2 + # via + # -r requirements/testing.txt + # camel-converter +pydantic-core==2.23.4 + # via + # -r requirements/testing.txt + # pydantic pylint==3.2.6 # via # edx-lint @@ -233,6 +260,10 @@ pyyaml==6.0.1 # via # -r requirements/testing.txt # code-annotations +requests==2.32.3 + # via + # -r requirements/testing.txt + # meilisearch six==1.16.0 # via # -r requirements/testing.txt @@ -260,6 +291,8 @@ typing-extensions==4.12.2 # via # -r requirements/testing.txt # edx-opaque-keys + # pydantic + # pydantic-core tzdata==2024.1 # via # -r requirements/testing.txt @@ -268,6 +301,7 @@ urllib3==1.26.19 # via # -r requirements/testing.txt # elasticsearch + # requests vine==5.1.0 # via # -r requirements/testing.txt diff --git a/requirements/testing.txt b/requirements/testing.txt index cf3cb6eb..d7b68223 100644 --- a/requirements/testing.txt +++ 
b/requirements/testing.txt @@ -8,6 +8,10 @@ amqp==5.2.0 # via # -r requirements/base.txt # kombu +annotated-types==0.7.0 + # via + # -r requirements/base.txt + # pydantic asgiref==3.8.1 # via # -r requirements/base.txt @@ -20,6 +24,10 @@ billiard==4.2.0 # via # -r requirements/base.txt # celery +camel-converter[pydantic]==4.0.1 + # via + # -r requirements/base.txt + # meilisearch celery==5.4.0 # via # -r requirements/base.txt @@ -28,10 +36,15 @@ certifi==2024.7.4 # via # -r requirements/base.txt # elasticsearch + # requests cffi==1.16.0 # via # -r requirements/base.txt # pynacl +charset-normalizer==3.4.0 + # via + # -r requirements/base.txt + # requests click==8.1.7 # via # -r requirements/base.txt @@ -115,6 +128,10 @@ fastavro==1.9.5 # via # -r requirements/base.txt # openedx-events +idna==3.10 + # via + # -r requirements/base.txt + # requests iniconfig==2.0.0 # via pytest jinja2==3.1.4 @@ -129,6 +146,8 @@ markupsafe==2.1.5 # via # -r requirements/base.txt # jinja2 +meilisearch==0.31.5 + # via -r requirements/base.txt mock==5.1.0 # via -r requirements/testing.in newrelic==9.12.0 @@ -159,6 +178,14 @@ pycparser==2.22 # via # -r requirements/base.txt # cffi +pydantic==2.9.2 + # via + # -r requirements/base.txt + # camel-converter +pydantic-core==2.23.4 + # via + # -r requirements/base.txt + # pydantic pymongo==3.13.0 # via # -r requirements/base.txt @@ -188,6 +215,10 @@ pyyaml==6.0.1 # via # -r requirements/base.txt # code-annotations +requests==2.32.3 + # via + # -r requirements/base.txt + # meilisearch six==1.16.0 # via # -r requirements/base.txt @@ -212,6 +243,8 @@ typing-extensions==4.12.2 # via # -r requirements/base.txt # edx-opaque-keys + # pydantic + # pydantic-core tzdata==2024.1 # via # -r requirements/base.txt @@ -220,6 +253,7 @@ urllib3==1.26.19 # via # -r requirements/base.txt # elasticsearch + # requests vine==5.1.0 # via # -r requirements/base.txt diff --git a/search/meilisearch.py b/search/meilisearch.py new file mode 100644 index 00000000..75636526 
--- /dev/null +++ b/search/meilisearch.py @@ -0,0 +1,540 @@ +""" +This is a search engine for Meilisearch. It implements the edx-search's SearchEngine +API, such that it can be setup as a drop-in replacement for the ElasticSearchEngine. To +switch to this engine, you should run a Meilisearch instance and define the following +setting: + + SEARCH_ENGINE = "search.meilisearch.MeilisearchEngine" + +You will then need to create the new indices by running: + + ./manage.py lms shell -c "import search.meilisearch; search.meilisearch.create_indexes()" + +For more information about the Meilisearch API in Python, check +https://github.com/meilisearch/meilisearch-python + +When implementing a new index, you might discover that you need to list explicit filterable +fields. Typically, you try to index new documents, and Meilisearch fails with the +following response: + + meilisearch.errors.MeilisearchApiError: MeilisearchApiError. Error code: invalid_search_filter. + Error message: Attribute `field3` is not filterable. Available filterable attributes are: + `field1 field2 _pk`. + +In such cases, the filterable field should be added to INDEX_FILTERABLES below. And you should +then run the `create_indexes()` function again, as indicated above. + +This search engine was tested for the following indexes: + +1. course_info ("course discovery"): + - Enable the course discovery feature: FEATURES["ENABLE_COURSE_DISCOVERY"] = True + - A search bar appears in the LMS landing page. + - Content is automatically indexed every time a course's "schedule & details" are + edited in the studio, course content is edited or the "reindex" button is clicked. + +2. 
courseware_content ("courseware search"): + - Enable the courseware search waffle flag: + + ./manage.py lms waffle_flag --create --everyone courseware.mfe_courseware_search + + - Enable the following feature flags: + + FEATURES["ENABLE_COURSEWARE_INDEX"] = True + FEATURES["ENABLE_COURSEWARE_SEARCH"] = True + + - Courseware content will be indexed by editing course sections and units. + - Alternatively, click the "Reindex" button in the Studio. + - Alternatively, index all courses by running: ./manage.py cms reindex_course --active + - In the learning MFE, a course search bar appears when opening a course. + +Note that the index names could be tuned with the COURSEWARE_INFO_INDEX_NAME and +COURSEWARE_CONTENT_INDEX_NAME settings. However, if you decide to change these settings, +beware that many other applications do not respect them... + +When facing issues with Meilisearch during indexing, you may want to look at the +Meilisearch logs. You might notice that some indexing tasks failed. In such cases, you +can troubleshoot these tasks by printing them with: + + ./manage.py lms shell -c "import search.meilisearch; search.meilisearch.print_failed_meilisearch_tasks()" +""" + +from copy import deepcopy +from datetime import datetime +import hashlib +import json +import logging +import typing as t + +import meilisearch + +from django.conf import settings +from django.utils import timezone + +from search.search_engine_base import SearchEngine +from search.utils import ValueRange + + +MEILISEARCH_API_KEY = getattr(settings, "MEILISEARCH_API_KEY", "") +MEILISEARCH_URL = getattr(settings, "MEILISEARCH_URL", "http://meilisearch") +MEILISEARCH_INDEX_PREFIX = getattr(settings, "MEILISEARCH_INDEX_PREFIX", "") + + +logger = logging.getLogger(__name__) + + +PRIMARY_KEY_FIELD_NAME = "_pk" +UTC_OFFSET_SUFFIX = "__utcoffset" + + +# In Meilisearch, we need to explicitly list fields for which we expect to define +# filters and aggregation functions. 
+# This is different than Elasticsearch where we can aggregate results over any field. +# Here, we list facet fields per index. +# Reference: https://www.meilisearch.com/docs/learn/filtering_and_sorting/search_with_facet_filters +# Note that index names are hard-coded here, because they are hardcoded anyway across all of edx-search. +INDEX_FILTERABLES: dict[str, list[str]] = { + getattr(settings, "COURSEWARE_INFO_INDEX_NAME", "course_info"): [ + "language", # aggregate by language, mode, org + "modes", + "org", + "catalog_visibility", # exclude visibility="none" + "enrollment_end", # include only enrollable courses + ], + getattr(settings, "COURSEWARE_CONTENT_INDEX_NAME", "courseware_content"): [ + PRIMARY_KEY_FIELD_NAME, # exclude some specific documents based on ID + "course", # search courseware content by course + "org", # used during indexing + "start_date", # limit search to started courses + ], +} + + +class MeilisearchEngine(SearchEngine): + """ + Meilisearch-compatible search engine. We work very hard to produce an output that is + compliant with edx-search's ElasticSearchEngine. + """ + + def __init__(self, index=None): + super().__init__(index=index) + self.meilisearch_index = get_meilisearch_index(self.index_name) + + @property + def meilisearch_index_name(self): + """ + The index UID is its name. + """ + return self.meilisearch_index.uid + + def index(self, sources: list[dict[str, t.Any]], **kwargs): + """ + Index a number of documents, which can have just any type. 
+ """ + logger.info( + "Index request: index=%s sources=%s kwargs=%s", + self.meilisearch_index_name, + sources, + kwargs, + ) + processed_documents = [process_document(source) for source in sources] + self.meilisearch_index.add_documents( + processed_documents, serializer=DocumentEncoder + ) + + def search( + self, + query_string=None, + field_dictionary=None, + filter_dictionary=None, + exclude_dictionary=None, + aggregation_terms=None, + # exclude_ids=None, # deprecated + # use_field_match=False, # deprecated + log_search_params=False, + **kwargs, + ): # pylint: disable=too-many-arguments + """ + See meilisearch docs: https://www.meilisearch.com/docs/reference/api/search + """ + opt_params = get_search_params( + field_dictionary=field_dictionary, + filter_dictionary=filter_dictionary, + exclude_dictionary=exclude_dictionary, + aggregation_terms=aggregation_terms, + **kwargs, + ) + if log_search_params: + logger.info("Search query: opt_params=%s", opt_params) + meilisearch_results = self.meilisearch_index.search(query_string, opt_params) + processed_results = process_results(meilisearch_results, self.index_name) + return processed_results + + def remove(self, doc_ids, **kwargs): + """ + Removing documents from the index is as simple as deleting the the documents + with the corresponding primary key. + """ + logger.info( + "Remove request: index=%s, doc_ids=%s kwargs=%s", + self.meilisearch_index_name, + doc_ids, + kwargs, + ) + doc_pks = [id2pk(doc_id) for doc_id in doc_ids] + if doc_pks: + self.meilisearch_index.delete_documents(doc_pks) + + +class DocumentEncoder(json.JSONEncoder): + """ + Custom encoder, useful in particular to encode datetime fields. 
+    Ref: https://github.com/meilisearch/meilisearch-python?tab=readme-ov-file#custom-serializer-for-documents-
+    """
+
+    def default(self, o):
+        if isinstance(o, datetime):
+            return str(o)
+        return super().default(o)
+
+
+def print_failed_meilisearch_tasks(count: int = 10):
+    """
+    Useful function for troubleshooting.
+
+    Since indexing tasks are asynchronous, sometimes they fail and it's tricky to figure
+    out why. This will print failed tasks to stdout.
+    """
+    client = get_meilisearch_client()
+    for result in client.task_handler.get_tasks(
+        {"statuses": "failed", "limit": count}
+    ).results:
+        print(result)
+
+
+def create_indexes(index_filterables: dict[str, list[str]] = None):
+    """
+    This is an initialization function that creates indexes and makes sure that they
+    support the right faceting.
+
+    The `index_filterables` will default to `INDEX_FILTERABLES` if undefined. Developers
+    can use this function to configure their own indices.
+    """
+    if index_filterables is None:
+        index_filterables = INDEX_FILTERABLES
+
+    client = get_meilisearch_client()
+    for index_name, filterables in index_filterables.items():
+        meilisearch_index_name = get_meilisearch_index_name(index_name)
+        try:
+            index = client.get_index(meilisearch_index_name)
+        except meilisearch.errors.MeilisearchApiError as e:
+            if e.code != "index_not_found":
+                raise
+            client.create_index(
+                meilisearch_index_name, {"primaryKey": PRIMARY_KEY_FIELD_NAME}
+            )
+            # Get the index again
+            index = client.get_index(meilisearch_index_name)
+
+        # Update filterables if there are some new elements
+        if filterables:
+            existing_filterables = set(index.get_filterable_attributes())
+            if not set(filterables).issubset(existing_filterables):
+                all_filterables = list(existing_filterables.union(filterables))
+                index.update_filterable_attributes(all_filterables)
+
+
+def get_meilisearch_index(index_name: str):
+    """
+    Return a meilisearch index.
+
+    Note that the index may not exist, and it will be created on first insertion.
+    Ideally, the initialisation function `create_indexes` should be run first.
+    """
+    meilisearch_client = get_meilisearch_client()
+    meilisearch_index_name = get_meilisearch_index_name(index_name)
+    return meilisearch_client.index(meilisearch_index_name)
+
+
+def get_meilisearch_client():
+    return meilisearch.Client(MEILISEARCH_URL, api_key=MEILISEARCH_API_KEY)
+
+
+def get_meilisearch_index_name(index_name: str) -> str:
+    """
+    Return the index name in Meilisearch associated to a hard-coded index name.
+
+    This is useful for multi-tenant Meilisearch: just define a different prefix for
+    every tenant.
+
+    Usually, meilisearch API keys are allowed to access only certain index prefixes.
+    Make sure that your API key matches the prefix.
+    """
+    return MEILISEARCH_INDEX_PREFIX + index_name
+
+
+def process_document(doc: dict[str, t.Any]) -> dict[str, t.Any]:
+    """
+    Process document before indexing.
+
+    We make a copy to avoid modifying the source document.
+    """
+    processed = process_nested_document(doc)
+
+    # Add primary key field
+    processed[PRIMARY_KEY_FIELD_NAME] = id2pk(doc["id"])
+
+    return processed
+
+
+def process_nested_document(doc: dict[str, t.Any]) -> dict[str, t.Any]:
+    """
+    Process nested dict inside top-level Meilisearch document.
+    """
+    processed = {}
+    for key, value in doc.items():
+        if isinstance(value, timezone.datetime):
+            # Convert datetime objects to timestamp, and store the timezone in a
+            # separate field with a suffix given by UTC_OFFSET_SUFFIX.
+            utcoffset = None
+            if value.tzinfo:
+                utcoffset = value.utcoffset().seconds
+            processed[key] = value.timestamp()
+            processed[f"{key}{UTC_OFFSET_SUFFIX}"] = utcoffset
+        elif isinstance(value, dict):
+            processed[key] = process_nested_document(value)
+        else:
+            # Pray that there are not datetime objects inside lists.
+            # If there are, they will be converted to str by the DocumentEncoder.
+ processed[key] = value + return processed + + +def id2pk(value: str) -> str: + """ + Convert a document "id" field into a primary key that is compatible with Meilisearch. + + This step is necessary because the "id" is typically a course id, which includes + colon ":" characters, which are not supported by Meilisearch. Source: + https://www.meilisearch.com/docs/learn/getting_started/primary_key#formatting-the-document-id + """ + return hashlib.sha1(value.encode()).hexdigest() + + +def get_search_params( + field_dictionary=None, + filter_dictionary=None, + exclude_dictionary=None, + aggregation_terms=None, + **kwargs, +) -> dict[str, t.Any]: + """ + Return a dictionary of parameters that should be passed to the Meilisearch client + `.search()` method. + """ + params = {"showRankingScore": True} + + # Aggregation + if aggregation_terms: + params["facets"] = list(aggregation_terms.keys()) + + # Exclusion and inclusion filters + filters = [] + if field_dictionary: + filters += get_filter_rules(field_dictionary) + if filter_dictionary: + filters += get_filter_rules(filter_dictionary, optional=True) + if exclude_dictionary: + filters += get_filter_rules(exclude_dictionary, exclude=True) + if filters: + params["filter"] = filters + + # Offset/Size + if "from_" in kwargs: + params["offset"] = kwargs["from_"] + if "size" in kwargs: + params["limit"] = kwargs["size"] + + return params + + +def get_filter_rules( + rule_dict: dict[str, t.Any], exclude: bool = False, optional: bool = False +) -> list[str]: + """ + Convert inclusion/exclusion rules. + """ + rules = [] + for key, value in rule_dict.items(): + if isinstance(value, list): + for v in value: + rules.append( + get_filter_rule(key, v, exclude=exclude, optional=optional) + ) + else: + rules.append( + get_filter_rule(key, value, exclude=exclude, optional=optional) + ) + return rules + + +def get_filter_rule( + key: str, value: str, exclude: bool = False, optional: bool = False +) -> str: + """ + Meilisearch filter rule. 
+ + See: https://www.meilisearch.com/docs/learn/filtering_and_sorting/filter_expression_reference + """ + prefix = "NOT " if exclude else "" + if key == "id": + key = PRIMARY_KEY_FIELD_NAME + value = id2pk(value) + if isinstance(value, str): + rule = f'{prefix}{key} = "{value}"' + elif isinstance(value, ValueRange): + constraints = [] + lower = value.lower + if isinstance(lower, timezone.datetime): + lower = lower.timestamp() + upper = value.upper + if isinstance(upper, timezone.datetime): + upper = upper.timestamp() + # I know that the following fails if value == 0, but we are being + # consistent with the behaviour in the elasticsearch engine. + if lower: + constraints.append(f"{key} >= {lower}") + if upper: + constraints.append(f"{key} <= {upper}") + rule = " AND ".join(constraints) + if len(constraints) > 1: + rule = f"({rule})" + else: + raise ValueError(f"Unknown value type: {value.__class__}") + if optional: + rule += f" OR {key} NOT EXISTS" + return rule + + +def process_results(results: dict[str, t.Any], index_name: str) -> dict[str, t.Any]: + """ + Convert results produced by Meilisearch into results that are compatible with the + edx-search engine API. + + Example input: + + { + 'hits': [ + { + 'pk': 'f381d4f1914235c9532576c0861d09b484ade634', + 'id': 'course-v1:OpenedX+DemoX+DemoCourse', + ... + "_rankingScore": 0.865, + }, + ... + ], + 'query': 'demo', + 'processingTimeMs': 0, + 'limit': 20, + 'offset': 0, + 'estimatedTotalHits': 1 + } + + Example output: + + { + 'took': 13, + 'total': 1, + 'max_score': 0.4001565, + 'results': [ + { + '_index': 'course_info', + '_type': '_doc', + '_id': 'course-v1:OpenedX+DemoX+DemoCourse', + '_ignored': ['content.overview.keyword'], # removed + 'data': { + 'id': 'course-v1:OpenedX+DemoX+DemoCourse', + 'course': 'course-v1:OpenedX+DemoX+DemoCourse', + 'content': { + 'display_name': 'Open edX Demo Course', + ... 
+ }, + 'image_url': '/asset-v1:OpenedX+DemoX+DemoCourse+type@asset+block@thumbnail_demox.jpeg', + 'start': '2020-01-01T00:00:00+00:00', + ... + }, + 'score': 0.4001565 + } + ], + 'aggs': { + 'modes': { + 'terms': {'audit': 1}, + 'total': 1.0, + 'other': 0 + }, + 'org': { + 'terms': {'OpenedX': 1}, 'total': 1.0, 'other': 0 + }, + 'language': {'terms': {'en': 1}, 'total': 1.0, 'other': 0} + } + } + """ + # Base + processed = { + "took": results["processingTimeMs"], + "total": results["estimatedTotalHits"], + "results": [], + "aggs": {}, + } + + # Hits + max_score = 0 + for result in results["hits"]: + result = process_hit(result) + score = result.pop("_rankingScore") + max_score = max(max_score, score) + processed_result = { + "_id": result["id"], + "_index": index_name, + "_type": "_doc", + "data": result, + } + processed["results"].append(processed_result) + processed["max_score"] = max_score + + # Aggregates/Facets + for facet_name, facet_distribution in results.get("facetDistribution", {}).items(): + total = sum(facet_distribution.values()) + processed["aggs"][facet_name] = { + "terms": facet_distribution, + "total": total, + "other": 0, + } + return processed + + +def process_hit(hit: dict[str, t.Any]) -> dict[str, t.Any]: + """ + Convert a search result back to the ES format. 
+ """ + processed = deepcopy(hit) + + # Remove primary key field + try: + processed.pop(PRIMARY_KEY_FIELD_NAME) + except KeyError: + pass + + # Convert datetime fields back to datetime + for key in list(processed.keys()): + if key.endswith(UTC_OFFSET_SUFFIX): + utcoffset = processed.pop(key) + key = key[: -len(UTC_OFFSET_SUFFIX)] + timestamp = hit[key] + tz = ( + timezone.get_fixed_timezone(timezone.timedelta(seconds=utcoffset)) + if utcoffset + else None + ) + processed[key] = timezone.datetime.fromtimestamp(timestamp, tz=tz) + return processed diff --git a/search/tests/test_meilisearch.py b/search/tests/test_meilisearch.py new file mode 100644 index 00000000..104d6a25 --- /dev/null +++ b/search/tests/test_meilisearch.py @@ -0,0 +1,258 @@ +""" +Test for the Meilisearch search engine. +""" + +from datetime import datetime + +import django.test +from django.utils import timezone +import pytest + +from search.utils import DateRange, ValueRange +import search.meilisearch + + +class DocumentEncoderTests(django.test.TestCase): + """ + JSON encoder unit tests. + """ + + def test_document_encode_without_timezone(self): + document = { + "date": timezone.datetime(2024, 12, 31, 5, 0, 0), + } + encoder = search.meilisearch.DocumentEncoder() + encoded = encoder.encode(document) + assert '{"date": "2024-12-31 05:00:00"}' == encoded + + def test_document_encode_with_timezone(self): + document = { + "date": timezone.datetime( + 2024, 12, 31, 5, 0, 0, tzinfo=timezone.get_fixed_timezone(0) + ), + } + encoder = search.meilisearch.DocumentEncoder() + encoded = encoder.encode(document) + assert '{"date": "2024-12-31 05:00:00+00:00"}' == encoded + + +class EngineTests(django.test.TestCase): + """ + MeilisearchEngine tests. 
+ """ + + def test_index_empty_document(self): + assert not search.meilisearch.process_nested_document({}) + + def test_index_empty_document_raises_key_error(self): + with pytest.raises(KeyError): + search.meilisearch.process_document({}) + + def test_index(self): + document = { + "id": "abcd", + "name": "My name", + "title": "My title", + } + processed = search.meilisearch.process_document(document) + + # Check that the source document was not modified + self.assertNotIn(search.meilisearch.PRIMARY_KEY_FIELD_NAME, document) + + # "id" field is preserved + assert "abcd" == processed["id"] + + # Primary key field + # can be verified with: echo -n "abcd" | sha1sum + pk = "81fe8bfe87576c3ecb22426f8e57847382917acf" + assert pk == processed[search.meilisearch.PRIMARY_KEY_FIELD_NAME] + + # Additional fields + assert "My name" == processed["name"] + assert "My title" == processed["title"] + + def test_index_recursive(self): + document = {"field": {"value": timezone.datetime(2024, 1, 1)}} + processed = search.meilisearch.process_nested_document(document) + assert { + "field": { + "value": 1704067200.0, + "value__utcoffset": None, + } + } == processed + + def test_index_datetime_no_tz(self): + # No timezone + document = {"id": "1", "dt": timezone.datetime(2024, 1, 1)} + processed = search.meilisearch.process_document(document) + assert 1704067200.0 == processed["dt"] + assert processed["dt__utcoffset"] is None + # reverse serialisation + reverse = search.meilisearch.process_hit(processed) + assert document == reverse + + def test_index_datetime_with_tz(self): + # With timezone + document = { + "id": "1", + "dt": timezone.datetime( + 2024, + 1, + 1, + tzinfo=timezone.get_fixed_timezone(timezone.timedelta(seconds=3600)), + ), + } + processed = search.meilisearch.process_document(document) + assert 1704063600.0 == processed["dt"] + assert 3600 == processed["dt__utcoffset"] + # reverse serialisation + reverse = search.meilisearch.process_hit(processed) + assert document == 
reverse + + def test_search(self): + meilisearch_results = { + "hits": [ + { + "id": "id1", + search.meilisearch.PRIMARY_KEY_FIELD_NAME: search.meilisearch.id2pk( + "id1" + ), + "title": "title 1", + "_rankingScore": 0.8, + }, + { + "id": "id2", + search.meilisearch.PRIMARY_KEY_FIELD_NAME: search.meilisearch.id2pk( + "id2" + ), + "title": "title 2", + "_rankingScore": 0.2, + }, + ], + "query": "demo", + "processingTimeMs": 14, + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2, + } + processed_results = search.meilisearch.process_results( + meilisearch_results, "index_name" + ) + assert 14 == processed_results["took"] + assert 2 == processed_results["total"] + assert 0.8 == processed_results["max_score"] + + assert 2 == len(processed_results["results"]) + assert { + "_id": "id1", + "_index": "index_name", + "_type": "_doc", + "data": { + "id": "id1", + "title": "title 1", + }, + } == processed_results["results"][0] + + assert { + "_id": "id2", + "_index": "index_name", + "_type": "_doc", + "data": { + "id": "id2", + "title": "title 2", + }, + } == processed_results["results"][1] + + def test_search_with_facets(self): + meilisearch_results = { + "hits": [], + "query": "", + "processingTimeMs": 1, + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0, + "facetDistribution": { + "modes": {"audit": 1, "honor": 3}, + "facet2": {"val1": 1, "val2": 2, "val3": 3}, + }, + } + processed_results = search.meilisearch.process_results( + meilisearch_results, "index_name" + ) + aggs = processed_results["aggs"] + assert { + "terms": {"audit": 1, "honor": 3}, + "total": 4.0, + "other": 0, + } == aggs["modes"] + + def test_search_params(self): + params = search.meilisearch.get_search_params() + self.assertTrue(params["showRankingScore"]) + + params = search.meilisearch.get_search_params(from_=0) + assert 0 == params["offset"] + + def test_search_params_exclude_dictionary(self): + # Simple value + params = search.meilisearch.get_search_params( + 
exclude_dictionary={"course_visibility": "none"} + ) + assert ['NOT course_visibility = "none"'] == params["filter"] + + # Multiple IDs + params = search.meilisearch.get_search_params( + exclude_dictionary={"id": ["1", "2"]} + ) + assert [ + f'NOT {search.meilisearch.PRIMARY_KEY_FIELD_NAME} = "{search.meilisearch.id2pk("1")}"', + f'NOT {search.meilisearch.PRIMARY_KEY_FIELD_NAME} = "{search.meilisearch.id2pk("2")}"', + ] == params["filter"] + + def test_search_params_field_dictionary(self): + params = search.meilisearch.get_search_params( + field_dictionary={ + "course": "course-v1:testorg+test1+alpha", + "org": "testorg", + } + ) + assert [ + 'course = "course-v1:testorg+test1+alpha"', + 'org = "testorg"', + ] == params["filter"] + + def test_search_params_filter_dictionary(self): + params = search.meilisearch.get_search_params( + filter_dictionary={"key": "value"} + ) + assert ['key = "value" OR key NOT EXISTS'] == params["filter"] + + def test_search_params_value_range(self): + params = search.meilisearch.get_search_params( + filter_dictionary={"value": ValueRange(lower=1, upper=2)} + ) + assert ["(value >= 1 AND value <= 2) OR value NOT EXISTS"] == params["filter"] + + params = search.meilisearch.get_search_params( + filter_dictionary={"value": ValueRange(lower=1)} + ) + assert ["value >= 1 OR value NOT EXISTS"] == params["filter"] + + def test_search_params_date_range(self): + params = search.meilisearch.get_search_params( + filter_dictionary={ + "enrollment_end": DateRange( + lower=datetime(2024, 1, 1), upper=datetime(2024, 1, 2) + ) + } + ) + assert [ + "(enrollment_end >= 1704067200.0 AND enrollment_end <= 1704153600.0) OR enrollment_end NOT EXISTS" + ] == params["filter"] + + params = search.meilisearch.get_search_params( + filter_dictionary={"enrollment_end": DateRange(lower=datetime(2024, 1, 1))} + ) + assert [ + "enrollment_end >= 1704067200.0 OR enrollment_end NOT EXISTS" + ] == params["filter"] From fa0fc71db1c87bcce2a60af7c245a08962acb6c5 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Thu, 17 Oct 2024 12:51:02 +0200 Subject: [PATCH 4/9] chore: bump version to 4.0.1 --- edxsearch/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edxsearch/__init__.py b/edxsearch/__init__.py index 6020c5ce..063a12c6 100644 --- a/edxsearch/__init__.py +++ b/edxsearch/__init__.py @@ -1,3 +1,3 @@ """ Container module for testing / demoing search """ -__version__ = '4.0.0' +__version__ = '4.0.1' From 28d324fc2a1633f12e93a88810ee184c103a99c0 Mon Sep 17 00:00:00 2001 From: Jillian Vogel Date: Wed, 23 Oct 2024 11:41:02 +1030 Subject: [PATCH 5/9] test: add unit tests --- search/tests/test_meilisearch.py | 100 +++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/search/tests/test_meilisearch.py b/search/tests/test_meilisearch.py index 104d6a25..7e5f3af5 100644 --- a/search/tests/test_meilisearch.py +++ b/search/tests/test_meilisearch.py @@ -3,6 +3,7 @@ """ from datetime import datetime +from unittest.mock import Mock import django.test from django.utils import timezone @@ -35,6 +36,14 @@ def test_document_encode_with_timezone(self): encoded = encoder.encode(document) assert '{"date": "2024-12-31 05:00:00+00:00"}' == encoded + def test_document_encode_string(self): + document = { + "description": "I ♥ strings!", + } + encoder = search.meilisearch.DocumentEncoder() + encoded = encoder.encode(document) + assert '{"description": "I \\u2665 strings!"}' == encoded + class EngineTests(django.test.TestCase): """ @@ -256,3 +265,94 @@ def test_search_params_date_range(self): assert [ "enrollment_end >= 1704067200.0 OR enrollment_end NOT EXISTS" ] == params["filter"] + + def test_engine_init(self): + engine = search.meilisearch.MeilisearchEngine(index="my_index") + assert engine.meilisearch_index_name == "my_index" + + def test_engine_index(self): + engine = search.meilisearch.MeilisearchEngine(index="my_index") + engine.meilisearch_index.add_documents = Mock() + document = 
{ + "id": "abcd", + "name": "My name", + "title": "My title", + } + processed_document = { + # Primary key field + # can be verified with: echo -n "abcd" | sha1sum + "_pk": "81fe8bfe87576c3ecb22426f8e57847382917acf", + "id": "abcd", + "name": "My name", + "title": "My title", + } + engine.index(sources=[document]) + engine.meilisearch_index.add_documents.assert_called_with( + [processed_document], + serializer=search.meilisearch.DocumentEncoder, + ) + + def test_engine_search(self): + engine = search.meilisearch.MeilisearchEngine(index="my_index") + engine.meilisearch_index.search = Mock(return_value={ + "hits": [ + { + "pk": "f381d4f1914235c9532576c0861d09b484ade634", + "id": "course-v1:OpenedX+DemoX+DemoCourse", + "_rankingScore": 0.865, + }, + ], + "query": "demo", + "processingTimeMs": 0, + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + }) + + results = engine.search( + query_string="abc", + field_dictionary={ + "course": "course-v1:testorg+test1+alpha", + "org": "testorg", + }, + filter_dictionary={"key": "value"}, + exclude_dictionary={"id": ["abcd"]}, + aggregation_terms={"org": 1, "course": 2}, + ) + + engine.meilisearch_index.search.assert_called_with("abc", { + "showRankingScore": True, + "facets": ["org", "course"], + "filter": [ + 'course = "course-v1:testorg+test1+alpha"', + 'org = "testorg"', 'key = "value" OR key NOT EXISTS', + 'NOT _pk = "81fe8bfe87576c3ecb22426f8e57847382917acf"', + ] + }) + assert results == { + "aggs": {}, + "max_score": 0.865, + "results": [ + { + "_id": "course-v1:OpenedX+DemoX+DemoCourse", + "_index": "my_index", + "_type": "_doc", + "data": { + "id": "course-v1:OpenedX+DemoX+DemoCourse", + "pk": "f381d4f1914235c9532576c0861d09b484ade634", + }, + }, + ], + "took": 0, + "total": 1, + } + + def test_engine_remove(self): + engine = search.meilisearch.MeilisearchEngine(index="my_index") + engine.meilisearch_index.delete_documents = Mock() + # Primary key field + # can be verified with: echo -n "abcd" | sha1sum + doc_id = 
"abcd" + doc_pk = "81fe8bfe87576c3ecb22426f8e57847382917acf" + engine.remove(doc_ids=[doc_id]) + engine.meilisearch_index.delete_documents.assert_called_with([doc_pk]) From 3daff7e516a9a84b282de3cf75307bf17fa52c89 Mon Sep 17 00:00:00 2001 From: Jillian Vogel Date: Wed, 23 Oct 2024 13:38:13 +1030 Subject: [PATCH 6/9] test: slightly more test coverage --- search/tests/test_meilisearch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/search/tests/test_meilisearch.py b/search/tests/test_meilisearch.py index 7e5f3af5..dc0e9f77 100644 --- a/search/tests/test_meilisearch.py +++ b/search/tests/test_meilisearch.py @@ -318,6 +318,7 @@ def test_engine_search(self): filter_dictionary={"key": "value"}, exclude_dictionary={"id": ["abcd"]}, aggregation_terms={"org": 1, "course": 2}, + log_search_params=True, ) engine.meilisearch_index.search.assert_called_with("abc", { From faa93ef450d5d13819dd21b53dc27c791170df60 Mon Sep 17 00:00:00 2001 From: Jillian Vogel Date: Wed, 23 Oct 2024 13:38:25 +1030 Subject: [PATCH 7/9] chore: bump version to 4.1.0 --- edxsearch/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edxsearch/__init__.py b/edxsearch/__init__.py index 063a12c6..d00a3e59 100644 --- a/edxsearch/__init__.py +++ b/edxsearch/__init__.py @@ -1,3 +1,3 @@ """ Container module for testing / demoing search """ -__version__ = '4.0.1' +__version__ = '4.1.0' From c0a3c70678b91607dffcd15aa02bb86b38f62edf Mon Sep 17 00:00:00 2001 From: Jillian Vogel Date: Wed, 23 Oct 2024 13:57:36 +1030 Subject: [PATCH 8/9] chore: consolidate settings files and revert change to manage.py --- edxsearch/settings.py | 29 ++++++++++ manage.py | 2 +- settings.py | 132 ------------------------------------------ 3 files changed, 30 insertions(+), 133 deletions(-) delete mode 100644 settings.py diff --git a/edxsearch/settings.py b/edxsearch/settings.py index c6aee223..e200eaff 100644 --- a/edxsearch/settings.py +++ b/edxsearch/settings.py @@ -53,6 +53,7 @@ 
'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', + 'eventtracking.django', 'waffle', ) @@ -99,3 +100,31 @@ # https://docs.djangoproject.com/en/1.6/howto/static-files/ STATIC_URL = '/static/' + +# EVENT TRACKING ################################# + +TRACK_MAX_EVENT = 50000 + +TRACKING_BACKENDS = { + 'logger': { + 'ENGINE': 'track.backends.logger.LoggerBackend', + 'OPTIONS': { + 'name': 'tracking' + } + } +} + +# We're already logging events, and we don't want to capture user +# names/passwords. Heartbeat events are likely not interesting. +TRACKING_IGNORE_URL_PATTERNS = [r'^/event', r'^/login', r'^/heartbeat'] + +EVENT_TRACKING_ENABLED = True +EVENT_TRACKING_BACKENDS = { + 'logger': { + 'ENGINE': 'eventtracking.backends.logger.LoggerBackend', + 'OPTIONS': { + 'name': 'tracking', + 'max_event_size': TRACK_MAX_EVENT, + } + } +} diff --git a/manage.py b/manage.py index a301aba1..074a0e3b 100755 --- a/manage.py +++ b/manage.py @@ -7,7 +7,7 @@ import sys if __name__ == "__main__": - os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings") + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "edxsearch.settings") from django.core.management import execute_from_command_line diff --git a/settings.py b/settings.py deleted file mode 100644 index 65362a40..00000000 --- a/settings.py +++ /dev/null @@ -1,132 +0,0 @@ -""" -Django settings for edxsearch test project. - -For more information on this file, see -https://docs.djangoproject.com/en/1.6/topics/settings/ - -For the full list of settings and their values, see -https://docs.djangoproject.com/en/1.6/ref/settings/ -""" - -# Build paths inside the project like this: os.path.join(BASE_DIR, ...) - -import os -BASE_DIR = os.path.dirname(os.path.dirname(__file__)) - - -# Quick-start development settings - unsuitable for production -# See https://docs.djangoproject.com/en/1.6/howto/deployment/checklist/ - -# SECURITY WARNING: keep the secret key used in production secret! 
-# This is just a container for running tests, it's okay to allow it to be -# defaulted here if not present in environment settings -SECRET_KEY = os.environ.get('SECRET_KEY', '@krr4&!u8#g&2^(q53e3xu_kux$3rm=)7s3m1mjg2%$#u($-g4') - -# SECURITY WARNING: don't run with debug turned on in production! -# This is just a container for running tests -DEBUG = True - -ALLOWED_HOSTS = [] - -TEMPLATES = [ - { - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'APP_DIRS': True, - 'OPTIONS': { - 'context_processors': ( - 'django.template.context_processors.debug', - 'django.template.context_processors.request', - 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', - ) - } - }, -] - - -# Application definition - -INSTALLED_APPS = ( - 'django.contrib.admin', - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.messages', - 'django.contrib.staticfiles', - 'eventtracking.django', - 'waffle', -) - -MIDDLEWARE = ( - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.middleware.clickjacking.XFrameOptionsMiddleware', - 'waffle.middleware.WaffleMiddleware', -) - -ROOT_URLCONF = 'search.urls' - -WSGI_APPLICATION = 'edxsearch.wsgi.application' - - -# Database -# https://docs.djangoproject.com/en/1.6/ref/settings/#databases - -DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.sqlite3', - 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), - } -} - -# Internationalization -# https://docs.djangoproject.com/en/1.6/topics/i18n/ - -LANGUAGE_CODE = 'en-us' - -TIME_ZONE = 'UTC' - -USE_I18N = True - -USE_L10N = True - -USE_TZ = True - - -# Static files (CSS, JavaScript, Images) -# https://docs.djangoproject.com/en/1.6/howto/static-files/ - -STATIC_URL = 
'/static/' - - -############################## EVENT TRACKING ################################# - -TRACK_MAX_EVENT = 50000 - -TRACKING_BACKENDS = { - 'logger': { - 'ENGINE': 'track.backends.logger.LoggerBackend', - 'OPTIONS': { - 'name': 'tracking' - } - } -} - -# We're already logging events, and we don't want to capture user -# names/passwords. Heartbeat events are likely not interesting. -TRACKING_IGNORE_URL_PATTERNS = [r'^/event', r'^/login', r'^/heartbeat'] - -EVENT_TRACKING_ENABLED = True -EVENT_TRACKING_BACKENDS = { - 'logger': { - 'ENGINE': 'eventtracking.backends.logger.LoggerBackend', - 'OPTIONS': { - 'name': 'tracking', - 'max_event_size': TRACK_MAX_EVENT, - } - } -} - From 75522778896b68cec6fdf52d81265027e9b8758b Mon Sep 17 00:00:00 2001 From: Jillian Vogel Date: Wed, 23 Oct 2024 14:02:08 +1030 Subject: [PATCH 9/9] test: fix settings path --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 20d4e9cb..c9e94021 100644 --- a/tox.ini +++ b/tox.ini @@ -11,7 +11,7 @@ deps = django42: Django>=4.2,<4.3 -r {toxinidir}/requirements/testing.txt commands = - python -Wd -m coverage run manage.py test --settings=settings {posargs} + python -Wd -m coverage run manage.py test {posargs} python -m coverage xml [testenv:quality]