From b9930ec7cc3e8c884113bffc988c07c95a58fe5f Mon Sep 17 00:00:00 2001 From: Julia Eskew Date: Wed, 1 Sep 2021 11:46:49 -0400 Subject: [PATCH] feat: Optimize the querying of course block dates when querying dates for use in a course outline, which don't need block dates below the subsection level of a course. Also, rework the block date caching to use TieredCache, as well as using the current published version of a course in the cache key to remove the need for cache invalidation. TNL-8061 --- CHANGELOG.rst | 5 + edx_when/__init__.py | 4 +- edx_when/api.py | 179 ++++++++++++------ .../0008_courseversion_block_type.py | 22 +++ edx_when/models.py | 6 +- tests/__init__.py | 0 tests/test_api.py | 64 ++++--- tests/test_xblock_services.py | 1 - 8 files changed, 191 insertions(+), 90 deletions(-) create mode 100644 edx_when/migrations/0008_courseversion_block_type.py create mode 100644 tests/__init__.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 54d42fa..e8c52d5 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -14,6 +14,11 @@ Change Log Unreleased ~~~~~~~~~~ +[2.2.0] - 2021-08-27 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* Added optimization when requesting course block dates for an outline, where block dates below subsections are unneeded. +* Use current version of the course to improve the cache key, along with using the TieredCache to cache date data. + [2.1.0] - 2021-07-23 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Added Django 3.2 Support diff --git a/edx_when/__init__.py b/edx_when/__init__.py index ed31c48..98a50dd 100644 --- a/edx_when/__init__.py +++ b/edx_when/__init__.py @@ -1,7 +1,7 @@ """ -Your project description goes here. +Central source of course block dates for the LMS. """ -__version__ = '2.1.0' +__version__ = '2.2.0' default_app_config = 'edx_when.apps.EdxWhenConfig' # pylint: disable=invalid-name diff --git a/edx_when/api.py b/edx_when/api.py index 7dd25e1..65a972a 100644 --- a/edx_when/api.py +++ b/edx_when/api.py @@ -5,11 +5,10 @@ import logging from datetime import timedelta -from django.core.cache import cache from django.core.exceptions import ValidationError from django.db import transaction from django.db.models import DateTimeField, ExpressionWrapper, F, ObjectDoesNotExist, Q -from edx_django_utils.cache.utils import DEFAULT_REQUEST_CACHE +from edx_django_utils.cache.utils import TieredCache from opaque_keys import InvalidKeyError from opaque_keys.edx.keys import CourseKey, UsageKey @@ -26,15 +25,28 @@ FIELDS_TO_EXTRACT = ('due', 'start', 'end') -def _content_dates_cache_key(course_key, query_dict): - """Memcached key for ContentDates given course_key and filter args.""" +def _content_dates_cache_key(course_key, query_dict, subsection_and_higher_only, published_version): + """ + Memcached key for ContentDates given course_key, filter args, subsection and higher blocks, and published version. + + Adding the course's published version makes cache invalidation unnecessary, + as setting new course block dates will always be a new course version. 
+ """ query_dict_str = ".".join( sorted( "{},{}".format(key, value or "") for key, value in query_dict.items() ) ) - return f"edx-when.content_dates:{course_key}:{query_dict_str}" + subsection_and_higher_only_str = '' + if subsection_and_higher_only: + subsection_and_higher_only_str = 'subsection_and_higher_only' + published_version_str = '' + if published_version: + published_version_str = published_version + + return f'edx-when.content_dates:{course_key}:{query_dict_str}:'\ + f'{subsection_and_higher_only_str}:{published_version_str}' def _ensure_key(key_class, key_obj): @@ -68,7 +80,7 @@ def set_dates_for_course(course_key, items): """ Set dates for blocks. - items is an iterator of (location, field metadata dictionary) + items: iterator of (location, field metadata dictionary) """ with transaction.atomic(): active_date_ids = [] @@ -79,16 +91,22 @@ def set_dates_for_course(course_key, items): val = fields[field] if val: log.info('Setting date for %r, %s, %r', location, field, val) - active_date_ids.append(set_date_for_block(course_key, location, field, val)) + active_date_ids.append( + set_date_for_block(course_key, location, field, val) + ) # Now clear out old dates that we didn't touch - clear_dates_for_course(course_key, keep=active_date_ids) + _clear_dates_for_course(course_key, active_date_ids) -def clear_dates_for_course(course_key, keep=None): +def _clear_dates_for_course(course_key, keep=None): """ Set all dates to inactive. + This method changes currently-cached rows - but the cache key contains the course's published version. + And the calling method is *only* invoked upon course publish, which guarantees that a new course published + version will be used to get course dates after this call is made, invalidating existing cache entries. + Arguments: course_key: either a CourseKey or string representation of same keep: an iterable of ContentDate ids to keep active @@ -99,10 +117,6 @@ def clear_dates_for_course(course_key, keep=None): dates = dates.exclude(id__in=keep) dates.update(active=False) - for query_dict in [{}, {'policy__rel_date': None}]: - cache_key = _content_dates_cache_key(course_key, query_dict) - cache.delete(cache_key) - def _get_end_dates_from_content_dates(qset): """ @@ -122,9 +136,33 @@ def _get_end_dates_from_content_dates(qset): return end_datetime, cutoff_datetime +def _processed_results_cache_key( + course_id, user_id, schedule, allow_relative_dates, + subsection_and_higher_only, published_version + ): + """ + Construct the cache key, incorporating all parameters which would cause a different query set to be returned. + """ + cache_key = 'course_dates.%s' % course_id + if user_id: + cache_key += '.%s' % user_id + if schedule: + cache_key += '.schedule-%s' % schedule.start_date + if allow_relative_dates: + cache_key += '.with-rel-dates' + if subsection_and_higher_only: + cache_key += '.subsection_and_higher_only' + cache_key += '.%s' % published_version if published_version else '' + return cache_key + + # TODO: Record dates for every block in the course, not just the ones where the block # has an explicitly set date. -def get_dates_for_course(course_id, user=None, use_cached=True, schedule=None): +def get_dates_for_course( + course_id, + user=None, use_cached=True, schedule=None, + subsection_and_higher_only=False, published_version=None + ): """ Return dictionary of dates for the given course_id and optional user. 
@@ -133,29 +171,35 @@ def get_dates_for_course(course_id, user=None, use_cached=True, schedule=None): Arguments: course_id: either a CourseKey or string representation of same - user: None, an int, or a User object - use_cached: will skip cache lookups (but not saves) if False - schedule: optional override for a user's enrollment Schedule, used for relative date calculations + user: None, an int (user_id), or a User object + use_cached: bool (optional) - skips cache lookups (but not saves) if False + schedule: Schedule obj (optional) - override for a user's enrollment Schedule, used + for relative date calculations + subsection_and_higher_only: bool (optional) - only returns dates for blocks at the subsection + level and higher (i.e. course, section (chapter), subsection (sequential)). + published_version: (optional) string representing the ID of the course's published version """ course_id = _ensure_key(CourseKey, course_id) log.debug("Getting dates for %s as %s", course_id, user) allow_relative_dates = _are_relative_dates_enabled(course_id) - cache_key = 'course_dates.%s' % course_id + user_id = None if user: if isinstance(user, int): user_id = user else: user_id = user.id if not user.is_anonymous else '' - cache_key += '.%s' % user_id - else: - user_id = None - if schedule: - cache_key += '.schedule-%s' % schedule.start_date - if allow_relative_dates: - cache_key += '.with-rel-dates' - dates = DEFAULT_REQUEST_CACHE.data.get(cache_key, None) + # Construct the cache key, incorporating all parameters which would cause a different + # query set to be returned. + processed_results_cache_key = _processed_results_cache_key( + course_id, user_id, schedule, allow_relative_dates, subsection_and_higher_only, published_version + ) + + dates = None + cached_response = TieredCache.get_cached_response(processed_results_cache_key) + if cached_response.is_found: + dates = cached_response.value if use_cached and dates is not None: return dates @@ -166,19 +210,28 @@ def get_dates_for_course(course_id, user=None, use_cached=True, schedule=None): # to cache invalidation in clear_dates_for_course. This is only safe to do # because a) we serialize to cache with pickle; b) we don't write to # ContentDate in this function; This is not a great long-term solution. - external_cache_key = _content_dates_cache_key(course_id, rel_lookup) - qset = cache.get(external_cache_key) if use_cached else None + raw_results_cache_key = _content_dates_cache_key( + course_id, rel_lookup, subsection_and_higher_only, published_version + ) + qset = None + if use_cached: + cached_response = TieredCache.get_cached_response(raw_results_cache_key) + if cached_response.is_found: + qset = cached_response.value if qset is None: + qset = models.ContentDate.objects.filter(course_id=course_id, active=True, **rel_lookup) + if subsection_and_higher_only: + # Include NULL block_type values as well because of lazy rollout. 
+                qset = qset.filter(Q(block_type__isnull=True) | Q(block_type__in=('course', 'chapter', 'sequential')))
+            qset = list(
-                models.ContentDate.objects
-                .filter(course_id=course_id, active=True, **rel_lookup)
-                .select_related('policy')
-                .only(
-                    "course_id", "policy__rel_date",
-                    "policy__abs_date", "location", "field"
-                )
+                qset.select_related('policy')
+                .only(
+                    "course_id", "policy__rel_date",
+                    "policy__abs_date", "location", "field"
+                )
             )
-            cache.set(external_cache_key, qset)
+            TieredCache.set_all_tiers(raw_results_cache_key, qset)
 
     dates = {}
     policies = {}
@@ -212,12 +265,12 @@ def get_dates_for_course(course_id, user=None, use_cached=True, schedule=None):
         except (ValueError, ObjectDoesNotExist, KeyError):
             log.warning("Unable to read date for %s", userdate.content_date, exc_info=True)
 
-    DEFAULT_REQUEST_CACHE.data[cache_key] = dates
+    TieredCache.set_all_tiers(processed_results_cache_key, dates)
 
     return dates
 
 
-def get_date_for_block(course_id, block_id, name='due', user=None):
+def get_date_for_block(course_id, block_id, name='due', user=None, published_version=None):
     """
     Return the date for block in the course for the (optional) user.
 
@@ -226,9 +279,15 @@ def get_date_for_block(course_id, block_id, name='due', user=None):
         block_id: either a UsageKey or string representation of same
         name (optional): the name of the date field to read
        user: None, an int, or a User object
+        published_version: (optional) string representing the ID of the course's published version
     """
     try:
-        return get_dates_for_course(course_id, user).get((_ensure_key(UsageKey, block_id), name), None)
+        return get_dates_for_course(
+            course_id, user=user, published_version=published_version
+        ).get(
+            (_ensure_key(UsageKey, block_id), name),
+            None
+        )
     except InvalidKeyError:
         return None
 
@@ -296,7 +355,10 @@ def get_overrides_for_user(course_id, user):
         yield {'location': udate.content_date.location, 'actual_date': udate.actual_date}
 
 
-def set_date_for_block(course_id, block_id, field, date_or_timedelta, user=None, reason='', actor=None):
+def set_date_for_block(
+    course_id, block_id, field, date_or_timedelta,
+    user=None, reason='', actor=None
+    ):
     """
     Save the date for a particular field in a block.
 
@@ -318,6 +380,14 @@ def set_date_for_block(course_id, block_id, field, date_or_timedelta, user=None,
     else:
         date_kwargs = {'abs_date': date_or_timedelta}
 
+    def _set_content_date_policy(date_kwargs, existing_content_date):
+        # Race conditions were creating multiple DatePolicies w/ the same values. Handle that case.
+        existing_policies = list(models.DatePolicy.objects.filter(**date_kwargs).order_by('id'))
+        if existing_policies:
+            existing_content_date.policy = existing_policies[0]
+        else:
+            existing_content_date.policy = models.DatePolicy.objects.create(**date_kwargs)
+
     with transaction.atomic(savepoint=False):  # this is frequently called in a loop, let's avoid the savepoints
         try:
             existing_date = models.ContentDate.objects.select_related('policy').get(
@@ -327,20 +397,13 @@ def set_date_for_block(course_id, block_id, field, date_or_timedelta, user=None,
             existing_date.active = True
         except models.ContentDate.DoesNotExist as error:
             if user:
+                # A UserDate creation below requires an existing ContentDate.
                 raise MissingDateError(block_id) from error
             existing_date = models.ContentDate(course_id=course_id, location=block_id, field=field)
-
-            # We had race-conditions create multiple DatePolicies w/ the same values. Handle that case.
-            try:
-                existing_policies = list(models.DatePolicy.objects.filter(**date_kwargs).order_by('id'))
-            except models.DatePolicy.DoesNotExist:
-                existing_policies = []
-            if existing_policies:
-                existing_date.policy = existing_policies[0]
-            else:
-                existing_date.policy = models.DatePolicy.objects.create(**date_kwargs)
+            _set_content_date_policy(date_kwargs, existing_date)
             needs_save = True
 
+        # Determine if the course block date is for a particular user -or- for the course in general.
         if user and not user.is_anonymous:
             userd = models.UserDate(
                 user=user,
@@ -363,18 +426,14 @@ def set_date_for_block(course_id, block_id, field, date_or_timedelta, user=None,
                     existing_date.policy.abs_date or existing_date.policy.rel_date, date_or_timedelta
                 )
-                # We had race-conditions create multiple DatePolicies w/ the same values. Handle that case.
-                try:
-                    existing_policies = list(models.DatePolicy.objects.filter(**date_kwargs).order_by('id'))
-                except models.DatePolicy.DoesNotExist:
-                    existing_policies = []
-                if existing_policies:
-                    existing_date.policy = existing_policies[0]
-                else:
-                    existing_date.policy = models.DatePolicy.objects.create(**date_kwargs)
-
+                _set_content_date_policy(date_kwargs, existing_date)
                 needs_save = True
 
+        # Sync the block_type for the ContentDate, if needed.
+        if existing_date.block_type != block_id.block_type:
+            existing_date.block_type = block_id.block_type
+            needs_save = True
+
         if needs_save:
             existing_date.save()
         return existing_date.id
diff --git a/edx_when/migrations/0008_courseversion_block_type.py b/edx_when/migrations/0008_courseversion_block_type.py
new file mode 100644
index 0000000..6b0326f
--- /dev/null
+++ b/edx_when/migrations/0008_courseversion_block_type.py
@@ -0,0 +1,22 @@
+# Generated by Django 3.2.6 on 2021-09-01 10:45
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('edx_when', '0007_meta_tweaks'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='contentdate',
+            name='block_type',
+            field=models.CharField(max_length=255, null=True),
+        ),
+        migrations.AddIndex(
+            model_name='contentdate',
+            index=models.Index(fields=['course_id', 'block_type'], name='edx_when_course_block_type_idx'),
+        ),
+    ]
diff --git a/edx_when/models.py b/edx_when/models.py
index 01a1761..697d5d7 100644
--- a/edx_when/models.py
+++ b/edx_when/models.py
@@ -96,18 +96,22 @@ class ContentDate(models.Model):
     location = UsageKeyField(null=True, default=None, db_index=True, max_length=255)
     field = models.CharField(max_length=255, default='')
     active = models.BooleanField(default=True)
+    block_type = models.CharField(max_length=255, null=True)
 
     class Meta:
         """Django Metadata."""
 
         unique_together = ('policy', 'location', 'field')
+        indexes = [
+            models.Index(fields=('course_id', 'block_type'), name='edx_when_course_block_type_idx'),
+        ]
 
     def __str__(self):
         """
         Get a string representation of this model instance.
""" # Location already holds course id - return f'{self.location}, {self.field}' + return f'ContentDate({self.policy}, {self.location}, {self.field}, {self.block_type})' def schedule_for_user(self, user): """ diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_api.py b/tests/test_api.py index 3b7f19e..2c6724a 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -8,9 +8,8 @@ import ddt from django.contrib import auth -from django.core.cache import cache from django.test import TestCase -from edx_django_utils.cache.utils import DEFAULT_REQUEST_CACHE +from edx_django_utils.cache.utils import TieredCache from opaque_keys.edx.locator import CourseLocator from edx_when import api, models @@ -34,6 +33,8 @@ def setUp(self): self.course = DummyCourse(id='course-v1:testX+tt101+2019') self.course.save() + self.course_version = 'TEST_VERSION' + self.user = User(username='tester', email='tester@test.com') self.user.save() @@ -50,10 +51,9 @@ def setUp(self): relative_dates_patcher = patch('edx_when.api._are_relative_dates_enabled', return_value=True) relative_dates_patcher.start() self.addCleanup(relative_dates_patcher.stop) - self.addCleanup(cache.clear) + self.addCleanup(TieredCache.dangerous_clear_all_tiers) - cache.clear() - DEFAULT_REQUEST_CACHE.clear() + TieredCache.dangerous_clear_all_tiers() @patch('edx_when.api.Schedule', DummySchedule) def test_get_schedules_with_due_date_for_abs_date(self): @@ -126,6 +126,22 @@ def test_set_dates_for_course(self): cdates = models.ContentDate.objects.all() assert len(cdates) == NUM_OVERRIDES + def test_get_dates_for_course_outline(self): + items = make_items() + course_key = items[0][0].course_key + items.append((make_block_id(course_key, block_type='video'), {'start': datetime(2019, 3, 21), 'test': '1'})) + api.set_dates_for_course(course_key, items) + # Ensure the video block *was* returned normally. + retrieved = api.get_dates_for_course( + course_key, subsection_and_higher_only=False, published_version=self.course_version + ) + assert len(retrieved) == NUM_OVERRIDES + 1 + # Ensure the video block *was not* returned with subsection and higher blocks. 
+ retrieved = api.get_dates_for_course( + course_key, subsection_and_higher_only=True, published_version=self.course_version + ) + assert len(retrieved) == NUM_OVERRIDES + def test_get_dates_for_course(self): items = make_items() api.set_dates_for_course(items[0][0].course_key, items) @@ -188,14 +204,14 @@ def test_clear_dates_for_course(self): keep_date = models.ContentDate.objects.get(location=items[1][0]) with self.assertNumQueries(1): - api.clear_dates_for_course(items[0][0].course_key, keep=[keep_date.id]) + api._clear_dates_for_course(items[0][0].course_key, keep=[keep_date.id]) # pylint: disable=protected-access retrieved = api.get_dates_for_course(items[0][0].course_key, use_cached=False) self.assertEqual(len(retrieved), 1) self.assertEqual(list(retrieved.keys())[0][0], items[1][0]) with self.assertNumQueries(1): - api.clear_dates_for_course(items[0][0].course_key) + api._clear_dates_for_course(items[0][0].course_key) # pylint: disable=protected-access self.assertEqual(api.get_dates_for_course(items[0][0].course_key, use_cached=False), {}) def test_set_user_override_invalid_block(self): @@ -241,8 +257,7 @@ def test_set_user_override(self, initial_date, override_date, expected_date): api.set_dates_for_course(str(block_id.course_key), items) api.set_date_for_block(block_id.course_key, block_id, 'due', override_date, user=self.user) - DEFAULT_REQUEST_CACHE.clear() - cache.clear() + TieredCache.dangerous_clear_all_tiers() retrieved = api.get_dates_for_course(block_id.course_key, user=self.user.id) assert len(retrieved) == NUM_OVERRIDES assert retrieved[block_id, 'due'] == expected_date @@ -275,10 +290,8 @@ def test_set_date_for_block(self, initial_date, override_date, expected_date): items[0][1]['due'] = initial_date api.set_dates_for_course(str(block_id.course_key), items) - api.set_date_for_block(block_id.course_key, block_id, 'due', override_date) - DEFAULT_REQUEST_CACHE.clear() - cache.clear() + TieredCache.dangerous_clear_all_tiers() retrieved = api.get_dates_for_course(block_id.course_key, user=self.user.id) assert len(retrieved) == NUM_OVERRIDES assert retrieved[block_id, 'due'] == expected_date @@ -299,15 +312,13 @@ def test_remove_user_override(self, initial_date, override_date, expected_date): api.set_dates_for_course(str(block_id.course_key), items) api.set_date_for_block(block_id.course_key, block_id, 'due', override_date, user=self.user) - DEFAULT_REQUEST_CACHE.clear() - cache.clear() + TieredCache.dangerous_clear_all_tiers() retrieved = api.get_dates_for_course(block_id.course_key, user=self.user.id) assert len(retrieved) == NUM_OVERRIDES assert retrieved[block_id, 'due'] == expected_date api.set_date_for_block(block_id.course_key, block_id, 'due', None, user=self.user) - DEFAULT_REQUEST_CACHE.clear() - cache.clear() + TieredCache.dangerous_clear_all_tiers() retrieved = api.get_dates_for_course(block_id.course_key, user=self.user.id) assert len(retrieved) == NUM_OVERRIDES if isinstance(initial_date, timedelta): @@ -456,8 +467,7 @@ def test_relative_date_past_cutoff_date(self): ] assert api.get_dates_for_course(course_key, schedule=self.schedule) == dict(dates) - cache.clear() - DEFAULT_REQUEST_CACHE.clear() + TieredCache.dangerous_clear_all_tiers() # Now set schedule start date too close to the end date and verify that we no longer get due dates self.schedule.created = datetime(2019, 4, 15) @@ -506,12 +516,11 @@ def test_get_dates_for_course_query_counts(self, has_schedule, pass_user_object, ) assert dates == cached_dates - # Now wipe the request cache... 
- DEFAULT_REQUEST_CACHE.clear() + # Now wipe all cache tiers... + TieredCache.dangerous_clear_all_tiers() - # This time, test the external cache (which eliminates the one large - # query to ContentDates). - with self.assertNumQueries(query_count - 1): + # No cached values - so will do *all* queries again. + with self.assertNumQueries(query_count): externally_cached_dates = api.get_dates_for_course( course_id=self.course.id, user=user, schedule=schedule ) @@ -527,13 +536,16 @@ def test_get_dates_for_course_query_counts(self, has_schedule, pass_user_object, def test_set_dates_for_course_query_counts(self): items = make_items() - with self.assertNumQueries(2): # two for transaction wrappers + with self.assertNumQueries(2): # two for savepoint wrappers with patch('edx_when.api.set_date_for_block', return_value=1) as mock_set: - with patch('edx_when.api.clear_dates_for_course') as mock_clear: + with patch('edx_when.api._clear_dates_for_course') as mock_clear: api.set_dates_for_course(self.course.id, items) self.assertEqual(mock_set.call_count, NUM_OVERRIDES) - self.assertEqual(mock_clear.call_args_list, [call(self.course.id, keep=[1] * NUM_OVERRIDES)]) + self.assertEqual( + mock_clear.call_args_list, + [call(self.course.id, [1] * NUM_OVERRIDES)] + ) def test_set_date_for_block_query_counts(self): args = (self.course.id, make_block_id(self.course.id), 'due', datetime(2019, 3, 22)) diff --git a/tests/test_xblock_services.py b/tests/test_xblock_services.py index 1892dd4..563a93b 100644 --- a/tests/test_xblock_services.py +++ b/tests/test_xblock_services.py @@ -92,7 +92,6 @@ def test_field_data_has(self): assert dfd.has(block, 'due') is True assert dfd.has(block, 'foo') is defaults.has(block, 'foo') child = MockBlock('child', block) - # import pdb;pdb.set_trace() assert dfd.has(child, 'due') is False assert dfd.default(child, 'due') == self.items[0][1]['due'] assert dfd.default(child, 'foo') is defaults.default(child, 'foo')
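
Usage sketch (not part of the patch above): how a caller running inside an LMS/Django context with edx-when installed might read outline-level dates using the parameters this change adds. The course key and published-version string below are illustrative placeholders; the caller is assumed to look up the course's current published version itself.

    from opaque_keys.edx.keys import CourseKey
    from edx_when.api import get_dates_for_course

    course_key = CourseKey.from_string('course-v1:testX+tt101+2019')  # placeholder course id
    published_version = 'TEST_VERSION'  # placeholder; normally the course's current published version id

    # Ask only for course/chapter/sequential dates, since an outline never shows dates
    # below the subsection level. Because the published version is part of the TieredCache
    # keys, publishing a new version simply produces new cache entries; no explicit
    # invalidation is needed.
    outline_dates = get_dates_for_course(
        course_key,
        user=None,
        subsection_and_higher_only=True,
        published_version=published_version,
    )
    # The result maps (usage_key, field_name) pairs such as (block_id, 'due') to dates.
    for (usage_key, field_name), when in outline_dates.items():
        print(usage_key, field_name, when)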