Skip to content

Commit

Permalink
Merge pull request #99 from edx/juliasq/TNL_8061_optimize_outline_blo…
Browse files Browse the repository at this point in the history
…ck_data_retrieval

Optimize block date retrieval for course outlines.
  • Loading branch information
Julia Eskew authored Sep 1, 2021
2 parents bfbf87c + b9930ec commit 5b233d5
Show file tree
Hide file tree
Showing 8 changed files with 191 additions and 90 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ Change Log
Unreleased
~~~~~~~~~~

[2.2.0] - 2021-08-27
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Added optimization when requesting course block dates for an outline, where block dates below subsections are unneeded.
* Use current version of the course to improve the cache key, along with using the TieredCache to cache date data.

[2.1.0] - 2021-07-23
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Added Django 3.2 Support
Expand Down
4 changes: 2 additions & 2 deletions edx_when/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Your project description goes here.
Central source of course block dates for the LMS.
"""

__version__ = '2.1.0'
__version__ = '2.2.0'

default_app_config = 'edx_when.apps.EdxWhenConfig' # pylint: disable=invalid-name
179 changes: 119 additions & 60 deletions edx_when/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@
import logging
from datetime import timedelta

from django.core.cache import cache
from django.core.exceptions import ValidationError
from django.db import transaction
from django.db.models import DateTimeField, ExpressionWrapper, F, ObjectDoesNotExist, Q
from edx_django_utils.cache.utils import DEFAULT_REQUEST_CACHE
from edx_django_utils.cache.utils import TieredCache
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import CourseKey, UsageKey

Expand All @@ -26,15 +25,28 @@
FIELDS_TO_EXTRACT = ('due', 'start', 'end')


def _content_dates_cache_key(course_key, query_dict):
"""Memcached key for ContentDates given course_key and filter args."""
def _content_dates_cache_key(course_key, query_dict, subsection_and_higher_only, published_version):
"""
Memcached key for ContentDates given course_key, filter args, subsection and higher blocks, and published version.
Adding the course's published version makes cache invalidation unnecessary,
as setting new course block dates will always be a new course version.
"""
query_dict_str = ".".join(
sorted(
"{},{}".format(key, value or "")
for key, value in query_dict.items()
)
)
return f"edx-when.content_dates:{course_key}:{query_dict_str}"
subsection_and_higher_only_str = ''
if subsection_and_higher_only:
subsection_and_higher_only_str = 'subsection_and_higher_only'
published_version_str = ''
if published_version:
published_version_str = published_version

return f'edx-when.content_dates:{course_key}:{query_dict_str}:'\
f'{subsection_and_higher_only_str}:{published_version_str}'


def _ensure_key(key_class, key_obj):
Expand Down Expand Up @@ -68,7 +80,7 @@ def set_dates_for_course(course_key, items):
"""
Set dates for blocks.
items is an iterator of (location, field metadata dictionary)
items: iterator of (location, field metadata dictionary)
"""
with transaction.atomic():
active_date_ids = []
Expand All @@ -79,16 +91,22 @@ def set_dates_for_course(course_key, items):
val = fields[field]
if val:
log.info('Setting date for %r, %s, %r', location, field, val)
active_date_ids.append(set_date_for_block(course_key, location, field, val))
active_date_ids.append(
set_date_for_block(course_key, location, field, val)
)

# Now clear out old dates that we didn't touch
clear_dates_for_course(course_key, keep=active_date_ids)
_clear_dates_for_course(course_key, active_date_ids)


def clear_dates_for_course(course_key, keep=None):
def _clear_dates_for_course(course_key, keep=None):
"""
Set all dates to inactive.
This method changes currently-cached rows - but the cache key contains the course's published version.
And the calling method is *only* invoked upon course publish, which guarantees that a new course published
version will be used to get course dates after this call is made, invalidating existing cache entries.
Arguments:
course_key: either a CourseKey or string representation of same
keep: an iterable of ContentDate ids to keep active
Expand All @@ -99,10 +117,6 @@ def clear_dates_for_course(course_key, keep=None):
dates = dates.exclude(id__in=keep)
dates.update(active=False)

for query_dict in [{}, {'policy__rel_date': None}]:
cache_key = _content_dates_cache_key(course_key, query_dict)
cache.delete(cache_key)


def _get_end_dates_from_content_dates(qset):
"""
Expand All @@ -122,9 +136,33 @@ def _get_end_dates_from_content_dates(qset):
return end_datetime, cutoff_datetime


def _processed_results_cache_key(
course_id, user_id, schedule, allow_relative_dates,
subsection_and_higher_only, published_version
):
"""
Construct the cache key, incorporating all parameters which would cause a different query set to be returned.
"""
cache_key = 'course_dates.%s' % course_id
if user_id:
cache_key += '.%s' % user_id
if schedule:
cache_key += '.schedule-%s' % schedule.start_date
if allow_relative_dates:
cache_key += '.with-rel-dates'
if subsection_and_higher_only:
cache_key += '.subsection_and_higher_only'
cache_key += '.%s' % published_version if published_version else ''
return cache_key


# TODO: Record dates for every block in the course, not just the ones where the block
# has an explicitly set date.
def get_dates_for_course(course_id, user=None, use_cached=True, schedule=None):
def get_dates_for_course(
course_id,
user=None, use_cached=True, schedule=None,
subsection_and_higher_only=False, published_version=None
):
"""
Return dictionary of dates for the given course_id and optional user.
Expand All @@ -133,29 +171,35 @@ def get_dates_for_course(course_id, user=None, use_cached=True, schedule=None):
Arguments:
course_id: either a CourseKey or string representation of same
user: None, an int, or a User object
use_cached: will skip cache lookups (but not saves) if False
schedule: optional override for a user's enrollment Schedule, used for relative date calculations
user: None, an int (user_id), or a User object
use_cached: bool (optional) - skips cache lookups (but not saves) if False
schedule: Schedule obj (optional) - override for a user's enrollment Schedule, used
for relative date calculations
subsection_and_higher_only: bool (optional) - only returns dates for blocks at the subsection
level and higher (i.e. course, section (chapter), subsection (sequential)).
published_version: (optional) string representing the ID of the course's published version
"""
course_id = _ensure_key(CourseKey, course_id)
log.debug("Getting dates for %s as %s", course_id, user)
allow_relative_dates = _are_relative_dates_enabled(course_id)

cache_key = 'course_dates.%s' % course_id
user_id = None
if user:
if isinstance(user, int):
user_id = user
else:
user_id = user.id if not user.is_anonymous else ''
cache_key += '.%s' % user_id
else:
user_id = None
if schedule:
cache_key += '.schedule-%s' % schedule.start_date
if allow_relative_dates:
cache_key += '.with-rel-dates'

dates = DEFAULT_REQUEST_CACHE.data.get(cache_key, None)
# Construct the cache key, incorporating all parameters which would cause a different
# query set to be returned.
processed_results_cache_key = _processed_results_cache_key(
course_id, user_id, schedule, allow_relative_dates, subsection_and_higher_only, published_version
)

dates = None
cached_response = TieredCache.get_cached_response(processed_results_cache_key)
if cached_response.is_found:
dates = cached_response.value

if use_cached and dates is not None:
return dates
Expand All @@ -166,19 +210,28 @@ def get_dates_for_course(course_id, user=None, use_cached=True, schedule=None):
# to cache invalidation in clear_dates_for_course. This is only safe to do
# because a) we serialize to cache with pickle; b) we don't write to
# ContentDate in this function; This is not a great long-term solution.
external_cache_key = _content_dates_cache_key(course_id, rel_lookup)
qset = cache.get(external_cache_key) if use_cached else None
raw_results_cache_key = _content_dates_cache_key(
course_id, rel_lookup, subsection_and_higher_only, published_version
)
qset = None
if use_cached:
cached_response = TieredCache.get_cached_response(raw_results_cache_key)
if cached_response.is_found:
qset = cached_response.value
if qset is None:
qset = models.ContentDate.objects.filter(course_id=course_id, active=True, **rel_lookup)
if subsection_and_higher_only:
# Include NULL block_type values as well because of lazy rollout.
qset = qset.filter(Q(block_type__in=('course', 'chapter', 'sequential', None)))

qset = list(
models.ContentDate.objects
.filter(course_id=course_id, active=True, **rel_lookup)
.select_related('policy')
.only(
"course_id", "policy__rel_date",
"policy__abs_date", "location", "field"
)
qset.select_related('policy')
.only(
"course_id", "policy__rel_date",
"policy__abs_date", "location", "field"
)
)
cache.set(external_cache_key, qset)
TieredCache.set_all_tiers(raw_results_cache_key, qset)

dates = {}
policies = {}
Expand Down Expand Up @@ -212,12 +265,12 @@ def get_dates_for_course(course_id, user=None, use_cached=True, schedule=None):
except (ValueError, ObjectDoesNotExist, KeyError):
log.warning("Unable to read date for %s", userdate.content_date, exc_info=True)

DEFAULT_REQUEST_CACHE.data[cache_key] = dates
TieredCache.set_all_tiers(processed_results_cache_key, dates)

return dates


def get_date_for_block(course_id, block_id, name='due', user=None):
def get_date_for_block(course_id, block_id, name='due', user=None, published_version=None):
"""
Return the date for block in the course for the (optional) user.
Expand All @@ -226,9 +279,15 @@ def get_date_for_block(course_id, block_id, name='due', user=None):
block_id: either a UsageKey or string representation of same
name (optional): the name of the date field to read
user: None, an int, or a User object
published_version: (optional) string representing the ID of the course's published version
"""
try:
return get_dates_for_course(course_id, user).get((_ensure_key(UsageKey, block_id), name), None)
return get_dates_for_course(
course_id, user=user, published_version=published_version
).get(
(_ensure_key(UsageKey, block_id), name),
None
)
except InvalidKeyError:
return None

Expand Down Expand Up @@ -296,7 +355,10 @@ def get_overrides_for_user(course_id, user):
yield {'location': udate.content_date.location, 'actual_date': udate.actual_date}


def set_date_for_block(course_id, block_id, field, date_or_timedelta, user=None, reason='', actor=None):
def set_date_for_block(
course_id, block_id, field, date_or_timedelta,
user=None, reason='', actor=None
):
"""
Save the date for a particular field in a block.
Expand All @@ -318,6 +380,14 @@ def set_date_for_block(course_id, block_id, field, date_or_timedelta, user=None,
else:
date_kwargs = {'abs_date': date_or_timedelta}

def _set_content_date_policy(date_kwargs, existing_content_date):
# Race conditions were creating multiple DatePolicies w/ the same values. Handle that case.
existing_policies = list(models.DatePolicy.objects.filter(**date_kwargs).order_by('id'))
if existing_policies:
existing_content_date.policy = existing_policies[0]
else:
existing_content_date.policy = models.DatePolicy.objects.create(**date_kwargs)

with transaction.atomic(savepoint=False): # this is frequently called in a loop, let's avoid the savepoints
try:
existing_date = models.ContentDate.objects.select_related('policy').get(
Expand All @@ -327,20 +397,13 @@ def set_date_for_block(course_id, block_id, field, date_or_timedelta, user=None,
existing_date.active = True
except models.ContentDate.DoesNotExist as error:
if user:
# A UserDate creation below requires an existing ContentDate.
raise MissingDateError(block_id) from error
existing_date = models.ContentDate(course_id=course_id, location=block_id, field=field)

# We had race-conditions create multiple DatePolicies w/ the same values. Handle that case.
try:
existing_policies = list(models.DatePolicy.objects.filter(**date_kwargs).order_by('id'))
except models.DatePolicy.DoesNotExist:
existing_policies = []
if existing_policies:
existing_date.policy = existing_policies[0]
else:
existing_date.policy = models.DatePolicy.objects.create(**date_kwargs)
_set_content_date_policy(date_kwargs, existing_date)
needs_save = True

# Determine if ourse block date is for a particular user -or- for the course in general.
if user and not user.is_anonymous:
userd = models.UserDate(
user=user,
Expand All @@ -363,18 +426,14 @@ def set_date_for_block(course_id, block_id, field, date_or_timedelta, user=None,
existing_date.policy.abs_date or existing_date.policy.rel_date,
date_or_timedelta
)
# We had race-conditions create multiple DatePolicies w/ the same values. Handle that case.
try:
existing_policies = list(models.DatePolicy.objects.filter(**date_kwargs).order_by('id'))
except models.DatePolicy.DoesNotExist:
existing_policies = []
if existing_policies:
existing_date.policy = existing_policies[0]
else:
existing_date.policy = models.DatePolicy.objects.create(**date_kwargs)

_set_content_date_policy(date_kwargs, existing_date)
needs_save = True

# Sync the block_type for the ContentDate, if needed.
if existing_date.block_type != block_id.block_type:
existing_date.block_type = block_id.block_type
needs_save = True

if needs_save:
existing_date.save()
return existing_date.id
Expand Down
22 changes: 22 additions & 0 deletions edx_when/migrations/0008_courseversion_block_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 3.2.6 on 2021-09-01 10:45

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('edx_when', '0007_meta_tweaks'),
]

operations = [
migrations.AddField(
model_name='contentdate',
name='block_type',
field=models.CharField(max_length=255, null=True),
),
migrations.AddIndex(
model_name='contentdate',
index=models.Index(fields=['course_id', 'block_type'], name='edx_when_course_block_type_idx'),
),
]
6 changes: 5 additions & 1 deletion edx_when/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,18 +96,22 @@ class ContentDate(models.Model):
location = UsageKeyField(null=True, default=None, db_index=True, max_length=255)
field = models.CharField(max_length=255, default='')
active = models.BooleanField(default=True)
block_type = models.CharField(max_length=255, null=True)

class Meta:
"""Django Metadata."""

unique_together = ('policy', 'location', 'field')
indexes = [
models.Index(fields=('course_id', 'block_type'), name='edx_when_course_block_type_idx'),
]

def __str__(self):
"""
Get a string representation of this model instance.
"""
# Location already holds course id
return f'{self.location}, {self.field}'
return f'ContentDate({self.policy}, {self.location}, {self.field}, {self.block_type})'

def schedule_for_user(self, user):
"""
Expand Down
Empty file added tests/__init__.py
Empty file.
Loading

0 comments on commit 5b233d5

Please sign in to comment.