From a4dec92ef500c970cb4e3c968a28f35fd322a179 Mon Sep 17 00:00:00 2001 From: Alex Dusenbery Date: Mon, 9 Sep 2019 16:49:23 -0400 Subject: [PATCH] EDUCATOR-4618 | Prevent csv processor objects from having duplicate columns. --- CHANGELOG.rst | 4 + bulk_grades/__init__.py | 2 +- bulk_grades/api.py | 174 ++++++++++++++++++++++++++-------------- tests/test_api.py | 173 +++++++++++++++++++++++++++++---------- tox.ini | 1 - 5 files changed, 248 insertions(+), 106 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ca85549..95b299e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -16,6 +16,10 @@ Unreleased * +[0.6.0] - 2019-09-10 +~~~~~~~~~~~~~~~~~~~~~ +* Prevent Grade and Intervention CSV processors from producing duplicate columns. + [0.5.10] - 2019-09-06 ~~~~~~~~~~~~~~~~~~~~~ * Prevent user from setting negative grades diff --git a/bulk_grades/__init__.py b/bulk_grades/__init__.py index 1b1882b..698cd8b 100644 --- a/bulk_grades/__init__.py +++ b/bulk_grades/__init__.py @@ -4,6 +4,6 @@ from __future__ import absolute_import, unicode_literals -__version__ = '0.5.11' +__version__ = '0.6.0' default_app_config = 'bulk_grades.apps.BulkGradesConfig' # pylint: disable=invalid-name diff --git a/bulk_grades/api.py b/bulk_grades/api.py index 09e16c5..468e344 100644 --- a/bulk_grades/api.py +++ b/bulk_grades/api.py @@ -4,6 +4,8 @@ from __future__ import absolute_import, division, unicode_literals import logging +from collections import OrderedDict +from itertools import product from django.apps import apps from django.core.exceptions import ObjectDoesNotExist @@ -194,23 +196,82 @@ def commit(self, running_task=None): grades_api.task_compute_all_grades_for_course.apply_async(kwargs={'course_key': text_type(course_key)}) -class GradeCSVProcessor(DeferrableMixin, CSVProcessor): +class GradedSubsectionMixin(object): + """ + Mixin to help generated lists of graded subsections + and appropriate column names for each. + """ + def append_columns(self, new_column_names): + """ + Appends items from ``new_column_names`` to ``self.columns`` + if the item is not already contained therein. + """ + current_columns = set(self.columns) + for new_column_name in new_column_names: + if new_column_name not in current_columns: + self.columns.append(new_column_name) + + @staticmethod + def _get_graded_subsections(course_id, filter_subsection=None, filter_assignment_type=None): + """ + Return list of graded subsections. + + If filter_subsection (block usage id) is set, return only that subsection. + If filter_assignment_type (string) is set, return only subsections of the appropriate type. + """ + subsections = OrderedDict() + for subsection in grades_api.graded_subsections_for_course_id(course_id): + block_id = text_type(subsection.location.block_id) + if ( + (filter_subsection and (block_id != filter_subsection.block_id)) + or + (filter_assignment_type and (filter_assignment_type != text_type(subsection.format))) + ): + continue + short_block_id = block_id[:8] + if short_block_id not in subsections: + subsections[short_block_id] = (subsection, subsection.display_name) + return subsections + + @staticmethod + def _subsection_column_names(short_subsection_ids, prefixes): + """ + Given an iterable of ``short_subsection_ids`` (usually from ``_get_graded_subsections`` above), + and ``prefixes`` to append to each, returns a list of names + formed from the product of the subsection ids and prefixes. + """ + return ['{}-{}'.format(prefix, short_id) for short_id, prefix in product(short_subsection_ids, prefixes)] + + +class GradeCSVProcessor(DeferrableMixin, GradedSubsectionMixin, CSVProcessor): """ CSV Processor for subsection grades. """ required_columns = ['user_id', 'course_id'] + subsection_prefixes = ('name', 'original_grade', 'previous_override', 'new_override',) def __init__(self, **kwargs): """ Create GradeCSVProcessor. """ + # First, set some default values. self.columns = ['user_id', 'username', 'course_id', 'track', 'cohort'] self.course_id = None - self.subsection_grade_max = self.subsection_grade_min = None - self.course_grade_min = self.course_grade_max = None - self.subsection = self.track = self.cohort = self._user = None + self.subsection_grade_max = None + self.subsection_grade_min = None + self.course_grade_min = None + self.course_grade_max = None + self.subsection = None + self.track = None + self.cohort = None + self._user = None + + # The CSVProcessor.__init__ method will set attributes on self + # from items in kwargs, so this super().__init__() call will + # override any attribute values assigned above. super(GradeCSVProcessor, self).__init__(**kwargs) + self._course_key = CourseKey.from_string(self.course_id) self._subsection = UsageKey.from_string(self.subsection) if self.subsection else None self._subsections = self._get_graded_subsections( @@ -218,6 +279,9 @@ def __init__(self, **kwargs): filter_subsection=self._subsection, filter_assignment_type=kwargs.get('assignment_type', None), ) + self.append_columns( + self._subsection_column_names(self._subsections.keys(), self.subsection_prefixes) + ) self._users_seen = set() def get_unique_path(self): @@ -226,26 +290,6 @@ def get_unique_path(self): """ return self.course_id - def _get_graded_subsections(self, course_id, filter_subsection=None, filter_assignment_type=None): - """ - Return list of graded subsections. - - If filter_subsection (block usage id) is set, return only that subsection. - If filter_assignment_type (string) is set, return only subsections of the appropriate type. - """ - subsections = {} - for subsection in grades_api.graded_subsections_for_course_id(course_id): - block_id = text_type(subsection.location.block_id) - if ((filter_subsection and block_id != filter_subsection.block_id) - or filter_assignment_type and filter_assignment_type != text_type(subsection.format)): - continue - short_block_id = block_id[:8] - if short_block_id not in subsections: - for key in ('name', 'original_grade', 'previous_override', 'new_override'): - self.columns.append('{}-{}'.format(key, short_block_id)) - subsections[short_block_id] = (subsection, subsection.display_name) - return subsections - def validate_row(self, row): """ Validate row. @@ -330,8 +374,11 @@ def get_rows_to_export(self): / subsection_grade.override.possible_graded_override) * 100 except AttributeError: effective_grade = (subsection_grade.earned_graded / subsection_grade.possible_graded) * 100 - if (self.subsection_grade_min and effective_grade < self.subsection_grade_min) or ( - self.subsection_grade_max and effective_grade > self.subsection_grade_max): + if ( + (self.subsection_grade_min and (effective_grade < self.subsection_grade_min)) + or + (self.subsection_grade_max and (effective_grade > self.subsection_grade_max)) + ): continue course_grade = grades_api.CourseGradeFactory().read(enrollment['user'], course_key=self._course_key) @@ -353,56 +400,53 @@ def get_rows_to_export(self): yield row -class InterventionCSVProcessor(CSVProcessor): +class InterventionCSVProcessor(GradedSubsectionMixin, CSVProcessor): """ CSV Processor for intervention report grades for masters track only. """ MASTERS_TRACK = 'masters' + subsection_prefixes = ('name', 'grade',) def __init__(self, **kwargs): """ Create InterventionCSVProcessor. """ - self.columns = ['user_id', 'username', 'email', 'student_key', 'full_name', 'course_id', 'track', 'cohort', - 'number of videos overall', 'number of videos last week', 'number of problems overall', - 'number of problems last week', - 'number of correct problems overall', 'number of correct problems last week', - 'number of problem attempts overall', 'number of problem attempts last week', - 'number of forum posts overall', 'number of forum posts last week', - 'date last active'] + # Set some default values for the attributes below + self.columns = [ + 'user_id', 'username', 'email', 'student_key', 'full_name', 'course_id', 'track', 'cohort', + 'number of videos overall', 'number of videos last week', 'number of problems overall', + 'number of problems last week', + 'number of correct problems overall', 'number of correct problems last week', + 'number of problem attempts overall', 'number of problem attempts last week', + 'number of forum posts overall', 'number of forum posts last week', + 'date last active', + ] self.course_id = None - self.cohort = self.subsection = \ - self.assignment_type = self.subsection_grade_min = \ - self.subsection_grade_max = self.course_grade_min = \ - self.course_grade_max = None + self.cohort = None + self.subsection = None + self.assignment_type = None + self.subsection_grade_min = None + self.subsection_grade_max = None + self.course_grade_min = None + self.course_grade_max = None + + # The CSVProcessor.__init__ method will set attributes on self + # from items in kwargs, so this super().__init__() call will + # potentially override any attribute values assigned above. super(InterventionCSVProcessor, self).__init__(**kwargs) + self._course_key = CourseKey.from_string(self.course_id) self._subsection = UsageKey.from_string(self.subsection) if self.subsection else None self._subsections = self._get_graded_subsections( self._course_key, filter_subsection=self._subsection, - filter_assignment_type=self.assignment_type + filter_assignment_type=kwargs.get('assignment_type', None), ) - self.columns.append('course grade letter') - self.columns.append('course grade numeric') - - def _get_graded_subsections(self, course_id, filter_subsection=None, filter_assignment_type=None): - """ - Return list of graded subsections. - """ - subsections = {} - for subsection in grades_api.graded_subsections_for_course_id(course_id): - block_id = text_type(subsection.location.block_id) - if ((filter_subsection and block_id != filter_subsection.block_id) - or filter_assignment_type and filter_assignment_type != text_type(subsection.format)): - continue - short_block_id = block_id[:8] - if short_block_id not in subsections: - for key in ('name', 'grade'): - self.columns.append('{}-{}'.format(key, short_block_id)) - subsections[short_block_id] = (subsection, subsection.display_name) - return subsections + self.append_columns( + self._subsection_column_names(self._subsections.keys(), self.subsection_prefixes) + ) + self.append_columns(('course grade letter', 'course grade numeric')) def get_rows_to_export(self): """ @@ -426,16 +470,22 @@ def get_rows_to_export(self): / subsection_grade.override.possible_graded_override) * 100 except AttributeError: effective_grade = (subsection_grade.earned_graded / subsection_grade.possible_graded) * 100 - if (self.subsection_grade_min and effective_grade < self.subsection_grade_min) or ( - self.subsection_grade_max and effective_grade > self.subsection_grade_max): + if ( + (self.subsection_grade_min and (effective_grade < self.subsection_grade_min)) + or + (self.subsection_grade_max and (effective_grade > self.subsection_grade_max)) + ): continue course_grade = grades_api.CourseGradeFactory().read(enrollment['user'], course_key=self._course_key) if self.course_grade_min or self.course_grade_max: course_grade_normalized = (course_grade.percent * 100) - if ((self.course_grade_min and course_grade_normalized < self.course_grade_min) or - (self.course_grade_max and course_grade_normalized > self.course_grade_max)): + if ( + (self.course_grade_min and (course_grade_normalized < self.course_grade_min)) + or + (self.course_grade_max and (course_grade_normalized > self.course_grade_max)) + ): continue cohort = get_cohort(enrollment['user'], self._course_key, assign=False) diff --git a/tests/test_api.py b/tests/test_api.py index 3e0c0a2..7327b70 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -3,9 +3,10 @@ """ Tests for the `edx-bulk-grades` api module. """ - from __future__ import absolute_import, unicode_literals +from copy import deepcopy + from django.contrib.auth.models import User from django.test import TestCase from mock import MagicMock, Mock, patch @@ -19,38 +20,55 @@ class BaseTests(TestCase): """ Common setup functionality for all test cases """ - def setUp(self): - super(BaseTests, self).setUp() - self.learner = User.objects.create(username='student@example.com') - self.staff = User.objects.create(username='staff@example.com') - self.block_id_in_module_id = '85bb02dbd2c14ba5bc31a0264b140dda' - self.block_id = 'block-v1:testX+sg101+2019+type@test+block@%s' % self.block_id_in_module_id - self.course_id = 'course-v1:testX+sg101+2019' - - def _make_enrollments(self): + @classmethod + def setUpTestData(cls): + super(BaseTests, cls).setUpTestData() + cls.learner = User.objects.create(username='student@example.com') + cls.staff = User.objects.create(username='staff@example.com') + cls.usage_key = 'block-v1:testX+sg101+2019+type@sequential+block@homework_questions' + cls.other_usage_key = 'block-v1:testX+sg101+2019+type@sequential+block@lab_questions' + cls.course_id = 'course-v1:testX+sg101+2019' + cls._make_enrollments() + + @classmethod + def _make_enrollments(cls): for name in ['audit', 'verified', 'masters']: user = User.objects.create(username='%s@example.com' % name) Profile.objects.create(user=user, name=name) - enroll = CourseEnrollment.objects.create(course_id=self.course_id, user=user, mode=name) + enroll = CourseEnrollment.objects.create(course_id=cls.course_id, user=user, mode=name) if name == 'masters': ProgramCourseEnrollment.objects.create(course_enrollment=enroll) + def _mock_graded_subsections(self): + """ + Helper function to define the return value of a mocked + ``graded_subsections_for_course_id`` function. + """ + return_value = [] + for usage_key in (self.usage_key, self.other_usage_key): + _, _, block_id = usage_key.split('@') + subsection = MagicMock() + subsection.display_name = block_id.upper() + subsection.location.block_id = block_id + return_value.append(subsection) + return return_value + class TestApi(BaseTests): """ Tests of the api functions. """ def test_set_score(self): - api.set_score(self.block_id, self.learner.id, 11, 22, override_user_id=self.staff.id) - score = api.get_score(self.block_id, self.learner.id) + api.set_score(self.usage_key, self.learner.id, 11, 22, override_user_id=self.staff.id) + score = api.get_score(self.usage_key, self.learner.id) assert score['score'] == 11 assert score['who_last_graded'] == self.staff.username - score = api.get_score(self.block_id, 11) + score = api.get_score(self.usage_key, 11) assert score is None def test_negative_score(self): with self.assertRaisesMessage(ValueError, 'score must be positive'): - api.set_score(self.block_id, self.learner.id, -2, 22, override_user_id=self.staff.id) + api.set_score(self.usage_key, self.learner.id, -2, 22, override_user_id=self.staff.id) class TestScoreProcessor(BaseTests): @@ -62,7 +80,7 @@ def _get_row(self, **kwargs): Get a properly shaped row """ row = { - 'block_id': self.block_id, + 'block_id': self.usage_key, 'New Points': 0, 'user_id': self.learner.id, 'csum': '07ec', @@ -73,11 +91,10 @@ def _get_row(self, **kwargs): return row def test_export(self): - self._make_enrollments() - processor = api.ScoreCSVProcessor(block_id=self.block_id) + processor = api.ScoreCSVProcessor(block_id=self.usage_key) rows = list(processor.get_iterator()) assert len(rows) == 4 - processor = api.ScoreCSVProcessor(block_id=self.block_id, track='masters') + processor = api.ScoreCSVProcessor(block_id=self.usage_key, track='masters') rows = list(processor.get_iterator()) assert len(rows) == 2 data = '\n'.join(rows) @@ -86,7 +103,7 @@ def test_export(self): assert 'audit' not in data def test_validate(self): - processor = api.ScoreCSVProcessor(block_id=self.block_id, max_points=100) + processor = api.ScoreCSVProcessor(block_id=self.usage_key, max_points=100) processor.validate_row(self._get_row()) processor.validate_row(self._get_row(points=1)) processor.validate_row(self._get_row(points=50.0)) @@ -95,12 +112,12 @@ def test_validate(self): with self.assertRaises(ValidationError): processor.validate_row(self._get_row(points='ab')) with self.assertRaises(ValidationError): - processor.validate_row(self._get_row(block_id=self.block_id + 'b', csum='60aa')) + processor.validate_row(self._get_row(block_id=self.usage_key + 'b', csum='60aa')) with self.assertRaises(ValidationError): - processor.validate_row(self._get_row(block_id=self.block_id + 'b', csum='bad')) + processor.validate_row(self._get_row(block_id=self.usage_key + 'b', csum='bad')) def test_preprocess(self): - processor = api.ScoreCSVProcessor(block_id=self.block_id, max_points=100) + processor = api.ScoreCSVProcessor(block_id=self.usage_key, max_points=100) row = self._get_row(points=1) expected = { 'user_id': row['user_id'], @@ -115,48 +132,110 @@ def test_preprocess(self): assert not processor.preprocess_row(self._get_row(points=0)) def test_process(self): - processor = api.ScoreCSVProcessor(block_id=self.block_id, max_points=100) + processor = api.ScoreCSVProcessor(block_id=self.usage_key, max_points=100) operation = processor.preprocess_row(self._get_row(points=1)) assert processor.process_row(operation) == (True, None) processor.handle_undo = True assert processor.process_row(operation)[1]['score'] == 1 +class MySubsectionClass(api.GradedSubsectionMixin): + pass + + +class TestGradedSubsectionMixin(BaseTests): + """ + Tests the shared methods defined in ``GradeSubsectionMixin``. + """ + def setUp(self): + super(TestGradedSubsectionMixin, self).setUp() + self.instance = MySubsectionClass() + + @patch('lms.djangoapps.grades.api.graded_subsections_for_course_id') + def test_get_graded_subsections(self, mock_graded_subsections): + mock_graded_subsections.return_value = self._mock_graded_subsections() + subsections = self.instance._get_graded_subsections(self.course_id) # pylint: disable=protected-access + assert 2 == len(subsections) + assert 'HOMEWORK_QUESTIONS' == subsections['homework'][1] + assert 'LAB_QUESTIONS' == subsections['lab_ques'][1] + + def test_subsection_column_names(self): + short_subsection_ids = ['subsection-1', 'subsection-2', 'subsection-3'] + prefixes = ['original_grade', 'previous_override', 'new_override'] + + # pylint: disable=protected-access + actual_column_names = self.instance._subsection_column_names(short_subsection_ids, prefixes) + expected_column_names = [ + 'original_grade-subsection-1', + 'previous_override-subsection-1', + 'new_override-subsection-1', + 'original_grade-subsection-2', + 'previous_override-subsection-2', + 'new_override-subsection-2', + 'original_grade-subsection-3', + 'previous_override-subsection-3', + 'new_override-subsection-3', + ] + assert expected_column_names == actual_column_names + + class TestGradeProcessor(BaseTests): """ Tests exercising the processing performed by GradeCSVProcessor """ NUM_USERS = 3 - @patch('lms.djangoapps.grades.api.CourseGradeFactory.read') - def test_export(self, course_grade_factory_mock): - self._make_enrollments() - course_grade_factory_mock.return_value = Mock(percent=0.50) + @patch('lms.djangoapps.grades.api.CourseGradeFactory.read', return_value=Mock(percent=0.50)) + def test_export(self, course_grade_factory_mock): # pylint: disable=unused-argument processor = api.GradeCSVProcessor(course_id=self.course_id) rows = list(processor.get_iterator()) assert len(rows) == self.NUM_USERS + 1 + @patch('lms.djangoapps.grades.api.graded_subsections_for_course_id') + def test_columns_not_duplicated_during_init(self, mock_graded_subsections): + """ + Tests that GradeCSVProcessor.__init__() does not cause + column names to be duplicated. + """ + mock_graded_subsections.return_value = self._mock_graded_subsections() + processor_1 = api.GradeCSVProcessor(course_id=self.course_id) + + # pretend that we serialize the processor data to some "state" + state = deepcopy(processor_1.__dict__) + processor_2 = api.GradeCSVProcessor(**state) + + assert processor_1.columns == processor_2.columns + expected_columns = [ + 'user_id', + 'username', + 'course_id', + 'track', + 'cohort', + 'name-homework', + 'original_grade-homework', + 'previous_override-homework', + 'new_override-homework', + 'name-lab_ques', + 'original_grade-lab_ques', + 'previous_override-lab_ques', + 'new_override-lab_ques' + ] + assert expected_columns == processor_1.columns + @patch('lms.djangoapps.grades.api.graded_subsections_for_course_id') def test_subsection_max_min(self, mock_graded_subsections): - self._make_enrollments() - subsection = MagicMock() - subsection.location = MagicMock() - subsection.display_name = 'asdf' - subsection.location.block_id = self.block_id_in_module_id - mock_graded_subsections.return_value = [subsection] + mock_graded_subsections.return_value = self._mock_graded_subsections() # should filter out everything; all grades are 1 from mock_apps grades api - processor = api.GradeCSVProcessor(course_id=self.course_id, subsection=self.block_id, subsection_grade_max=50) + processor = api.GradeCSVProcessor(course_id=self.course_id, subsection=self.usage_key, subsection_grade_max=50) rows = list(processor.get_iterator()) assert len(rows) != self.NUM_USERS + 1 - processor = api.GradeCSVProcessor(course_id=self.course_id, subsection=self.block_id, subsection_grade_min=200) + processor = api.GradeCSVProcessor(course_id=self.course_id, subsection=self.usage_key, subsection_grade_min=200) rows = list(processor.get_iterator()) assert len(rows) != self.NUM_USERS + 1 @patch('lms.djangoapps.grades.api.CourseGradeFactory.read') def test_course_grade_filters(self, course_grade_factory_mock): - self._make_enrollments() - course_grade_factory_mock.return_value = Mock(percent=0.50) processor = api.GradeCSVProcessor(course_id=self.course_id, max_points=100, course_grade_min=60) @@ -169,13 +248,11 @@ def test_course_grade_filters(self, course_grade_factory_mock): @patch('lms.djangoapps.grades.api.CourseGradeFactory.read') def test_less_than_zero(self, course_grade_factory_mock): - self._make_enrollments() - course_grade_factory_mock.return_value = Mock(percent=0.50) processor = api.GradeCSVProcessor(course_id=self.course_id) row = { - 'block_id': self.block_id, + 'block_id': self.usage_key, 'new_override-block-v1': '-1', 'user_id': self.learner.id, 'csum': '07ec', @@ -193,7 +270,6 @@ class TestInterventionProcessor(BaseTests): @patch('lms.djangoapps.grades.api.CourseGradeFactory.read') @patch('bulk_grades.api.LearnerAPIClient') def test_export(self, mocked_api, mocked_course_grade_factory): - self._make_enrollments() data = { 'audit@example.com': {'videos_overall': 2, 'videos_last_week': 0, 'problems_overall': 10, 'problems_last_week': 5, @@ -216,3 +292,16 @@ def test_export(self, mocked_api, mocked_course_grade_factory): processor = api.InterventionCSVProcessor(course_id=self.course_id) rows = list(processor.get_iterator()) assert len(rows) == 2 + + def test_columns_not_duplicated_during_init(self): + """ + Tests that InterventionCSVProcessor.__init__() does not cause + column names to be duplicated. + """ + processor_1 = api.InterventionCSVProcessor(course_id=self.course_id) + + # pretend that we serialize the processor data to some "state" + state = deepcopy(processor_1.__dict__) + processor_2 = api.InterventionCSVProcessor(**state) + + assert processor_1.columns == processor_2.columns diff --git a/tox.ini b/tox.ini index 96f1f9b..028a693 100644 --- a/tox.ini +++ b/tox.ini @@ -73,7 +73,6 @@ commands = pylint --py3k bulk_grades manage.py setup.py rm tests/__init__.py pycodestyle bulk_grades manage.py setup.py - pydocstyle bulk_grades manage.py setup.py isort --check-only --diff --recursive tests test_utils bulk_grades manage.py setup.py test_settings.py make selfcheck