From b2767fa7825036ff925c5ef27c9096ab292aa648 Mon Sep 17 00:00:00 2001 From: Jillian Date: Mon, 9 Oct 2023 11:52:52 +1030 Subject: [PATCH] Taxonomy import templates [FC-0036] (#89) feat: adds view to download CSV and JSON taxonomy import template files to demonstrate their format and usage. Also alters the import code to ignore extra fields in import files. This allows us to add comments to our template import files, but more importantly, allows users to import tags from an external source which may have extra data we don't care about. --- MANIFEST.in | 2 +- openedx_learning/__init__.py | 2 +- .../core/tagging/import_export/parsers.py | 12 +- .../core/tagging/import_export/template.csv | 30 ++++ .../core/tagging/import_export/template.json | 158 ++++++++++++++++++ .../core/tagging/rest_api/v1/urls.py | 7 +- .../core/tagging/rest_api/v1/views_import.py | 51 ++++++ .../tagging/import_export/test_parsers.py | 11 ++ .../tagging/import_export/test_template.py | 90 ++++++++++ .../core/tagging/test_views_import.py | 39 +++++ 10 files changed, 394 insertions(+), 8 deletions(-) create mode 100644 openedx_tagging/core/tagging/import_export/template.csv create mode 100644 openedx_tagging/core/tagging/import_export/template.json create mode 100644 openedx_tagging/core/tagging/rest_api/v1/views_import.py create mode 100644 tests/openedx_tagging/core/tagging/import_export/test_template.py create mode 100644 tests/openedx_tagging/core/tagging/test_views_import.py diff --git a/MANIFEST.in b/MANIFEST.in index 0a2abd08..73b8b7ee 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,4 +3,4 @@ include LICENSE.txt include README.rst include requirements/base.in recursive-include openedx_learning *.html *.png *.gif *.js *.css *.jpg *.jpeg *.svg *.py -recursive-include openedx_tagging *.html *.png *.gif *.js *.css *.jpg *.jpeg *.svg *.py *.yaml +recursive-include openedx_tagging *.html *.png *.gif *.js *.css *.jpg *.jpeg *.svg *.py *.yaml *.json *.csv diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py index 6f229c82..fe8603ba 100644 --- a/openedx_learning/__init__.py +++ b/openedx_learning/__init__.py @@ -1,4 +1,4 @@ """ Open edX Learning ("Learning Core"). """ -__version__ = "0.2.1" +__version__ = "0.2.2" diff --git a/openedx_tagging/core/tagging/import_export/parsers.py b/openedx_tagging/core/tagging/import_export/parsers.py index 4413c04c..c0b8207f 100644 --- a/openedx_tagging/core/tagging/import_export/parsers.py +++ b/openedx_tagging/core/tagging/import_export/parsers.py @@ -117,6 +117,7 @@ def _parse_tags( row = cls.inital_row for tag in tags_data: has_error = False + tag_data = {} # Verify the required fields for req_field in cls.required_fields: @@ -140,20 +141,21 @@ def _parse_tags( ) ) has_error = True + else: + tag_data[req_field] = tag[req_field] - tag["index"] = row + tag_data["index"] = row row += 1 # Skip parse if there is an error if has_error: continue - # Updating any empty optional field to None + # Optional fields default to None for opt_field in cls.optional_fields: - if opt_field in tag and not tag.get(opt_field): - tag[opt_field] = None + tag_data[opt_field] = tag.get(opt_field) or None - tags.append(TagItem(**tag)) + tags.append(TagItem(**tag_data)) return tags, errors diff --git a/openedx_tagging/core/tagging/import_export/template.csv b/openedx_tagging/core/tagging/import_export/template.csv new file mode 100644 index 00000000..cdee4984 --- /dev/null +++ b/openedx_tagging/core/tagging/import_export/template.csv @@ -0,0 +1,30 @@ +id,value,parent_id,comments +WINDS,Wind instruments,,"This is an example Tag Import file, in CSV format." +PERCUSS,Percussion instruments,,"Only the 'id' and 'value' fields are required. They can be anything you like, but they must must be unique within the taxonomy. Existing tags matching 'id' will be updated on import." +ELECTRIC,Electronic instruments,,"Top-level tags have no 'parent_id', and you can have as many top-level tags as you wish." +STRINGS,String instruments,,"All other fields (like these 'comments') are ignored on import, and will not be included in subsequent tag exports." +BELLS,Idiophone,PERCUSS,"Providing a 'parent_id' creates a tag hierarchy." +DRUMS,Membranophone,PERCUSS,"The 'parent_id' must match an 'id' found earlier in the import file." +CAJÓN,Cajón,DRUMS,"Tag values may contain unicode characters." +PYLE,Pyle Stringed Jam Cajón,CAJÓN,"A tag hierarchy may contain as many as 3 levels. This tag is at level 4, and so it will not be shown to users." +THERAMIN,Theramin,ELECTRIC,"A tag hierarchy may contain uneven levels. Here, the Electronic branch has only 2 levels, while Percussion has 3." +CHORD,Chordophone,PERCUSS, +BRASS,Brass,WINDS, +WOODS,Woodwinds,WINDS, +FLUTE,Flute,WOODS, +PLUCK,Plucked strings,STRINGS, +MANDOLIN,Mandolin,PLUCK, +HARP,Harp,PLUCK, +BANJO,Banjo,PLUCK, +BOW,Bowed strings,STRINGS, +VIOLIN,Violin,BOW, +CELLO,Cello,BOW, +CLARINET,Clarinet,WOODS, +OBOE,Oboe,WOODS, +TRUMPET,Trumpet,BRASS, +TUBA,Tuba,BRASS, +SYNTH,Synthesizer,ELECTRIC, +CELESTA,Celesta,BELLS, +HI-HAT,Hi-hat,BELLS, +TABLA,Tabla,DRUMS, +PIANO,Piano,CHORD, diff --git a/openedx_tagging/core/tagging/import_export/template.json b/openedx_tagging/core/tagging/import_export/template.json new file mode 100644 index 00000000..666a34b9 --- /dev/null +++ b/openedx_tagging/core/tagging/import_export/template.json @@ -0,0 +1,158 @@ +{ + "tags": [ + { + "id": "WINDS", + "value": "Wind instruments", + "parent_id": "", + "comments": "This is an example Tag Import file, in JSON format." + }, + { + "id": "PERCUSS", + "value": "Percussion instruments", + "parent_id": "", + "comments": "Only the 'id' and 'value' fields are required. They can be anything you like, but they must must be unique within the taxonomy. Existing tags matching 'id' will be updated on import." + }, + { + "id": "ELECTRIC", + "value": "Electronic instruments", + "parent_id": "", + "comments": "Top-level tags have no 'parent_id', and you can have as many top-level tags as you wish." + }, + { + "id": "STRINGS", + "value": "String instruments", + "parent_id": "", + "comments": "All other fields (like these 'comments') are ignored on import, and will not be included in subsequent tag exports." + }, + { + "id": "BELLS", + "value": "Idiophone", + "parent_id": "PERCUSS", + "comments": "Providing a 'parent_id' creates a tag hierarchy." + }, + { + "id": "DRUMS", + "value": "Membranophone", + "parent_id": "PERCUSS", + "comments": "The 'parent_id' must match an 'id' found earlier in the import file." + }, + { + "id": "CAJÓN", + "value": "Cajón", + "parent_id": "DRUMS", + "comments": "Tag values may contain unicode characters." + }, + { + "id": "PYLE", + "value": "Pyle Stringed Jam Cajón", + "parent_id": "CAJÓN", + "comments": "A tag hierarchy may contain as many as 3 levels. This tag is at level 4, and so it will not be shown to users." + }, + { + "id": "THERAMIN", + "value": "Theramin", + "parent_id": "ELECTRIC", + "comments": "A tag hierarchy may contain uneven levels. Here, the Electronic branch has only 2 levels, while Percussion has 3." + }, + { + "id": "CHORD", + "value": "Chordophone", + "parent_id": "PERCUSS" + }, + { + "id": "BRASS", + "value": "Brass", + "parent_id": "WINDS" + }, + { + "id": "WOODS", + "value": "Woodwinds", + "parent_id": "WINDS" + }, + { + "id": "FLUTE", + "value": "Flute", + "parent_id": "WOODS" + }, + { + "id": "PLUCK", + "value": "Plucked strings", + "parent_id": "STRINGS" + }, + { + "id": "MANDOLIN", + "value": "Mandolin", + "parent_id": "PLUCK" + }, + { + "id": "HARP", + "value": "Harp", + "parent_id": "PLUCK" + }, + { + "id": "BANJO", + "value": "Banjo", + "parent_id": "PLUCK" + }, + { + "id": "BOW", + "value": "Bowed strings", + "parent_id": "STRINGS" + }, + { + "id": "VIOLIN", + "value": "Violin", + "parent_id": "BOW" + }, + { + "id": "CELLO", + "value": "Cello", + "parent_id": "BOW" + }, + { + "id": "CLARINET", + "value": "Clarinet", + "parent_id": "WOODS" + }, + { + "id": "OBOE", + "value": "Oboe", + "parent_id": "WOODS" + }, + { + "id": "TRUMPET", + "value": "Trumpet", + "parent_id": "BRASS" + }, + { + "id": "TUBA", + "value": "Tuba", + "parent_id": "BRASS" + }, + { + "id": "SYNTH", + "value": "Synthesizer", + "parent_id": "ELECTRIC" + }, + { + "id": "CELESTA", + "value": "Celesta", + "parent_id": "BELLS" + }, + { + "id": "HI-HAT", + "value": "Hi-hat", + "parent_id": "BELLS" + }, + { + "id": "TABLA", + "value": "Tabla", + "parent_id": "DRUMS" + }, + { + "id": "PIANO", + "value": "Piano", + "parent_id": "CHORD" + } + ] +} diff --git a/openedx_tagging/core/tagging/rest_api/v1/urls.py b/openedx_tagging/core/tagging/rest_api/v1/urls.py index 7b96cf98..72ff87d0 100644 --- a/openedx_tagging/core/tagging/rest_api/v1/urls.py +++ b/openedx_tagging/core/tagging/rest_api/v1/urls.py @@ -5,7 +5,7 @@ from django.urls.conf import include, path from rest_framework.routers import DefaultRouter -from . import views +from . import views, views_import router = DefaultRouter() router.register("taxonomies", views.TaxonomyView, basename="taxonomy") @@ -18,4 +18,9 @@ views.TaxonomyTagsView.as_view(), name="taxonomy-tags", ), + path( + "import/template.", + views_import.TemplateView.as_view(), + name="taxonomy-import-template", + ), ] diff --git a/openedx_tagging/core/tagging/rest_api/v1/views_import.py b/openedx_tagging/core/tagging/rest_api/v1/views_import.py new file mode 100644 index 00000000..6c3a6339 --- /dev/null +++ b/openedx_tagging/core/tagging/rest_api/v1/views_import.py @@ -0,0 +1,51 @@ +""" +Taxonomy Import views +""" +from __future__ import annotations + +import os + +from django.http import FileResponse, Http404 +from rest_framework.request import Request +from rest_framework.views import APIView + + +class TemplateView(APIView): + """ + View which serves the static Taxonomy Import template files. + + **Example Requests** + GET /tagging/rest_api/v1/import/template.csv + GET /tagging/rest_api/v1/import/template.json + + **Query Returns** + * 200 - Success + * 404 - Template file not found + * 405 - Method not allowed + """ + http_method_names = ['get'] + + template_dir = os.path.join( + os.path.dirname(__file__), + "../../import_export/", + ) + allowed_ext_to_content_type = { + "csv": "text/csv", + "json": "application/json", + } + + def get(self, request: Request, file_ext: str, *args, **kwargs) -> FileResponse: + """ + Downloads the requested file as an attachment, + or raises 404 if not found. + """ + content_type = self.allowed_ext_to_content_type.get(file_ext) + if not content_type: + raise Http404 + + filename = f"template.{file_ext}" + content_disposition = f'attachment; filename="{filename}"' + fh = open(os.path.join(self.template_dir, filename), "rb") + response = FileResponse(fh, content_type=content_type) + response['Content-Disposition'] = content_disposition + return response diff --git a/tests/openedx_tagging/core/tagging/import_export/test_parsers.py b/tests/openedx_tagging/core/tagging/import_export/test_parsers.py index 3bcd2667..52a75afa 100644 --- a/tests/openedx_tagging/core/tagging/import_export/test_parsers.py +++ b/tests/openedx_tagging/core/tagging/import_export/test_parsers.py @@ -77,6 +77,12 @@ def test_load_data_errors(self) -> None: ) @ddt.data( + ( + {"tags": [ + {"id": "tag_1", "value": "Tag 1", "comment": "This field is ignored."}, # Valid + ]}, + [] + ), ( {"tags": [ {"id": "tag_1", "value": "Tag 1"}, # Valid @@ -209,6 +215,11 @@ class TestCSVParser(TestImportExportMixin, TestCase): # Valid "id,value\n", [] + ), + ( + # Valid + "id,value,ignored\n", + [] ) ) @ddt.unpack diff --git a/tests/openedx_tagging/core/tagging/import_export/test_template.py b/tests/openedx_tagging/core/tagging/import_export/test_template.py new file mode 100644 index 00000000..067a338c --- /dev/null +++ b/tests/openedx_tagging/core/tagging/import_export/test_template.py @@ -0,0 +1,90 @@ +""" +Tests the import/export template files. +""" +from __future__ import annotations + +import os + +import ddt # type: ignore[import] +from django.test.testcases import TestCase + +from openedx_tagging.core.tagging.api import get_tags +from openedx_tagging.core.tagging.import_export import ParserFormat +from openedx_tagging.core.tagging.import_export import api as import_api + +from .mixins import TestImportExportMixin + + +@ddt.ddt +class TestImportTemplate(TestImportExportMixin, TestCase): + """ + Test the CSV import/export template. + """ + + def open_template_file(self, template_file): + """ + Returns an open file handler for the template file. + """ + dirname = os.path.dirname(__file__) + filename = os.path.join( + dirname, + '../../../../..', + template_file, + ) + return open(filename, "rb") + + @ddt.data( + ('openedx_tagging/core/tagging/import_export/template.csv', ParserFormat.CSV), + ('openedx_tagging/core/tagging/import_export/template.json', ParserFormat.JSON), + ) + @ddt.unpack + def test_import_template(self, template_file, parser_format): + with self.open_template_file(template_file) as import_file: + assert import_api.import_tags( + self.taxonomy, + import_file, + parser_format, + replace=True, + ), import_api.get_last_import_log(self.taxonomy) + + imported_tags = [ + { + "external_id": tag.external_id, + "value": tag.value, + "parent": tag.parent.external_id if tag.parent else None, + } + for tag in get_tags(self.taxonomy) + ] + assert imported_tags == [ + {'external_id': "ELECTRIC", 'parent': None, 'value': 'Electronic instruments'}, + {'external_id': 'PERCUSS', 'parent': None, 'value': 'Percussion instruments'}, + {'external_id': 'STRINGS', 'parent': None, 'value': 'String instruments'}, + {'external_id': 'WINDS', 'parent': None, 'value': 'Wind instruments'}, + {'external_id': 'SYNTH', 'parent': 'ELECTRIC', 'value': 'Synthesizer'}, + {'external_id': 'THERAMIN', 'parent': 'ELECTRIC', 'value': 'Theramin'}, + {'external_id': 'CHORD', 'parent': 'PERCUSS', 'value': 'Chordophone'}, + {'external_id': 'BELLS', 'parent': 'PERCUSS', 'value': 'Idiophone'}, + {'external_id': 'DRUMS', 'parent': 'PERCUSS', 'value': 'Membranophone'}, + {'external_id': 'BOW', 'parent': 'STRINGS', 'value': 'Bowed strings'}, + {'external_id': 'PLUCK', 'parent': 'STRINGS', 'value': 'Plucked strings'}, + {'external_id': 'BRASS', 'parent': 'WINDS', 'value': 'Brass'}, + {'external_id': 'WOODS', 'parent': 'WINDS', 'value': 'Woodwinds'}, + {'external_id': 'CELLO', 'parent': 'BOW', 'value': 'Cello'}, + {'external_id': 'VIOLIN', 'parent': 'BOW', 'value': 'Violin'}, + {'external_id': 'TRUMPET', 'parent': 'BRASS', 'value': 'Trumpet'}, + {'external_id': 'TUBA', 'parent': 'BRASS', 'value': 'Tuba'}, + {'external_id': 'PIANO', 'parent': 'CHORD', 'value': 'Piano'}, + # This tag is present in the import files, but it will be omitted from get_tags() + # because it sits beyond TAXONOMY_MAX_DEPTH. + # {'external_id': 'PYLE', 'parent': 'CAJÓN', 'value': 'Pyle Stringed Jam Cajón'}, + {'external_id': 'CELESTA', 'parent': 'BELLS', 'value': 'Celesta'}, + {'external_id': 'HI-HAT', 'parent': 'BELLS', 'value': 'Hi-hat'}, + {'external_id': 'CAJÓN', 'parent': 'DRUMS', 'value': 'Cajón'}, + {'external_id': 'TABLA', 'parent': 'DRUMS', 'value': 'Tabla'}, + {'external_id': 'BANJO', 'parent': 'PLUCK', 'value': 'Banjo'}, + {'external_id': 'HARP', 'parent': 'PLUCK', 'value': 'Harp'}, + {'external_id': 'MANDOLIN', 'parent': 'PLUCK', 'value': 'Mandolin'}, + {'external_id': 'CLARINET', 'parent': 'WOODS', 'value': 'Clarinet'}, + {'external_id': 'FLUTE', 'parent': 'WOODS', 'value': 'Flute'}, + {'external_id': 'OBOE', 'parent': 'WOODS', 'value': 'Oboe'}, + ] diff --git a/tests/openedx_tagging/core/tagging/test_views_import.py b/tests/openedx_tagging/core/tagging/test_views_import.py new file mode 100644 index 00000000..4c2d94c9 --- /dev/null +++ b/tests/openedx_tagging/core/tagging/test_views_import.py @@ -0,0 +1,39 @@ +""" +Tests import REST API views. +""" +from __future__ import annotations + +import ddt # type: ignore[import] +from rest_framework import status +from rest_framework.test import APITestCase + +TAXONOMY_TEMPLATE_URL = "/tagging/rest_api/v1/import/{filename}" + + +@ddt.ddt +class TestTemplateView(APITestCase): + """ + Tests the taxonomy template downloads. + """ + @ddt.data( + ("template.csv", "text/csv"), + ("template.json", "application/json"), + ) + @ddt.unpack + def test_download(self, filename, content_type): + url = TAXONOMY_TEMPLATE_URL.format(filename=filename) + response = self.client.get(url) + assert response.status_code == status.HTTP_200_OK + assert response.headers['Content-Type'] == content_type + assert response.headers['Content-Disposition'] == f'attachment; filename="{filename}"' + assert int(response.headers['Content-Length']) > 0 + + def test_download_not_found(self): + url = TAXONOMY_TEMPLATE_URL.format(filename="template.txt") + response = self.client.get(url) + assert response.status_code == status.HTTP_404_NOT_FOUND + + def test_download_method_not_allowed(self): + url = TAXONOMY_TEMPLATE_URL.format(filename="template.txt") + response = self.client.post(url) + assert response.status_code == status.HTTP_405_METHOD_NOT_ALLOWED