Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add import taxonomy endpoint #112

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions openedx_tagging/core/tagging/import_export/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,32 @@

from django.utils.translation import gettext as _

from .. import api as taxonomy_api
from ..models import TagImportTask, TagImportTaskState, Taxonomy
from .exceptions import TagImportError
from .import_plan import TagImportPlan, TagImportTask
from .parsers import ParserFormat, get_parser


def create_taxonomy_and_import_tags(
    taxonomy_name: str,
    taxonomy_description: str,
    file: BytesIO,
    parser_format: ParserFormat
) -> bool:
    """
    Create a new taxonomy and populate it with the tags read from `file`.

    The taxonomy is created first, then `import_tags` is run against it using
    the given `parser_format`. When the import reports failure, the taxonomy
    that was just created is deleted again.

    Returns the value reported by `import_tags`.
    """
    new_taxonomy = taxonomy_api.create_taxonomy(taxonomy_name, taxonomy_description)
    succeeded = import_tags(new_taxonomy, file, parser_format)

    if succeeded:
        return succeeded

    # Import failed: remove the taxonomy created above before reporting it.
    new_taxonomy.delete()
    return succeeded
pomegranited marked this conversation as resolved.
Show resolved Hide resolved


def import_tags(
taxonomy: Taxonomy,
file: BytesIO,
Expand Down
31 changes: 31 additions & 0 deletions openedx_tagging/core/tagging/rest_api/v1/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from rest_framework.reverse import reverse

from openedx_tagging.core.tagging.data import TagData
from openedx_tagging.core.tagging.import_export.parsers import ParserFormat
from openedx_tagging.core.tagging.models import ObjectTag, Tag, Taxonomy


Expand Down Expand Up @@ -175,3 +176,33 @@ class TaxonomyTagDeleteBodySerializer(serializers.Serializer): # pylint: disabl
child=serializers.CharField(), required=True
)
with_subtags = serializers.BooleanField(required=False)


class TaxonomyImportBodySerializer(serializers.Serializer):  # pylint: disable=abstract-method
    """
    Serializer of the body for the Taxonomy Import action

    Validates the `taxonomy_name`, `taxonomy_description` and `file` fields,
    and adds a `parser_format` entry (derived from the uploaded file's
    extension) to the validated data.
    """
    taxonomy_name = serializers.CharField(required=True)
    taxonomy_description = serializers.CharField(default="")
    file = serializers.FileField(required=True)

    def get_parser_format(self, obj):
        """
        Returns the ParserFormat based on the file extension

        `obj` is a mapping whose "file" entry exposes the uploaded file's
        `name`. The extension is matched case-insensitively.

        Raises `serializers.ValidationError` if the extension is neither
        `csv` nor `json`.
        """
        filename = obj["file"].name
        # Everything after the last dot, lowercased once for all comparisons.
        ext = filename.split('.')[-1].lower()
        if ext == 'csv':
            return ParserFormat.CSV
        elif ext == 'json':
            return ParserFormat.JSON
        else:
            # Python f-strings interpolate with `{...}`; a `${...}` form would
            # leave a stray literal "$" in the message shown to the client.
            raise serializers.ValidationError(f'File type not supported: {ext}')

    def to_internal_value(self, data):
        """
        Adds the parser_format to the validated data
        """
        # Run the standard field validation first so a missing `file` is
        # reported by the serializer before the extension is inspected.
        validated_data = super().to_internal_value(data)
        validated_data['parser_format'] = self.get_parser_format(data)
        return validated_data
pomegranited marked this conversation as resolved.
Show resolved Hide resolved
5 changes: 5 additions & 0 deletions openedx_tagging/core/tagging/rest_api/v1/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,9 @@
views_import.TemplateView.as_view(),
name="taxonomy-import-template",
),
path(
"import/",
views_import.ImportView.as_view(),
name="taxonomy-import",
),
]
49 changes: 48 additions & 1 deletion openedx_tagging/core/tagging/rest_api/v1/views_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@

import os

from django.http import FileResponse, Http404
from django.http import FileResponse, Http404, HttpResponse
from rest_framework.request import Request
from rest_framework.views import APIView

from ...import_export import api
from .serializers import TaxonomyImportBodySerializer


class TemplateView(APIView):
"""
Expand Down Expand Up @@ -49,3 +52,47 @@ def get(self, request: Request, file_ext: str, *args, **kwargs) -> FileResponse:
response = FileResponse(fh, content_type=content_type)
response['Content-Disposition'] = content_disposition
return response


class ImportView(APIView):
"""
View to import taxonomies

**Example Requests**
POST /tagging/rest_api/v1/import/
{
"taxonomy_name": "Taxonomy Name",
"taxonomy_description": "This is a description",
"file": <file>,
}

**Query Returns**
* 200 - Success
* 400 - Bad request
* 405 - Method not allowed
"""
http_method_names = ['post']
pomegranited marked this conversation as resolved.
Show resolved Hide resolved

def post(self, request: Request, *args, **kwargs) -> HttpResponse:
"""
Imports the taxonomy from the uploaded file.
"""
body = TaxonomyImportBodySerializer(data=request.data)
body.is_valid(raise_exception=True)

taxonomy_name = body.validated_data["taxonomy_name"]
taxonomy_description = body.validated_data["taxonomy_description"]
file = body.validated_data["file"].file
parser_format = body.validated_data["parser_format"]

import_success = api.create_taxonomy_and_import_tags(
taxonomy_name=taxonomy_name,
taxonomy_description=taxonomy_description,
file=file,
parser_format=parser_format,
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm.. the import/export tags API was designed to use async celery tasks so it could run in the background (in case there's a lot of tags to import).

@bradenmacdonald do you want us to use these async tasks here, or are we doing all imports synchronously?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect synchronous will be totally fine for the short term, but we might as well do it async since we've done most of the work already. How much work will that add to make the REST API layer async too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do we mean by making the call async here?

  1. The client makes the API call, immediately gets a "Job running..." response, and is later notified (via websocket or polling) that the job is done, along with the results
  2. The client makes the API call, and the REST API calls the Python API async but waits for the result before sending the response to the client (the HTTP request will be "sync", but we will use the celery task)

Option 2 is simple (but I don't know if we gain anything that way).
The size of option 1 depends on what we want to accomplish. Will we just show a toast, or do we want a page listing the job history?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant (1). But with simple polling for now, no need for websockets or anything fancy. For (2) it actually ties up much more server resources than just directly doing the import synchronously.

If that's going to take a while to implement though, we can just do sync for now.

Copy link
Contributor Author

@rpenido rpenido Nov 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it!
There is only one problem in our flow:
We need to create the Taxonomy before calling the import.

If the import fails, it is easy to remove the created taxonomy in the sync call.

Doing async, we can create the taxonomy, return the id to the client, and make calls waiting for the results (the result is associated with the taxonomy). But I need to figure out how to delete the freshly created taxonomy after a failed attempt. Would you happen to have any ideas on this?

This is not urgent for this task, but we will need to handle it if we want to go async sometime in the future. One approach is to change the import flow in the front-end: we first create the taxonomy and always call import inside it (then we don't need to handle the delete). This will impact our designs and this task a bit.

Another approach is to handle the create+import sync and use async for importing on an already created taxonomy. But it will also impact the UX, having different flows.

I think the appropriate solution will be to refactor the import to let it create the taxonomy and change the results to be tied to some kind of "job-id", and not the taxonomy itself.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the right approach is to create an additional async task which creates the taxonomy, then synchronously calls the existing import task (which is a celery task so can be called either sync or async, but since we're already in an async task, we call it sync). If that succeeds, it returns the taxonomy ID, and if it fails, it deletes the taxonomy and returns an error message.

The main thing is that this "wrapper" task which does create+import returns an import ID not a taxonomy ID. And then the frontend polls the import ID until it gets either a success or failure message. Only then does it get the taxonomy ID.


But that's all getting too complicated. Just do it synchronously for now and we'll make it async if it's too slow in practice. Explicitly put a comment in the REST API for the import that "this is an unstable API and may change if we make it async."


if import_success:
return HttpResponse(status=200)
else:
return HttpResponse(status=400)
44 changes: 44 additions & 0 deletions tests/openedx_tagging/core/tagging/test_views_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,17 @@
"""
from __future__ import annotations

import json

import ddt # type: ignore[import]
from django.core.files.uploadedfile import SimpleUploadedFile
from rest_framework import status
from rest_framework.test import APITestCase

from openedx_tagging.core.tagging.models import Tag, Taxonomy

# URL pattern for downloading an import template; `{filename}` is filled in with `str.format`.
TAXONOMY_TEMPLATE_URL = "/tagging/rest_api/v1/import/{filename}"
# URL of the taxonomy import endpoint (POST).
TAXONOMY_IMPORT_URL = "/tagging/rest_api/v1/import/"


@ddt.ddt
Expand Down Expand Up @@ -37,3 +43,41 @@ def test_download_method_not_allowed(self):
url = TAXONOMY_TEMPLATE_URL.format(filename="template.txt")
response = self.client.post(url)
assert response.status_code == status.HTTP_405_METHOD_NOT_ALLOWED


class TestImportView(APITestCase):
    """
    Tests for the taxonomy import endpoint.
    """

    def test_import(self):
        """
        Posting a JSON tag file creates the taxonomy and all of its tags.
        """
        expected_tags = [
            {"id": f"tag_{n}", "value": f"Tag {n}"}
            for n in range(1, 5)
        ]
        payload = json.dumps({"tags": expected_tags}).encode()
        upload = SimpleUploadedFile("taxonomy.json", payload, content_type="application/json")
        form = {
            "taxonomy_name": "Imported Taxonomy name",
            "taxonomy_description": "Imported Taxonomy description",
            "file": upload,
        }

        response = self.client.post(TAXONOMY_IMPORT_URL, form, format="multipart")
        assert response.status_code == status.HTTP_200_OK

        # The taxonomy must exist with the submitted description
        created = Taxonomy.objects.get(name="Imported Taxonomy name")
        assert created.description == "Imported Taxonomy description"

        # Every uploaded tag must have been created, in the same order
        stored_tags = list(Tag.objects.filter(taxonomy=created))
        assert len(stored_tags) == len(expected_tags)
        for stored, expected in zip(stored_tags, expected_tags):
            assert stored.value == expected["value"]
Loading