Skip to content

Commit

Permalink
feat: import/export Taxonomy API functions
Browse files Browse the repository at this point in the history
  • Loading branch information
ChrisChV committed Jun 20, 2023
1 parent 71a84ce commit c2e5099
Show file tree
Hide file tree
Showing 3 changed files with 390 additions and 24 deletions.
193 changes: 192 additions & 1 deletion openedx_tagging/core/tagging/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,28 @@
Please look at the models.py file for more information about the kinds of data
that are stored in this app.
"""
import csv
import json
from enum import Enum
from io import StringIO, BytesIO, TextIOWrapper
from typing import List, Type

from django.db import transaction
from django.db.models import QuerySet
from django.core.exceptions import ObjectDoesNotExist
from django.utils.translation import gettext_lazy as _

from .models import ObjectTag, Tag, Taxonomy

# Column names, in order, of the CSV header used for Taxonomy import/export.
# Kept as a list because it is compared directly with `csv.DictReader.fieldnames`.
csv_fields = [
    "id",
    "name",
    "parent_id",
    "parent_name",
]

class TaxonomyDataFormat(Enum):
    """
    Serialization formats accepted when importing or exporting a Taxonomy.
    """

    # Each member's value is the same string as its name.
    CSV = "CSV"
    JSON = "JSON"


def create_taxonomy(
name,
Expand All @@ -27,6 +42,7 @@ def create_taxonomy(
"""
Creates, saves, and returns a new Taxonomy with the given attributes.
"""

return Taxonomy.objects.create(
name=name,
description=description,
Expand Down Expand Up @@ -98,5 +114,180 @@ def tag_object(
Raised ValueError if the proposed tags are invalid for this taxonomy.
Preserves existing (valid) tags, adds new (valid) tags, and removes omitted (or invalid) tags.
"""

return taxonomy.tag_object(tags, object_id, object_type)


def import_tags(taxonomy: Taxonomy, tags: BytesIO, format: TaxonomyDataFormat, replace=False):
    """
    Imports the hierarchical tags from the given blob into the Taxonomy.

    The blob can be CSV or JSON format:
      * CSV: the header must be exactly the columns listed in `csv_fields`.
      * JSON: an object containing a 'tags' list; every item needs 'id' and
        'name', and may carry 'parent_id' and 'parent_name'.

    If replace, then removes any existing Tags linked to this taxonomy before
    performing the import. All database writes happen in a single transaction.

    The `tags` stream is always closed before this function returns.

    Raises ValueError if the taxonomy allows free text, if `format` is not a
    TaxonomyDataFormat member, if the blob cannot be parsed or has the wrong
    structure, or if a tag is missing its 'id' or 'name'.
    """

    # Validations
    # The placeholders are filled in AFTER the translation lookup; passing an
    # f-string to gettext would look up the already-interpolated text, which
    # can never match a catalog entry.
    if taxonomy.allow_free_text:
        raise ValueError(
            _(
                "Invalid taxonomy ({id}): You can't import free-from tags taxonomies"
            ).format(id=taxonomy.id)
        )
    if format not in TaxonomyDataFormat.__members__.values():
        raise ValueError(
            _(
                "Invalid format: {format}"
            ).format(format=format)
        )

    # Read file and build the tags data to be uploaded
    try:
        tags.seek(0)
        if format == TaxonomyDataFormat.CSV:
            # newline='' lets the csv module handle line endings itself, so
            # newlines embedded in quoted fields are preserved.
            text_tags = TextIOWrapper(tags, encoding='utf-8', newline='')
            csv_reader = csv.DictReader(text_tags)
            header_fields = csv_reader.fieldnames
            if csv_fields != header_fields:
                raise ValueError(
                    _(
                        "Invalid CSV header: {header_fields}. Must be: {csv_fields}."
                    ).format(header_fields=header_fields, csv_fields=csv_fields)
                )
            tags_data = list(csv_reader)
        else:
            # TaxonomyDataFormat.JSON
            # json.JSONDecodeError is a ValueError subclass, so malformed JSON
            # surfaces as the documented exception type.
            document = json.load(tags)
            if not isinstance(document, dict) or not isinstance(document.get('tags'), list):
                raise ValueError(
                    _(
                        "Invalid JSON format: Missing 'tags' list."
                    )
                )
            tags_data = document['tags']
    finally:
        tags.close()

    # External ids whose own row has already been applied in this import.
    processed_ids = set()
    # External ids that so far were only seen as the parent of another row.
    referenced_ids = set()

    def create_update_tag(tag, as_parent_reference=False):
        """
        Creates a new Tag or updates an existing one of this taxonomy.

        A tag's own row is applied at most once per import (duplicates are
        ignored). When a row names a parent that has not been imported yet,
        the parent is created/renamed from the (id, name) pair only
        (`as_parent_reference=True`); its own parent link is left untouched so
        that the parent's own row, if it appears later, can still set it.

        Returns the created/updated Tag.
        Raises KeyError if 'id' or 'name' don't exist on `tag`.
        """
        tag_id = tag['id']
        tag_name = tag['name']
        tag_parent_id = tag.get('parent_id')
        tag_parent_name = tag.get('parent_name')

        if tag_id in processed_ids or (as_parent_reference and tag_id in referenced_ids):
            # Already handled in this import: return it from the database.
            return Tag.objects.get(taxonomy=taxonomy, external_id=tag_id)

        try:
            # Update tag. The lookup is restricted to this taxonomy so a tag
            # with the same external id elsewhere is never modified.
            tag_instance = Tag.objects.get(taxonomy=taxonomy, external_id=tag_id)
            tag_instance.value = tag_name
        except ObjectDoesNotExist:
            # Create tag
            tag_instance = Tag(
                taxonomy=taxonomy,
                value=tag_name,
                external_id=tag_id,
            )

        if as_parent_reference:
            referenced_ids.add(tag_id)
        else:
            # Recorded before recursing, as the original history did.
            processed_ids.add(tag_id)
            if tag_parent_id and tag_parent_name:
                # Parent creation/update
                tag_instance.parent = create_update_tag(
                    {'id': tag_parent_id, 'name': tag_parent_name},
                    as_parent_reference=True,
                )
            else:
                # No parent in the imported data: the tag becomes a root tag.
                tag_instance.parent = None

        tag_instance.save()
        return tag_instance

    # Create and update tags
    with transaction.atomic():
        # Delete all old Tags linked to the taxonomy
        if replace:
            Tag.objects.filter(taxonomy=taxonomy).delete()

        for tag in tags_data:
            try:
                create_update_tag(tag)
            except KeyError as e:
                key = e.args[0]
                raise ValueError(
                    _(
                        "Invalid JSON format: Missing '{key}' on a tag ({tag})"
                    ).format(key=key, tag=tag)
                ) from e
        # NOTE(review): `resync_tags` is defined outside the visible part of
        # this file; it is kept inside the transaction here -- confirm that
        # matches the intended placement.
        resync_tags()

def export_tags(taxonomy: Taxonomy, format: TaxonomyDataFormat) -> str:
    """
    Creates a blob string describing all the tags in the given Taxonomy.

    The output format can be CSV or JSON:
      * CSV: a header row with the columns listed in `csv_fields`, then one
        row per tag; tags without a parent leave the parent columns empty.
      * JSON: an object with the taxonomy 'name' and 'description' and a
        'tags' list; 'parent_id'/'parent_name' appear only on tags that have
        a parent.

    A tag's 'id' is its external_id when set, otherwise its database id.

    Raises ValueError if the taxonomy allows free text or if `format` is not
    a TaxonomyDataFormat member.
    """

    # Validations
    # The placeholders are filled in AFTER the translation lookup; passing an
    # f-string to gettext would look up the already-interpolated text, which
    # can never match a catalog entry.
    if taxonomy.allow_free_text:
        raise ValueError(
            _(
                "Invalid taxonomy ({id}): You can't export free-from tags taxonomies"
            ).format(id=taxonomy.id)
        )
    if format not in TaxonomyDataFormat.__members__.values():
        raise ValueError(
            _(
                "Invalid format: {format}"
            ).format(format=format)
        )

    # Build list of tags in a dictionary format
    result = []
    for tag in get_tags(taxonomy):
        result_tag = {
            'id': tag.external_id or tag.id,
            'name': tag.value,
        }
        parent = tag.parent
        if parent:
            result_tag['parent_id'] = parent.external_id or parent.id
            result_tag['parent_name'] = parent.value
        result.append(result_tag)

    # Convert the dictionaries into the output format
    if format == TaxonomyDataFormat.CSV:
        with StringIO() as csv_buffer:
            csv_writer = csv.DictWriter(csv_buffer, fieldnames=csv_fields)
            csv_writer.writeheader()
            # Keys missing from a row (root tags) are written as empty cells.
            csv_writer.writerows(result)
            return csv_buffer.getvalue()

    # TaxonomyDataFormat.JSON
    json_result = {
        'name': taxonomy.name,
        'description': taxonomy.description,
        'tags': result,
    }
    return json.dumps(json_result)
44 changes: 22 additions & 22 deletions tests/openedx_tagging/core/fixtures/tagging.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,152 +4,152 @@
taxonomy: 1
parent: null
value: Bacteria
external_id: null
external_id: tag_1
- model: oel_tagging.tag
pk: 2
fields:
taxonomy: 1
parent: null
value: Archaea
external_id: null
external_id: tag_2
- model: oel_tagging.tag
pk: 3
fields:
taxonomy: 1
parent: null
value: Eukaryota
external_id: null
external_id: tag_3
- model: oel_tagging.tag
pk: 4
fields:
taxonomy: 1
parent: 1
value: Eubacteria
external_id: null
external_id: tag_4
- model: oel_tagging.tag
pk: 5
fields:
taxonomy: 1
parent: 1
value: Archaebacteria
external_id: null
external_id: tag_5
- model: oel_tagging.tag
pk: 6
fields:
taxonomy: 1
parent: 2
value: DPANN
external_id: null
external_id: tag_6
- model: oel_tagging.tag
pk: 7
fields:
taxonomy: 1
parent: 2
value: Euryarchaeida
external_id: null
external_id: tag_7
- model: oel_tagging.tag
pk: 8
fields:
taxonomy: 1
parent: 2
value: Proteoarchaeota
external_id: null
external_id: tag_8
- model: oel_tagging.tag
pk: 9
fields:
taxonomy: 1
parent: 3
value: Animalia
external_id: null
external_id: tag_9
- model: oel_tagging.tag
pk: 10
fields:
taxonomy: 1
parent: 3
value: Plantae
external_id: null
external_id: tag_10
- model: oel_tagging.tag
pk: 11
fields:
taxonomy: 1
parent: 3
value: Fungi
external_id: null
external_id: tag_11
- model: oel_tagging.tag
pk: 12
fields:
taxonomy: 1
parent: 3
value: Protista
external_id: null
external_id: tag_12
- model: oel_tagging.tag
pk: 13
fields:
taxonomy: 1
parent: 3
value: Monera
external_id: null
external_id: tag_13
- model: oel_tagging.tag
pk: 14
fields:
taxonomy: 1
parent: 9
value: Arthropoda
external_id: null
external_id: tag_14
- model: oel_tagging.tag
pk: 15
fields:
taxonomy: 1
parent: 9
value: Chordata
external_id: null
external_id: tag_15
- model: oel_tagging.tag
pk: 16
fields:
taxonomy: 1
parent: 9
value: Gastrotrich
external_id: null
external_id: tag_16
- model: oel_tagging.tag
pk: 17
fields:
taxonomy: 1
parent: 9
value: Cnidaria
external_id: null
external_id: tag_17
- model: oel_tagging.tag
pk: 18
fields:
taxonomy: 1
parent: 9
value: Ctenophora
external_id: null
external_id: tag_18
- model: oel_tagging.tag
pk: 19
fields:
taxonomy: 1
parent: 9
value: Placozoa
external_id: null
external_id: tag_19
- model: oel_tagging.tag
pk: 20
fields:
taxonomy: 1
parent: 9
value: Porifera
external_id: null
external_id: tag_20
- model: oel_tagging.tag
pk: 21
fields:
taxonomy: 1
parent: 15
value: Mammalia
external_id: null
external_id: tag_21
- model: oel_tagging.taxonomy
pk: 1
fields:
name: Life on Earth
description: null
description: This taxonomy contains the Kingdoms of the Earth
enabled: true
required: false
allow_multiple: false
Expand Down
Loading

0 comments on commit c2e5099

Please sign in to comment.