Skip to content

Commit

Permalink
Update serialization of schemes and concepts for refdt #107
Browse files Browse the repository at this point in the history
  • Loading branch information
johnatawnclementawn committed Oct 24, 2024
1 parent b6c9fb1 commit e0ec0c3
Showing 1 changed file with 8 additions and 40 deletions.
48 changes: 8 additions & 40 deletions arches_lingo/utils/concept_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,10 @@
CONCEPT_NAME_CONTENT_NODE,
CONCEPT_NAME_LANGUAGE_NODE,
CONCEPT_NAME_TYPE_NODE,
HIDDEN_LABEL_VALUE_ID,
LANGUAGE_CONCEPT_ID,
SCHEME_NAME_NODEGROUP,
SCHEME_NAME_CONTENT_NODE,
SCHEME_NAME_LANGUAGE_NODE,
SCHEME_NAME_TYPE_NODE,
PREF_LABEL_VALUE_ID,
ALT_LABEL_VALUE_ID,
)
from arches_lingo.utils.query_expressions import JsonbArrayElements

Expand All @@ -43,8 +39,6 @@ class ConceptBuilder:
def __init__(self):
self.schemes = ResourceInstance.objects.none()

# key=concept valueid (str) val=language code
self.language_concepts: dict[str:str] = {}
# key=scheme resourceid (str) val=set of concept resourceids (str)
self.top_concepts: dict[str : set[str]] = defaultdict(set)
# key=concept resourceid (str) val=set of concept resourceids (str)
Expand All @@ -66,7 +60,6 @@ def __init__(self):
def read_from_cache(self):
from_cache = cache.get_many(
[
"language_concepts",
"top_concepts",
"narrower_concepts",
"schemes",
Expand All @@ -76,7 +69,6 @@ def read_from_cache(self):
]
)
try:
self.language_concepts = from_cache["language_concepts"]
self.top_concepts = from_cache["top_concepts"]
self.narrower_concepts = from_cache["narrower_concepts"]
self.schemes = from_cache["schemes"]
Expand All @@ -87,12 +79,10 @@ def read_from_cache(self):
self.rebuild_cache()

def rebuild_cache(self):
self.language_concepts_map()
self.top_concepts_map()
self.narrower_concepts_map()
self.populate_schemes()

cache.set("language_concepts", self.language_concepts)
cache.set("top_concepts", self.top_concepts)
cache.set("narrower_concepts", self.narrower_concepts)
cache.set("schemes", self.schemes)
Expand All @@ -101,16 +91,6 @@ def rebuild_cache(self):
cache.set("broader_concepts", self.broader_concepts)
cache.set("schemes_by_top_concept", self.schemes_by_top_concept)

@staticmethod
def human_label_type(value_id):
    """Translate a label-type value id into its SKOS-style label name.

    Compares ``value_id`` against the preferred, alternate and hidden label
    value ids, in that order, and returns the name paired with the first
    one that matches. Returns "unknown" when none match.
    """
    # Equality checks (not a dict lookup) so unhashable arguments simply
    # fall through to "unknown" instead of raising.
    known_types = (
        (PREF_LABEL_VALUE_ID, "prefLabel"),
        (ALT_LABEL_VALUE_ID, "altLabel"),
        (HIDDEN_LABEL_VALUE_ID, "hiddenLabel"),
    )
    for known_id, type_name in known_types:
        if value_id == known_id:
            return type_name
    return "unknown"

@staticmethod
def resources_from_tiles(lookup_expression: str):
return CombinedExpression(
Expand All @@ -137,20 +117,6 @@ def labels_subquery(label_nodegroup):
).values("data")
)

def language_concepts_map(self):
    """Fill ``self.language_concepts`` from language concept prefLabels.

    Selects every ``ConceptValue`` of type "prefLabel" whose concept is the
    target of a "narrower" relation from ``LANGUAGE_CONCEPT_ID``, then
    stores each one as ``str(pk) -> value``.
    """
    # Correlated subquery: is this value's concept a direct child of the
    # language concept?
    is_language_child = Exists(
        Relation.objects.filter(
            conceptfrom=LANGUAGE_CONCEPT_ID,
            conceptto=OuterRef("concept_id"),
            relationtype="narrower",
        )
    )
    preflabels = ConceptValue.objects.filter(
        is_language_child,
        valuetype="prefLabel",
    )
    for preflabel in preflabels:
        label_key = str(preflabel.pk)
        self.language_concepts[label_key] = preflabel.value

def top_concepts_map(self):
top_concept_of_tiles = (
TileModel.objects.filter(nodegroup_id=TOP_CONCEPT_OF_NODE_AND_NODEGROUP)
Expand Down Expand Up @@ -198,15 +164,16 @@ def serialize_scheme(self, scheme: ResourceInstance, *, children=True):
return data

def serialize_scheme_label(self, label_tile: dict):
    """Flatten one scheme-name tile into a plain label dict.

    Args:
        label_tile: Tile data keyed by node id. The entries under
            ``SCHEME_NAME_TYPE_NODE`` and ``SCHEME_NAME_LANGUAGE_NODE`` are
            read as ``[0]["labels"][0]["value"]``; the entry under
            ``SCHEME_NAME_CONTENT_NODE`` is a mapping whose first value is
            expected to carry a "value" key.

    Returns:
        A dict with the keys "valuetype_id", "language_id" and "value".
        "value" falls back to "Unknown" when the content mapping is empty
        or its first entry has no "value" key.

    Raises:
        KeyError, IndexError, TypeError: If the type or language entry does
            not have the ``[0]["labels"][0]["value"]`` shape.
    """
    # Type and language are read straight from the tile's label entries.
    # The earlier lookups through self.language_concepts and
    # human_label_type are dropped: they expected bare ids, and their
    # results were overwritten by duplicate keys in the returned dict.
    valuetype_id = label_tile[SCHEME_NAME_TYPE_NODE][0]["labels"][0]["value"]
    language_id = label_tile[SCHEME_NAME_LANGUAGE_NODE][0]["labels"][0]["value"]

    localized_string_objs = label_tile[SCHEME_NAME_CONTENT_NODE].values()
    try:
        # Only the first localized string is used.
        value = next(iter(localized_string_objs))["value"]
    except (StopIteration, KeyError):
        value = "Unknown"

    return {
        "valuetype_id": valuetype_id,
        "language_id": language_id,
        "value": value,
    }

Expand Down Expand Up @@ -256,14 +223,15 @@ def add_broader_concept_recursive(self, working_parent_list, conceptid):
)

def serialize_concept_label(self, label_tile: dict):
    """Flatten one concept-name tile into a plain label dict.

    Args:
        label_tile: Tile data keyed by node id. The entries under
            ``CONCEPT_NAME_TYPE_NODE`` and ``CONCEPT_NAME_LANGUAGE_NODE`` are
            read as ``[0]["labels"][0]["value"]``; the entry under
            ``CONCEPT_NAME_CONTENT_NODE`` is a mapping whose first value is
            expected to carry a "value" key.

    Returns:
        A dict with the keys "valuetype_id", "language_id" and "value".
        "value" falls back to "Unknown" when the content mapping is empty
        or its first entry has no "value" key.

    Raises:
        KeyError, IndexError, TypeError: If the type or language entry does
            not have the ``[0]["labels"][0]["value"]`` shape.
    """
    # Type and language are read straight from the tile's label entries.
    # The earlier lookups through self.language_concepts and
    # human_label_type are dropped: they expected bare ids, and their
    # results were overwritten by duplicate keys in the returned dict.
    valuetype_id = label_tile[CONCEPT_NAME_TYPE_NODE][0]["labels"][0]["value"]
    language_id = label_tile[CONCEPT_NAME_LANGUAGE_NODE][0]["labels"][0]["value"]

    localized_string_objs = label_tile[CONCEPT_NAME_CONTENT_NODE].values()
    try:
        # Only the first localized string is used.
        value = next(iter(localized_string_objs))["value"]
    except (StopIteration, KeyError):
        value = "Unknown"

    return {
        "valuetype_id": valuetype_id,
        "language_id": language_id,
        "value": value,
    }

0 comments on commit e0ec0c3

Please sign in to comment.