Skip to content

Commit

Permalink
add mapping connector
Browse files Browse the repository at this point in the history
  • Loading branch information
erichesse committed Feb 6, 2024
1 parent 521cb27 commit 1818a00
Show file tree
Hide file tree
Showing 7 changed files with 658 additions and 438 deletions.
43 changes: 43 additions & 0 deletions assets/mappings/__final__/test_mapping/access-platform.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Test mapping fixture for an access platform.
# Read by tests/mapping/test_connector.py through get_mapping_model.
# Each top-level key is a field name; each entry under it gives the
# `fieldInPrimarySource` and a list of `mappingRules` (optionally a `comment`).
contact:
- fieldInPrimarySource: n/a
mappingRules:
- forValues:
- Roland Resolved
rule: Match value using ldap extractor.
identifierInPrimarySource:
- fieldInPrimarySource: n/a
mappingRules:
- setValues:
- "sumo-db"
rule: Use value as it is.
technicalAccessibility:
- fieldInPrimarySource: n/a
mappingRules:
- setValues:
- https://mex.rki.de/item/technical-accessibility-1
comment: internal
title:
- fieldInPrimarySource: n/a
mappingRules:
- setValues:
- language: de
value: SUMO Datenbank
unitInCharge:
- fieldInPrimarySource: n/a
mappingRules:
- forValues:
- Abteilung
rule: Use value to match with identifier in /raw-data/organigram/organizational-units.json.

# The remaining fields carry only a textual rule, without forValues/setValues.
# NOTE(review): the rule below names primary source 'nokeda' while
# identifierInPrimarySource above is "sumo-db" — confirm this is intended.
hadPrimarySource:
- fieldInPrimarySource: n/a
mappingRules:
- rule: "Assign 'stable target id' of primary source with identifier 'nokeda' in /raw-data/primary-sources/primary-sources.json."
identifier:
- fieldInPrimarySource: n/a
mappingRules:
- rule: Assign identifier.
stableTargetId:
- fieldInPrimarySource: n/a
mappingRules:
- rule: Assign 'stable target id' of merged item.
Empty file added mex/mapping/__init__.py
Empty file.
20 changes: 20 additions & 0 deletions mex/mapping/connector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from os import PathLike
from typing import Union

import yaml
from pydantic import BaseModel

from mex.common.models.mapping import MAPPING_MODEL_BY_EXTRACTED_CLASS_NAME


def get_mapping_model(
    path: Union[str, PathLike[str]], model_type: type[BaseModel]
) -> BaseModel:
    """Load a YAML mapping file and validate it into a mapping model.

    The mapping model class is looked up in
    `MAPPING_MODEL_BY_EXTRACTED_CLASS_NAME` under the class name of
    `model_type`; the parsed YAML content is then validated against it.

    Args:
        path: path to the mapping YAML file (string or path-like object)
        model_type: model class whose name selects the mapping model

    Raises:
        KeyError: presumably, when no mapping model is registered under
            `model_type.__name__` (assumes the lookup table is a plain dict)

    Returns:
        Mapping model instance validated from the YAML content
    """
    mapping_class = MAPPING_MODEL_BY_EXTRACTED_CLASS_NAME[model_type.__name__]
    # safe_load only builds plain Python objects, never arbitrary ones
    with open(path, encoding="utf-8") as fh:
        raw_mapping = yaml.safe_load(fh)
    return mapping_class.model_validate(raw_mapping)
943 changes: 506 additions & 437 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ dagster = "^1.6.2"
dagster-postgres = "^0.22.2"
dagster-webserver = "^1.6.2"
faker = "^22.6.0"
mex-common = { git = "https://github.com/robert-koch-institut/mex-common.git", rev = "0.19.1" }
mex-common = { git = "https://github.com/robert-koch-institut/mex-common.git", rev = "0.19.3" }
numpy = "^1.26.3"
openpyxl = "^3.1.2"
pandas = "^2.1.4"
Expand Down
Empty file added tests/mapping/__init__.py
Empty file.
88 changes: 88 additions & 0 deletions tests/mapping/test_connector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from mex.common.models import ExtractedAccessPlatform
from mex.common.types import AssetsPath, TechnicalAccessibility, TextLanguage
from mex.mapping.connector import get_mapping_model


def test_get_mapping_model() -> None:
    """Check that the access-platform test mapping loads into the expected model."""

    def entry(*rules: dict, **extras: str) -> dict:
        # every field of the fixture uses "n/a" as its primary-source field
        return {"fieldInPrimarySource": "n/a", "mappingRules": list(rules), **extras}

    yaml_path = AssetsPath(
        "assets/mappings/__final__/test_mapping/access-platform.yaml"
    )
    loaded = get_mapping_model(yaml_path, ExtractedAccessPlatform)

    expected_dump = {
        "identifier": [entry({"rule": "Assign identifier."})],
        "hadPrimarySource": [
            entry(
                {
                    "rule": "Assign 'stable target id' of primary source with identifier 'nokeda' in /raw-data/primary-sources/primary-sources.json."
                }
            )
        ],
        "identifierInPrimarySource": [
            entry({"setValues": ["sumo-db"], "rule": "Use value as it is."})
        ],
        "stableTargetId": [
            entry({"rule": "Assign 'stable target id' of merged item."})
        ],
        "contact": [
            entry(
                {
                    "forValues": ["Roland Resolved"],
                    "rule": "Match value using ldap extractor.",
                }
            )
        ],
        "technicalAccessibility": [
            entry(
                {"setValues": [TechnicalAccessibility["INTERNAL"]]},
                comment="internal",
            )
        ],
        "title": [
            entry(
                {
                    "setValues": [
                        {"value": "SUMO Datenbank", "language": TextLanguage.DE}
                    ]
                }
            )
        ],
        "unitInCharge": [
            entry(
                {
                    "forValues": ["Abteilung"],
                    "rule": "Use value to match with identifier in /raw-data/organigram/organizational-units.json.",
                }
            )
        ],
    }

    # only explicitly set fields are compared; defaults are excluded from the dump
    assert loaded.model_dump(exclude_defaults=True) == expected_dump

0 comments on commit 1818a00

Please sign in to comment.