Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementing separate methods for JSON and JSONLD #494

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
631 changes: 315 additions & 316 deletions poetry.lock

Large diffs are not rendered by default.

69 changes: 68 additions & 1 deletion src/sssom/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def parse_sssom_json(
converter = curies.chain(
[
_get_built_in_prefix_map(),
Converter.from_jsonld(file_path),
# Converter.from_jsonld(file_path), TODO: This won't work, as the JSON format has no notion of a "curie_map"
Converter.from_prefix_map(meta.pop(CURIE_MAP, {})),
ensure_converter(prefix_map, use_defaults=False),
]
Expand All @@ -296,6 +296,37 @@ def parse_sssom_json(
return msdf


def parse_sssom_jsonld(
    file_path: str, prefix_map: ConverterHint = None, meta: Optional[MetadataType] = None, **kwargs
) -> MappingSetDataFrame:
    """Parse an SSSOM JSON-LD file into a :class:`MappingSetDataFrame`.

    :param file_path: Path to the JSON-LD file to read
    :param prefix_map: Prefix map hint; it is chained last when the converter is built
    :param meta: Optional metadata. A ``CURIE_MAP`` entry, if present, is used as a
        prefix map and is not forwarded as metadata. The caller's dictionary is left
        untouched.
    :param kwargs: Accepted but not used by this function
    :return: MappingSetDataFrame built from the document
    """
    raise_for_bad_path(file_path)

    # JSON text is UTF-8 by specification, so do not depend on the locale's default encoding.
    with open(file_path, encoding="utf-8") as json_file:
        jsondoc = json.load(json_file)

    # Work on a shallow copy so that popping the CURIE map below does not
    # silently remove it from the dictionary the caller passed in.
    meta = dict(meta) if meta is not None else {}

    # The priority order for combining prefix maps are:
    #  1. Built-in prefix map
    #  2. Internal prefix map inside the document
    #  3. Prefix map passed through this function inside the ``meta``
    #  4. Prefix map passed through this function to ``prefix_map`` (handled with ensure_converter)
    converter = curies.chain(
        [
            _get_built_in_prefix_map(),
            Converter.from_jsonld(file_path),
            Converter.from_prefix_map(meta.pop(CURIE_MAP, {})),
            ensure_converter(prefix_map, use_defaults=False),
        ]
    )

    return from_sssom_jsonld(jsondoc=jsondoc, prefix_map=converter, meta=meta)


# Import methods from external file formats


Expand Down Expand Up @@ -511,6 +542,41 @@ def from_sssom_json(
return to_mapping_set_dataframe(mapping_set_document)


def from_sssom_jsonld(
    jsondoc: Union[str, dict, TextIO],
    prefix_map: ConverterHint = None,
    meta: Optional[MetadataType] = None,
) -> MappingSetDataFrame:
    """Load a mapping set dataframe from a JSON-LD document.

    :param jsondoc: JSON-LD document, handed to the LinkML ``JSONLoader`` as its source
    :param prefix_map: Prefix map
    :param meta: metadata used to augment the metadata existing in the mapping set
    :return: MappingSetDataFrame object
    """
    converter = ensure_converter(prefix_map)

    loaded = JSONLoader().load(source=jsondoc, target_class=MappingSet)
    mapping_set = cast(MappingSet, loaded)

    # Metadata precedence, highest first:
    #  1. whatever the SSSOM document itself declares (already parsed by LinkML)
    #  2. the ``meta`` argument of this function
    #  3. the default metadata
    # Only levels 2 and 3 are merged here (``meta`` wins over the defaults);
    # ``overwrite=False`` below keeps level 1 intact.
    fallback_metadata = {**get_default_metadata(), **(meta or {})}

    _set_metadata_in_mapping_set(mapping_set, metadata=fallback_metadata, overwrite=False)
    document = MappingSetDocument(mapping_set=mapping_set, converter=converter)
    return to_mapping_set_dataframe(document)


def from_alignment_minidom(
dom: Document,
prefix_map: ConverterHint = None,
Expand Down Expand Up @@ -699,6 +765,7 @@ def _make_mdict(
"obographs-json": parse_obographs_json,
"alignment-api-xml": parse_alignment_xml,
"json": parse_sssom_json,
"jsonld": parse_sssom_jsonld,
"rdf": parse_sssom_rdf,
}

Expand Down
18 changes: 18 additions & 0 deletions src/sssom/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,15 @@ def write_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json")
raise ValueError(f"Unknown json format: {serialisation}, currently only json supported")


def write_jsonld(msdf: MappingSetDataFrame, output: TextIO, serialisation="jsonld") -> None:
    """Write a mapping set dataframe to the file as JSON-LD.

    :param msdf: Mapping set dataframe to serialise
    :param output: Open text handle the JSON-LD document is written to
    :param serialisation: Serialisation tag; only ``"jsonld"`` is accepted
    :raises ValueError: if ``serialisation`` is anything other than ``"jsonld"``
    """
    # Reject unsupported tags before doing any conversion work.
    if serialisation != "jsonld":
        raise ValueError(
            f"Unknown jsonld format: {serialisation}, currently only jsonld supported"
        )
    json.dump(to_jsonld(msdf), output, indent=2)


def write_owl(
msdf: MappingSetDataFrame,
file: TextIO,
Expand Down Expand Up @@ -449,6 +458,14 @@ def _update_sssom_context_with_prefixmap(converter: Converter):


def to_json(msdf: MappingSetDataFrame) -> JsonObj:
    """Convert a mapping set dataframe to a JSON object.

    The mapping set is dumped to a JSON string with the LinkML ``JSONDumper``
    (without injected type information) and parsed back into Python objects.
    """
    mapping_set = to_mapping_set_document(msdf).mapping_set
    serialised = JSONDumper().dumps(mapping_set, inject_type=False)
    return json.loads(serialised)


def to_jsonld(msdf: MappingSetDataFrame) -> JsonObj:
"""Convert a mapping set dataframe to a JSON object."""
doc = to_mapping_set_document(msdf)
context = _update_sssom_context_with_prefixmap(doc.converter)
Expand Down Expand Up @@ -498,6 +515,7 @@ def to_ontoportal_json(msdf: MappingSetDataFrame) -> List[Dict]:
"owl": (write_owl, SSSOM_DEFAULT_RDF_SERIALISATION),
"ontoportal_json": (write_ontoportal_json, None),
"fhir_json": (write_fhir_json, None),
"jsonld": (write_jsonld, None),
"json": (write_json, None),
"rdf": (write_rdf, SSSOM_DEFAULT_RDF_SERIALISATION),
}
Expand Down
1 change: 0 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,6 @@ def run_remove(self, runner: CliRunner, test_case: SSSOMTestCase) -> Result:
self.run_successful(result, test_case)
return result

@unittest.skip("this test doesn't actually test anything, just runs help")
def test_convert_cli(self):
"""Test conversion of SSSOM tsv to OWL format when multivalued metadata items are present."""
test_sssom = data_dir / "test_inject_metadata_msdf.tsv"
Expand Down
21 changes: 21 additions & 0 deletions tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from_obographs,
from_sssom_dataframe,
from_sssom_json,
from_sssom_jsonld,
from_sssom_rdf,
parse_sssom_table,
)
Expand Down Expand Up @@ -273,6 +274,22 @@ def test_parse_sssom_json(self):
f"{self.json_file} has the wrong number of mappings.",
)

def test_parse_sssom_jsonld(self):
    """Test parsing JSON-LD."""
    msdf = from_sssom_jsonld(
        jsondoc=self.json,
        prefix_map=self.df_converter,
        meta=self.metadata,
    )
    # Use a JSON-LD specific file name so this test does not reuse the
    # "test_parse_sssom_json.tsv" artefact name.
    path = os.path.join(test_out_dir, "test_parse_sssom_jsonld.tsv")
    with open(path, "w") as file:
        write_table(msdf, file)
    self.assertEqual(
        len(msdf.df),
        141,
        f"{self.json_file} has the wrong number of mappings.",
    )

# * "mapping_justification" is no longer multivalued.
# def test_piped_element_to_list(self):
# """Test for multi-valued element (piped in SSSOM tables) to list."""
Expand Down Expand Up @@ -440,6 +457,10 @@ def test_round_trip_json(self):
"""Test writing then reading JSON."""
self._basic_round_trip("json")

def test_round_trip_jsonld(self):
"""Test writing then reading JSON-LD."""
self._basic_round_trip("jsonld")

def test_round_trip_rdf(self):
"""Test writing then reading RDF."""
self._basic_round_trip("rdf")
Expand Down
19 changes: 16 additions & 3 deletions tests/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@
SUBJECT_ID,
SUBJECT_LABEL,
)
from sssom.parsers import parse_sssom_json, parse_sssom_rdf, parse_sssom_table
from sssom.parsers import parse_sssom_json, parse_sssom_jsonld, parse_sssom_rdf, parse_sssom_table
from sssom.writers import (
_update_sssom_context_with_prefixmap,
to_json,
to_jsonld,
write_fhir_json,
write_json,
write_jsonld,
write_ontoportal_json,
write_owl,
write_rdf,
Expand Down Expand Up @@ -82,6 +83,18 @@ def test_write_sssom_json(self):
f"{path} has the wrong number of mappings.",
)

def test_write_sssom_jsonld(self):
    """Test writing as JSON-LD and reading the result back."""
    # Use a JSON-LD specific file name so this test does not reuse the
    # "test_write_sssom_json.json" artefact name.
    path = os.path.join(test_out_dir, "test_write_sssom_jsonld.json")
    with open(path, "w") as file:
        write_jsonld(self.msdf, file)
    msdf = parse_sssom_jsonld(path)
    self.assertEqual(
        len(msdf.df),
        self.mapping_count,
        f"{path} has the wrong number of mappings.",
    )

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Little @cthoyt in Nico's head:

AGAIN??? Please add short explicit tests so I can understand what is going on, in particular the difference between the JSON and JSON-LD serialisations.

Copy link
Collaborator Author

@matentzn matentzn Feb 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@matentzn: I will add tests after we have had some discussions on the nature of the JSON output.

def test_write_sssom_json_context(self):
"""Test when writing to JSON, the context is correctly written as well."""
rows = [
Expand All @@ -107,7 +120,7 @@ def test_write_sssom_json_context(self):
df = pd.DataFrame(rows, columns=columns)
msdf = MappingSetDataFrame(df)
msdf.clean_prefix_map()
json_object = to_json(msdf)
json_object = to_jsonld(msdf)
self.assertIn("@context", json_object)
self.assertIn("DOID", json_object["@context"])
self.assertIn("mapping_set_id", json_object["@context"])
Expand Down
Loading