From 951532272beb3a1b7d393f22aa476e2445da8f35 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Wed, 28 Aug 2024 16:59:22 -0700 Subject: [PATCH] test FHIR load, dataframe traversals test FHIR load adds jsonpath-ng for reference searches improves traversal tests --- test/pygrip_test/fhir/README.md | 5 + test/pygrip_test/fhir/__init__.py | 0 .../META/DocumentReference.ndjson | 1 + .../META/Observation.ndjson | 3 + .../META/Organization.ndjson | 1 + .../fhir-compbio-examples/META/Patient.ndjson | 1 + .../META/ResearchStudy.ndjson | 1 + .../META/ResearchSubject.ndjson | 1 + .../META/Specimen.ndjson | 1 + .../fixtures/fhir-compbio-examples/README.md | 11 + test/pygrip_test/fhir/test_load.py | 264 ++++++++++++++++++ 11 files changed, 289 insertions(+) create mode 100644 test/pygrip_test/fhir/README.md create mode 100644 test/pygrip_test/fhir/__init__.py create mode 100644 test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/DocumentReference.ndjson create mode 100644 test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/Observation.ndjson create mode 100644 test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/Organization.ndjson create mode 100644 test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/Patient.ndjson create mode 100644 test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/ResearchStudy.ndjson create mode 100644 test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/ResearchSubject.ndjson create mode 100644 test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/Specimen.ndjson create mode 100644 test/pygrip_test/fhir/fixtures/fhir-compbio-examples/README.md create mode 100644 test/pygrip_test/fhir/test_load.py diff --git a/test/pygrip_test/fhir/README.md b/test/pygrip_test/fhir/README.md new file mode 100644 index 00000000..c4113497 --- /dev/null +++ b/test/pygrip_test/fhir/README.md @@ -0,0 +1,5 @@ +This test has a dependency +```commandline +pip install jsonpath-ng + +``` \ No newline at end of file diff --git 
a/test/pygrip_test/fhir/__init__.py b/test/pygrip_test/fhir/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/DocumentReference.ndjson b/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/DocumentReference.ndjson new file mode 100644 index 00000000..8da14c2b --- /dev/null +++ b/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/DocumentReference.ndjson @@ -0,0 +1 @@ +{"resourceType":"DocumentReference","id":"9ae7e542-767f-4b03-a854-7ceed17152cb","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"9ae7e542-767f-4b03-a854-7ceed17152cb"}],"status":"current","docStatus":"final","subject":{"reference":"Specimen/60c67a06-ea2d-4d24-9249-418dc77a16a9"},"date":"2024-08-21T10:53:00+00:00","content":[{"attachment":{"extension":[{"url":"http://aced-idp.org/fhir/StructureDefinition/md5","valueString":"227f0a5379362d42eaa1814cfc0101b8"},{"url":"http://aced-idp.org/fhir/StructureDefinition/source_path","valueUrl":"file:///home/LabA/specimen_1234_labA.fq.gz"}],"contentType":"text/fastq","url":"file:///home/LabA/specimen_1234_labA.fq.gz","size":5595609484,"title":"specimen_1234_labA.fq.gz","creation":"2024-08-21T10:53:00+00:00"}}]} \ No newline at end of file diff --git a/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/Observation.ndjson b/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/Observation.ndjson new file mode 100644 index 00000000..774b7051 --- /dev/null +++ b/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/Observation.ndjson @@ -0,0 +1,3 @@ +{"resourceType":"Observation","id":"cec32723-9ede-5f24-ba63-63cb8c6a02cf","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"patientX_1234-9ae7e542-767f-4b03-a854-7ceed17152cb-sequencer"}], 
"status":"final","category":[{"coding":[{"system":"http://terminology.hl7.org/CodeSystem/observation-category","code":"laboratory","display":"Laboratory"}]}],"code":{"coding":[{"system":"https://my_demo.org/labA","code":"Gen3 Sequencing Metadata","display":"Gen3 Sequencing Metadata"}]},"subject":{"reference":"Patient/bc4e1aa6-cb52-40e9-8f20-594d9c84f920"},"focus":[{"reference":"DocumentReference/9ae7e542-767f-4b03-a854-7ceed17152cb"}],"specimen":{"reference":"Specimen/60c67a06-ea2d-4d24-9249-418dc77a16a9"},"component":[{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"sequencer","display":"sequencer"}],"text":"sequencer"},"valueString":"Illumina Seq 1000"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"index","display":"index"}],"text":"index"},"valueString":"100bp Single index"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"type","display":"type"}],"text":"type"},"valueString":"Exome"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"project_id","display":"project_id"}],"text":"project_id"},"valueString":"labA_projectXYZ"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"read_length","display":"read_length"}],"text":"read_length"},"valueString":"100"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"instrument_run_id","display":"instrument_run_id"}],"text":"instrument_run_id"},"valueString":"234_ABC_1_8899"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"capture_bait_set","display":"capture_bait_set"}],"text":"capture_bait_set"},"valueString":"Human Exom 2X"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"end_type","display":"end_type"}],"text":"end_type"},"valueString":"Paired-End"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"capture","display":"capture"}],"text":"capture"},"valueString":"emitter 
XT"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"sequencing_site","display":"sequencing_site"}],"text":"sequencing_site"},"valueString":"AdvancedGeneExom"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"construction","display":"construction"}],"text":"construction"},"valueString":"library_construction"}]} +{"resourceType":"Observation","id":"4e3c6b59-b1fd-5c26-a611-da4cde9fd061","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"patientX_1234-specimen_1234_labA-sample_type"}],"status":"final","category":[{"coding":[{"system":"http://terminology.hl7.org/CodeSystem/observation-category","code":"laboratory","display":"Laboratory"}],"text":"Laboratory"}],"code":{"coding":[{"system":"https://my_demo.org/labA","code":"labA specimen metadata","display":"labA specimen metadata"}],"text":"sample type abc"},"subject":{"reference":"Patient/bc4e1aa6-cb52-40e9-8f20-594d9c84f920"},"focus":[{"reference":"Specimen/60c67a06-ea2d-4d24-9249-418dc77a16a9"}],"component":[{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"sample_type","display":"sample_type"}],"text":"sample_type"},"valueString":"Primary Solid Tumor"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"library_id","display":"library_id"}],"text":"library_id"},"valueString":"12345"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"tissue_type","display":"tissue_type"}],"text":"tissue_type"},"valueString":"Tumor"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"treatments","display":"treatments"}],"text":"treatments"},"valueString":"Trastuzumab"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"allocated_for_site","display":"allocated_for_site"}],"text":"allocated_for_site"},"valueString":"TEST Clinical 
Research"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"pathology_data","display":"pathology_data"}],"text":"pathology_data"}},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"clinical_event","display":"clinical_event"}],"text":"clinical_event"}},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"indexed_collection_date","display":"indexed_collection_date"}],"text":"indexed_collection_date"},"valueInteger":365},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"biopsy_specimens_bems_id","display":"biopsy_specimens_bems_id"}],"text":"biopsy_specimens"},"valueString":"specimenA, specimenB, specimenC"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"biopsy_procedure_type","display":"biopsy_procedure_type"}],"text":"biopsy_procedure_type"},"valueString":"Biopsy - Core"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"biopsy_anatomical_location","display":"biopsy_anatomical_location"}],"text":"biopsy_anatomical_location"},"valueString":"top axillary lymph node"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"percent_tumor","display":"percent_tumor"}],"text":"percent_tumor"},"valueString":"30"}]} +{"resourceType":"Observation","id":"21f3411d-89a4-4bcc-9ce7-b76edb1c745f","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"patientX_1234-9ae7e542-767f-4b03-a854-7ceed17152cb-Gene"}], "status":"final","category":[{"coding":[{"system":"http://terminology.hl7.org/CodeSystem/observation-category","code":"laboratory","display":"Laboratory"}]}],"code":{"coding":[{"system":"https://loinc.org","code":"81247-9","display":"Genomic structural variant copy 
number"}]},"subject":{"reference":"Patient/bc4e1aa6-cb52-40e9-8f20-594d9c84f920"},"focus":[{"reference":"DocumentReference/9ae7e542-767f-4b03-a854-7ceed17152cb"}],"specimen":{"reference":"Specimen/60c67a06-ea2d-4d24-9249-418dc77a16a9"},"component":[{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"Gene","display":"Gene"}],"text":"Gene"},"valueString":"TP53"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"Chromosome","display":"Chromosome"}],"text":"Chromosome"},"valueString":"chr17"},{"code":{"coding":[{"system":"https://my_demo.org/labA","code":"result","display":"result"}],"text":"result"},"valueString":"gain of function (GOF)"}]} \ No newline at end of file diff --git a/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/Organization.ndjson b/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/Organization.ndjson new file mode 100644 index 00000000..967445ae --- /dev/null +++ b/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/Organization.ndjson @@ -0,0 +1 @@ +{"resourceType":"Organization","id":"89c8dc4c-2d9c-48c7-8862-241a49a78f14","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"LabA_ORGANIZATION"}],"type":[{"coding":[{"system":"http://terminology.hl7.org/CodeSystem/organization-type","code":"prov","display":"Healthcare Provider"}],"text":"An organization that provides healthcare services."},{"coding":[{"system":"http://terminology.hl7.org/CodeSystem/organization-type","code":"edu","display":"Educational Institute"}],"text":"An educational institution that provides education or research facilities."}]} \ No newline at end of file diff --git a/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/Patient.ndjson b/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/Patient.ndjson new file mode 100644 index 00000000..107bf78e --- /dev/null +++ b/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/Patient.ndjson @@ -0,0 +1 @@ 
+{"resourceType":"Patient","id":"bc4e1aa6-cb52-40e9-8f20-594d9c84f920","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"patientX_1234"}],"active":true} \ No newline at end of file diff --git a/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/ResearchStudy.ndjson b/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/ResearchStudy.ndjson new file mode 100644 index 00000000..74cc4002 --- /dev/null +++ b/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/ResearchStudy.ndjson @@ -0,0 +1 @@ +{"resourceType":"ResearchStudy","id":"7dacd4d0-3c8e-470b-bf61-103891627d45","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"labA"}],"name":"LabA","status":"active","description":"LabA Clinical Trial Study: FHIR Schema Chorot Integration"} \ No newline at end of file diff --git a/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/ResearchSubject.ndjson b/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/ResearchSubject.ndjson new file mode 100644 index 00000000..6aee6d08 --- /dev/null +++ b/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/ResearchSubject.ndjson @@ -0,0 +1 @@ +{"resourceType":"ResearchSubject","id":"2fc448d6-a23b-4b94-974b-c66110164851","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"subjectX_1234"}],"status":"active","study":{"reference":"ResearchStudy/7dacd4d0-3c8e-470b-bf61-103891627d45"},"subject":{"reference":"Patient/bc4e1aa6-cb52-40e9-8f20-594d9c84f920"}} \ No newline at end of file diff --git a/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/Specimen.ndjson b/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/Specimen.ndjson new file mode 100644 index 00000000..b79c72cb --- /dev/null +++ b/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/META/Specimen.ndjson @@ -0,0 +1 @@ 
+{"resourceType":"Specimen","id":"60c67a06-ea2d-4d24-9249-418dc77a16a9","identifier":[{"use":"official","system":"https://my_demo.org/labA","value":"specimen_1234_labA"}],"subject":{"reference":"Patient/bc4e1aa6-cb52-40e9-8f20-594d9c84f920"},"collection":{"collector":{"reference":"Organization/89c8dc4c-2d9c-48c7-8862-241a49a78f14"},"bodySite":{"concept":{"coding":[{"system":"http://snomed.info/sct","code":"76752008","display":"Breast"}],"text":"Breast"}}},"processing":[{"method":{"coding":[{"system":"http://snomed.info/sct","code":"117032008","display":"Spun specimen (procedure)"},{"system":"https://my_demo.org/labA","code":"Double-Spun","display":"Double-Spun"}],"text":"Spun specimen (procedure)"}}]} \ No newline at end of file diff --git a/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/README.md b/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/README.md new file mode 100644 index 00000000..bcad0e36 --- /dev/null +++ b/test/pygrip_test/fhir/fixtures/fhir-compbio-examples/README.md @@ -0,0 +1,11 @@ +##### META folder test-data: + +``` +>>>> resources={'summary': {'DocumentReference': 1, 'Specimen': 1, 'Observation': 3, 'ResearchStudy': 1, 'ResearchSubject': 1, 'Organization': 1, 'Patient': 1}} +``` + +There are three Observations, each with user-defined metadata components. +1. Focus - reference -> Specimen +2. Focus - reference -> DocumentReference + 1. The first Observation contains the file's sequencing metadata. + 2. The second Observation includes a simple summary of a CNV analysis result computed from this file.
diff --git a/test/pygrip_test/fhir/test_load.py b/test/pygrip_test/fhir/test_load.py new file mode 100644 index 00000000..6446749b --- /dev/null +++ b/test/pygrip_test/fhir/test_load.py @@ -0,0 +1,264 @@ +import json +import pathlib +import types +from collections import defaultdict + +import pytest + +import pygrip +from jsonpath_ng import jsonpath, parse + +from typing import Generator, Dict, Any + + +def resources() -> Generator[Dict[str, Any], None, None]: + """Read a directory of ndjson files, yield a dictionary for each line.""" + base = pathlib.Path(__file__).parent.absolute() + fixture_path = pathlib.Path(base / 'fixtures' / 'fhir-compbio-examples' / 'META') + assert fixture_path.exists(), f"Fixture path {fixture_path.absolute()} does not exist." + for file in fixture_path.glob('*.ndjson'): + with open(str(file)) as fp: + for l_ in fp.readlines(): + yield json.loads(l_) + + +@pytest.fixture +def expected_edges() -> list[tuple]: + """Return the expected edges (from id, to id, label) for the resources.""" + return [('21f3411d-89a4-4bcc-9ce7-b76edb1c745f', '60c67a06-ea2d-4d24-9249-418dc77a16a9', 'specimen'), + ('21f3411d-89a4-4bcc-9ce7-b76edb1c745f', '9ae7e542-767f-4b03-a854-7ceed17152cb', 'focus'), + ('21f3411d-89a4-4bcc-9ce7-b76edb1c745f', 'bc4e1aa6-cb52-40e9-8f20-594d9c84f920', 'subject'), + ('2fc448d6-a23b-4b94-974b-c66110164851', '7dacd4d0-3c8e-470b-bf61-103891627d45', 'study'), + ('2fc448d6-a23b-4b94-974b-c66110164851', 'bc4e1aa6-cb52-40e9-8f20-594d9c84f920', 'subject'), + ('4e3c6b59-b1fd-5c26-a611-da4cde9fd061', '60c67a06-ea2d-4d24-9249-418dc77a16a9', 'focus'), + ('4e3c6b59-b1fd-5c26-a611-da4cde9fd061', 'bc4e1aa6-cb52-40e9-8f20-594d9c84f920', 'subject'), + ('60c67a06-ea2d-4d24-9249-418dc77a16a9', '89c8dc4c-2d9c-48c7-8862-241a49a78f14', 'collection_collector'), + ('60c67a06-ea2d-4d24-9249-418dc77a16a9', 'bc4e1aa6-cb52-40e9-8f20-594d9c84f920', 'subject'), + ('9ae7e542-767f-4b03-a854-7ceed17152cb', '60c67a06-ea2d-4d24-9249-418dc77a16a9', 'subject'), 
+ ('cec32723-9ede-5f24-ba63-63cb8c6a02cf', '60c67a06-ea2d-4d24-9249-418dc77a16a9', 'specimen'), + ('cec32723-9ede-5f24-ba63-63cb8c6a02cf', '9ae7e542-767f-4b03-a854-7ceed17152cb', 'focus'), + ('cec32723-9ede-5f24-ba63-63cb8c6a02cf', 'bc4e1aa6-cb52-40e9-8f20-594d9c84f920', 'subject')] + + +@pytest.fixture +def expected_vertices() -> list[tuple]: + """Return the expected vertices (only id, label) for the resources.""" + return [('21f3411d-89a4-4bcc-9ce7-b76edb1c745f', 'Observation'), + ('2fc448d6-a23b-4b94-974b-c66110164851', 'ResearchSubject'), + ('4e3c6b59-b1fd-5c26-a611-da4cde9fd061', 'Observation'), + ('60c67a06-ea2d-4d24-9249-418dc77a16a9', 'Specimen'), + ('7dacd4d0-3c8e-470b-bf61-103891627d45', 'ResearchStudy'), + ('89c8dc4c-2d9c-48c7-8862-241a49a78f14', 'Organization'), + ('9ae7e542-767f-4b03-a854-7ceed17152cb', 'DocumentReference'), + ('bc4e1aa6-cb52-40e9-8f20-594d9c84f920', 'Patient'), + ('cec32723-9ede-5f24-ba63-63cb8c6a02cf', 'Observation')] + + +@pytest.fixture +def expected_dataframe_associations(): + return { + ('ResearchSubject', '2fc448d6-a23b-4b94-974b-c66110164851'): [ + ('ResearchStudy', '7dacd4d0-3c8e-470b-bf61-103891627d45'), + ('Patient', 'bc4e1aa6-cb52-40e9-8f20-594d9c84f920'), + ('Specimen', '60c67a06-ea2d-4d24-9249-418dc77a16a9')], + ('Specimen', '60c67a06-ea2d-4d24-9249-418dc77a16a9'): [ + ('ResearchStudy', '7dacd4d0-3c8e-470b-bf61-103891627d45'), + ('ResearchSubject', '2fc448d6-a23b-4b94-974b-c66110164851'), + ('Patient', 'bc4e1aa6-cb52-40e9-8f20-594d9c84f920'), + ('Observation', '4e3c6b59-b1fd-5c26-a611-da4cde9fd061')], + ('ResearchStudy', '7dacd4d0-3c8e-470b-bf61-103891627d45'): [ + ('ResearchSubject', '2fc448d6-a23b-4b94-974b-c66110164851')], + ('Organization', '89c8dc4c-2d9c-48c7-8862-241a49a78f14'): [ + ('ResearchStudy', '7dacd4d0-3c8e-470b-bf61-103891627d45'), + ('ResearchSubject', '2fc448d6-a23b-4b94-974b-c66110164851'), + ('Patient', 'bc4e1aa6-cb52-40e9-8f20-594d9c84f920'), + ('Specimen', '60c67a06-ea2d-4d24-9249-418dc77a16a9'), 
+ ('DocumentReference', '9ae7e542-767f-4b03-a854-7ceed17152cb')], + ('DocumentReference', '9ae7e542-767f-4b03-a854-7ceed17152cb'): [ + ('ResearchStudy', '7dacd4d0-3c8e-470b-bf61-103891627d45'), + ('ResearchSubject', '2fc448d6-a23b-4b94-974b-c66110164851'), + ('Patient', 'bc4e1aa6-cb52-40e9-8f20-594d9c84f920'), + ('Specimen', '60c67a06-ea2d-4d24-9249-418dc77a16a9'), + ('Observation', '21f3411d-89a4-4bcc-9ce7-b76edb1c745f'), + ('Observation', 'cec32723-9ede-5f24-ba63-63cb8c6a02cf')], + ('Patient', 'bc4e1aa6-cb52-40e9-8f20-594d9c84f920'): [ + ('ResearchStudy', '7dacd4d0-3c8e-470b-bf61-103891627d45'), + ('ResearchSubject', '2fc448d6-a23b-4b94-974b-c66110164851'), + ('Specimen', '60c67a06-ea2d-4d24-9249-418dc77a16a9'), + ('Observation', '21f3411d-89a4-4bcc-9ce7-b76edb1c745f'), + ('Observation', '4e3c6b59-b1fd-5c26-a611-da4cde9fd061'), + ('Observation', 'cec32723-9ede-5f24-ba63-63cb8c6a02cf')] + } + + +def match_label(self, vertex_gid, label, seen_already=None) -> dict: + """Recursively find the first vertex of a given label, starting traversals from vertex_gid.""" + + # check params + assert vertex_gid is not None, "Expected vertex_gid to be not None." + assert label is not None, "Expected label to be not None." 
+ # mutable default arguments are evil + # See https://florimond.dev/en/posts/2018/08/python-mutable-defaults-are-the-source-of-all-evil + if seen_already is None: + seen_already = [] + + # get all vertices adjacent to vertex_gid + q = self.V(vertex_gid).both() + + # check each adjacent vertex for the requested label + # TODO - consider if this should be a vertices_of_label() -> generator[dict] instead + for _ in q: + if _['vertex']['label'] == label: + return _ + else: + if _['vertex']['gid'] in seen_already: + continue + seen_already.append(_['vertex']['gid']) + return self.match_label(_['vertex']['gid'], label, seen_already=seen_already) + + +def dataframe_associations(self, vertex_gid, vertex_label, labels=('ResearchStudy', 'ResearchSubject', 'Patient', 'Specimen', 'DocumentReference', 'Observation')) -> list[dict]: + """Return all objects associated with vertex_gid.""" + associations = [] + for label in labels: + if label == 'Observation': + continue + if vertex_label == label: + continue + _ = self.match_label(vertex_gid, label) + if _ is not None: + associations.append(_['vertex']['data']) + if 'Observation' in labels: + q = self.V(vertex_gid).in_(["focus", "subject"]).hasLabel("Observation") + for _ in q: + associations.append(_['vertex']['data']) + return associations + + +@pytest.fixture +def graph() -> pygrip.GraphDBWrapper: + """Load the resources into the graph.""" + graph = pygrip.NewMemServer() + jsonpath_expr = parse('*..reference') + for _ in resources(): + graph.addVertex(_['id'], _['resourceType'], _) + for match in jsonpath_expr.find(_): + # value will be something like "Specimen/60c67a06-ea2d-4d24-9249-418dc77a16a9" + # full_path will be something like "specimen.reference" or "focus.[0].reference" + type_, dst_id = match.value.split('/') + path_parts = str(match.full_path).split('.') + path_parts = [part for part in path_parts if '[' not in part and part != 'reference'] + label = '_'.join(path_parts) + graph.addEdge(_['id'], dst_id, label) + + # 
monkey patch the graph object with 
our methods + graph.match_label = types.MethodType(match_label, graph) + graph.dataframe_associations = types.MethodType(dataframe_associations, graph) + + yield graph + + +def test_graph_vertices(graph, expected_vertices): + """Test the graph vertices.""" + + actual_vertices = [] + for _ in graph.V(): + assert 'vertex' in _, f"Expected 'vertex' in {_}" + vertex = _['vertex'] + assert 'data' in vertex, f"Expected 'data' in {vertex}" + assert 'gid' in vertex, f"Expected 'gid' in {vertex}" + assert 'label' in vertex, f"Expected 'label' in {vertex}" + assert 'data' in vertex, f"Expected 'data' in {vertex}" + resource = _['vertex']['data'] + actual_vertices.append((resource['id'], resource['resourceType'])) + + print(actual_vertices) + assert actual_vertices == expected_vertices, f"Expected {expected_vertices} but got {actual_vertices}." + + +def test_graph_edges(graph, expected_edges): + """Test the graph edges.""" + + # check all edges + actual_edges = [] + for _ in graph.V().outE(): + assert 'edge' in _, f"Expected 'edge' in {_}" + edge = _['edge'] + assert 'gid' in edge, f"Expected 'gid' in {edge}" + assert 'label' in edge, f"Expected 'label' in {edge}" + assert 'from' in edge, f"Expected 'from' in {edge}" + assert 'to' in edge, f"Expected 'to' in {edge}" + assert 'data' in edge, f"Expected 'data' in {edge}" + + actual_edges.append((edge['from'], edge['to'], edge['label'])) + + print(actual_edges) + assert actual_edges == expected_edges, f"Expected {expected_edges} but got {actual_edges}." 
+ + +def test_graph_methods(graph): + """Test the methods we expect in a graph object.""" + assert 'V' in dir(graph), f"Expected 'V' in {type(graph)}" + assert 'match_label' in dir(graph), f"Expected 'match_label' in {type(graph)}" + assert 'dataframe_associations' in dir(graph), f"Expected 'dataframe_associations' in {type(graph)}" + + +def test_traversals(graph): + """Test basic traversals""" + + # specimen -> patient + q = graph.V().hasLabel("Specimen").out("subject") + actual_specimen_patient_count = len(list(q)) + assert actual_specimen_patient_count == 1, f"Expected 1 but got {actual_specimen_patient_count}." + assert list(q)[0]['vertex']['data']['resourceType'] == 'Patient' + + q = graph.V().hasLabel("DocumentReference").outV().hasLabel("Specimen").outV().hasLabel("Patient") + assert len(list(q)) == 1, f"Expected 1 but got {len(list(q))}." + actual_document_reference_patient_count = len(list(q)) + assert actual_document_reference_patient_count == 1, f"Expected 1 but got {actual_document_reference_patient_count}." + assert list(q)[0]['vertex']['data']['resourceType'] == 'Patient' + + # follow edges by edge label + q = graph.V().hasLabel("DocumentReference").out("subject") + assert len(list(q)) == 1, f"Expected 1 but got {len(list(q))}." + for subject in q: + subject = subject['vertex']['data'] + assert subject['resourceType'] == 'Specimen', f"Expected Specimen but got {subject['resourceType']}." + + # follow all edges recursively to a vertex of type X + + q = graph.V().hasLabel("DocumentReference") + assert len(list(q)) == 1, f"Expected 1 but got {len(list(q))}." + document_reference_gid = list(q)[0]['vertex']['gid'] + + # 1 hop + specimen = graph.match_label(document_reference_gid, 'Specimen') + assert specimen is not None, "Expected Specimen" + assert specimen['vertex']['gid'] == '60c67a06-ea2d-4d24-9249-418dc77a16a9', f"Expected 60c67a06-ea2d-4d24-9249-418dc77a16a9 but got {specimen}." 
+ + # 2 hops + patient = graph.match_label(document_reference_gid, 'Patient') + assert patient is not None, "Expected Patient" + assert patient['vertex']['gid'] == 'bc4e1aa6-cb52-40e9-8f20-594d9c84f920', f"Expected bc4e1aa6-cb52-40e9-8f20-594d9c84f920 but got {patient}." + + # 4 hops + research_study = graph.match_label(document_reference_gid, 'ResearchStudy') + assert research_study is not None, "Expected ResearchStudy" + assert research_study['vertex']['gid'] == '7dacd4d0-3c8e-470b-bf61-103891627d45', f"Expected 7dacd4d0-3c8e-470b-bf61-103891627d45 but got {research_study}." + + # Observations + q = graph.V(document_reference_gid).in_(["focus", "subject"]).hasLabel("Observation") + assert len(list(q)) == 2, f"Expected 2 but got {len(list(q))} for {document_reference_gid}." + + +def test_dataframe_associations(graph, expected_vertices, expected_dataframe_associations): + """Test the dataframe associations.""" + + actual_dataframe_associations = defaultdict(list) + # for all objects in the graph except Observations, retrieve the associated objects useful for a dataframe + for vertex_gid, vertex_label in expected_vertices: + if vertex_label == 'Observation': + continue + df = graph.dataframe_associations(vertex_gid, vertex_label) + actual_dataframe_associations[(vertex_label, vertex_gid)] = [(_['resourceType'], _['id']) for _ in df] + assert actual_dataframe_associations == expected_dataframe_associations, f"Expected {expected_dataframe_associations} but got {actual_dataframe_associations}."