Skip to content

Commit

Permalink
fixing issue with attributes and adding tests
Browse files Browse the repository at this point in the history
Attributes on edges were being included both inside the attributes section and, in their original form, at the top level of the edge. This commit fixes that issue and adds checks for that situation for both edges and nodes. It also cleans up the edge conversion part of cypher.py and adds better comments.
  • Loading branch information
EvanDietzMorris committed Oct 1, 2024
1 parent 2726a66 commit 900f0d0
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 29 deletions.
55 changes: 36 additions & 19 deletions reasoner_transpiler/cypher.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import json

from collections import defaultdict
from neo4j import AsyncResult, Result

from .attributes import transform_attributes, EDGE_SOURCE_PROPS
from .matching import match_query
Expand Down Expand Up @@ -310,6 +309,8 @@ def transform_nodes_list(nodes):


def transform_edges_list(edges):
# See convert_bolt_edge_to_trapi() for details on the contents of edges.
# It is a list whose items are either a single edge (itself a list) or a list of such edges, representing unique edges from the graph
kg_edges = {}
element_id_to_edge_id = {}
for edge_index, cypher_edge_result in enumerate(edges):
Expand All @@ -324,19 +325,19 @@ def transform_edges_list(edges):
cypher_edges = list() # this looks weird, but it's necessary to make a list of one list (I think?)
cypher_edges.append(cypher_edge_result)

# transform the edge into TRAPI
# transform the edge(s) into TRAPI
for cypher_edge in cypher_edges:
edge_element_id, edge_dict = convert_bolt_edge_to_dict(cypher_edge)
edge_id = edge_dict.get('id', f'edge_{edge_index}')
# transform the edge into TRAPI and return:
# edge_element_id - neo4j element id,
# edge_id - the edge id that will be used for edges in the TRAPI knowledge graph and edge bindings
# trapi_edge - a dictionary that represents an edge in the knowledge_graph part of the TRAPI response
edge_element_id, edge_id, trapi_edge = convert_bolt_edge_to_trapi(cypher_edge)
if not edge_id:
edge_id = f'e_{edge_index}'
# make a mapping that will be used to look up the edge id by element id later
element_id_to_edge_id[edge_element_id] = edge_id
# get properties matching EDGE_SOURCE_PROPS keys, remove biolink: if needed,
# then pass (key, value) tuples to construct_sources_tree for formatting, constructing the 'sources' section
edge_dict['sources'] = construct_sources_tree([
(edge_source_prop.removeprefix('biolink:'), edge_dict.get(edge_source_prop))
for edge_source_prop in EDGE_SOURCE_PROPS if edge_dict.get(edge_source_prop, None)])
# convert all remaining attributes to TRAPI format
edge_dict.update(transform_attributes(edge_dict, node=False))
kg_edges[edge_id] = edge_dict
# add it to the knowledge graph
kg_edges[edge_id] = trapi_edge
return kg_edges, element_id_to_edge_id


Expand Down Expand Up @@ -412,23 +413,39 @@ def convert_jolt_node_to_dict(jolt_node):
return node


def convert_bolt_edge_to_trapi(bolt_edge):
    """Convert one edge from the cypher results into a TRAPI knowledge graph edge.

    bolt_edge is not a standard object from the bolt driver, it's a list
    generated by a specific cypher return clause, like:
        [elementId(edge_1), startNode(edge_1).id, type(edge_1), endNode(edge_1).id, properties(edge_1)]
    This is done to prevent including often redundant node and edge properties
    on nodes and edges in pathway results. See the cypher generated in the
    edges_assemble clause in assemble_results() for more details.

    Returns a 3-tuple (element_id, edge_id, converted_edge):
        element_id - the first item of bolt_edge (the neo4j element id)
        edge_id - the 'id' property of the edge, or None if it has none
        converted_edge - a dict with 'subject', 'predicate', 'object' and
            'sources', updated with whatever transform_attributes() returns
            for the remaining edge properties
    If bolt_edge is missing or empty, a message is printed and
    (None, None, None) is returned.
    """
    if not bolt_edge:
        print(f'Tried to convert a missing edge: {bolt_edge}')
        # Return three values here too: callers unpack
        # (element_id, edge_id, trapi_edge), so a 2-tuple would raise ValueError.
        return None, None, None

    element_id = bolt_edge[0]
    converted_edge = {
        'subject': bolt_edge[1],
        'predicate': bolt_edge[2],
        'object': bolt_edge[3],
    }

    # edge_props - a copy of any other properties from the edge. Properties are
    # popped from this copy as they are consumed, so nothing ends up both in its
    # original form on the top level of the edge and inside a TRAPI section.
    edge_props = {**bolt_edge[4]}

    # get the id if there is one on the edge
    edge_id = edge_props.pop('id', None)

    # get properties matching EDGE_SOURCE_PROPS keys, remove biolink: if needed,
    # then pass (key, value) tuples to construct_sources_tree for formatting, constructing the sources section
    converted_edge['sources'] = construct_sources_tree([
        (edge_source_prop.removeprefix('biolink:'), edge_props.pop(edge_source_prop))
        for edge_source_prop in EDGE_SOURCE_PROPS if edge_source_prop in edge_props])

    # convert all remaining properties to TRAPI format and merge the result into the edge
    converted_edge.update(transform_attributes(edge_props, node=False))

    # return the element id, the edge id (may be None), and the TRAPI edge dict
    return element_id, edge_id, converted_edge


def convert_jolt_edge_to_dict(jolt_edges, jolt_element_id_lookup):
Expand Down
17 changes: 7 additions & 10 deletions tests/test_props.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,10 @@ def test_numeric(neo4j_driver):
}
output = neo4j_driver.run(get_query(qgraph), convert_to_trapi=True, qgraph=qgraph)
assert len(output["results"]) == 1
results = sorted(
output["knowledge_graph"]["nodes"].values(),
key=lambda node: node["name"],
)
expected_nodes = [
"CASP3",
]
for ind, result in enumerate(results):
assert result["name"] == expected_nodes[ind]

node_1 = list(output["knowledge_graph"]["nodes"].values())[0]
assert node_1["name"] == "CASP3"
assert "length" not in node_1


def test_string(neo4j_driver):
Expand Down Expand Up @@ -176,7 +171,9 @@ def test_valid_biolink_attribute_without_mapping(neo4j_driver):
output = neo4j_driver.run(get_query(qgraph), convert_to_trapi=True, qgraph=qgraph)
edges = output["knowledge_graph"]["edges"]
assert len(edges) == 1
attributes = list(edges.values())[0]["attributes"]
edge = list(edges.values())[0]
assert "p_value" not in edge
attributes = edge["attributes"]
assert len(attributes) == 1
assert attributes[0] == {
"original_attribute_name": "p_value",
Expand Down
2 changes: 2 additions & 0 deletions tests/test_qualifier_constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ def test_single_qualifier(neo4j_driver):
qualified_predicate_output = {'qualifier_type_id': 'biolink:qualified_predicate', 'qualifier_value': 'biolink:causes'}
qualifier_output = output["knowledge_graph"]["edges"]["qualified_edge_multiple_qualifier"]["qualifiers"]
assert qualified_predicate_output in qualifier_output
assert "qualified_predicate" not in output["knowledge_graph"]["edges"]["qualified_edge_multiple_qualifier"]
assert "biolink:qualified_predicate" not in output["knowledge_graph"]["edges"]["qualified_edge_multiple_qualifier"]


def test_multi_qualifier(neo4j_driver):
Expand Down
1 change: 1 addition & 0 deletions tests/test_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def test_primary_source(neo4j_driver):
edge_sources = {
e: edge["sources"] for e, edge in output["knowledge_graph"]["edges"].items()
}
assert not any(['primary_knowledge_source' in edge for edge in output["knowledge_graph"]["edges"].values()])
assert edge_sources["metformin_treats_t2d"] == [
{'resource_id': 'infores:test', 'resource_role': 'primary_knowledge_source'},
{'resource_id': 'ctd', 'resource_role': 'aggregator_knowledge_source',
Expand Down

0 comments on commit 900f0d0

Please sign in to comment.