-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun.py
129 lines (117 loc) · 3.85 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# run.py
"""
Tool to transform 4store data dumps
to KGX tsv nodes/edges.
Checks first row of each dump file to
verify if it contains a comment.
"""
import sys
import click
from bioportal_to_kgx.functions import ( # type: ignore
do_transforms,
examine_data_directory,
)
def _split_comma_list(_ctx, _param, value):
    """Click option callback: split a comma-delimited value into a list.

    Click invokes callbacks as ``(ctx, param, value)``; only ``value`` is
    used. Returns an empty list when the option was omitted or is empty.
    """
    return value.split(",") if value else []


def _join_names_by_outcome(transform_status, succeeded):
    """Return the comma-joined keys of ``transform_status`` for one outcome.

    Args:
        transform_status: Mapping whose keys are joined into the result and
            whose values are tested for truthiness (truthy is reported as
            a successful transform by the caller).
        succeeded: If True, select keys with truthy values; otherwise
            select keys with falsy values.

    Returns:
        The selected keys joined with ", " in the mapping's iteration
        order, or an empty string when none match.
    """
    return ", ".join(
        name
        for name, outcome in transform_status.items()
        if bool(outcome) == succeeded
    )


@click.command()
@click.option(
    "--input",
    required=True,
    nargs=1,
    help="""Path to the 4store data dump - usually named data""",
)
@click.option(
    "--kgx_validate",
    is_flag=True,
    help="""If used, will run the KGX validator after completing
    each transformation.
    Validation logs will be written to
    each output directory.
    If an existing transform is found
    without a validation log,
    a new validation will be run.""",
)
@click.option(
    "--robot_validate",
    is_flag=True,
    help="""If used, will run ROBOT measure and ROBOT report for
    each transformation.
    Logs will be written to each output directory.
    If an existing transform is found without ROBOT logs,
    a new validation will be run.""",
)
@click.option(
    "--pandas_validate",
    is_flag=True,
    help="""If used, will verify that each new and existing transform
    can be parsed with pandas without encountering
    format errors.""",
)
@click.option(
    "--get_bioportal_metadata",
    is_flag=True,
    help="""If used, will retrieve metadata from BioPortal.
    Requires Internet connection and NCBO API key.
    (From BioPortal account page.)
    Specify the API key in the --ncbo_key parameter.
    Metadata is stored in its own KGX TSV nodefile,
    e.g., BTO_1_nodes_metadata.tsv""",
)
@click.option("--ncbo_key", help="""Key for the NCBO API.""")
@click.option(
    "--write_curies",
    is_flag=True,
    help="""If used, will convert node IDs to CURIEs, with the ontology
    id as prefix.
    IRIs will be kept in each node's iri field.""",
)
@click.option(
    "--include_only",
    callback=_split_comma_list,
    help="""One or more ontologies to retrieve and transform, and only these,
    comma-delimited and named by their hashed file ID,
    e.g., dabd4d902360003975fb25ae56f8.""",
)
@click.option(
    "--exclude",
    callback=_split_comma_list,
    help="""One or more ontologies to exclude from transforms,
    comma-delimited and named by their hashed file ID,
    e.g., dabd4d902360003975fb25ae56f8.""",
)
def run(
    # NOTE: ``input`` shadows the builtin, but the name is bound to the
    # ``--input`` option by Click, so it is kept for CLI compatibility.
    input: str,
    kgx_validate: bool,
    robot_validate: bool,
    pandas_validate: bool,
    get_bioportal_metadata: bool,
    write_curies: bool,
    ncbo_key=None,
    include_only=None,
    exclude=None,
):
    """Transform 4store data dumps to KGX TSV nodes and edges.
    \f
    Everything after the form feed above is hidden from ``--help`` output
    by Click; it documents the function for maintainers only.

    Args:
        input: Path to the 4store data dump.
        kgx_validate: Forwarded to ``do_transforms``; requests KGX
            validation of each transform.
        robot_validate: Forwarded to ``do_transforms``; requests ROBOT
            measure/report for each transform.
        pandas_validate: Forwarded to ``do_transforms``; requests a pandas
            parse check of each transform.
        get_bioportal_metadata: Forwarded to ``do_transforms``; requests
            BioPortal metadata retrieval. Requires ``ncbo_key``.
        write_curies: Forwarded to ``do_transforms``; requests conversion
            of node IDs to CURIEs.
        ncbo_key: NCBO API key, or None if not supplied.
        include_only: Ontology file IDs to restrict processing to. Click
            supplies a list (possibly empty) via the option callback.
        exclude: Ontology file IDs to skip. Click supplies a list
            (possibly empty) via the option callback.

    Raises:
        SystemExit: If ``get_bioportal_metadata`` is set without an
            ``ncbo_key``.
    """
    # Avoid mutable default arguments; normalise to fresh lists instead.
    include_only = [] if include_only is None else include_only
    exclude = [] if exclude is None else exclude

    # Fail fast: metadata retrieval cannot work without the API key.
    if get_bioportal_metadata and not ncbo_key:
        sys.exit(
            "Cannot access BioPortal metadata without API key. "
            "Specify in --ncbo_key parameter."
        )

    data_filepaths = examine_data_directory(input, include_only, exclude)

    transform_status = do_transforms(
        data_filepaths,
        kgx_validate,
        robot_validate,
        pandas_validate,
        get_bioportal_metadata,
        ncbo_key,
        write_curies,
    )

    successes = _join_names_by_outcome(transform_status, succeeded=True)
    failures = _join_names_by_outcome(transform_status, succeeded=False)

    # Only report a category when it has at least one entry.
    if successes:
        print(f"Successful transforms: {successes}")
    if failures:
        print(f"Failed transforms: {failures}")
# Script entry point: invoke the Click command only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    run()