Skip to content

Commit

Permalink
Merge pull request #69 from INCATools/allow-prefixes-pass-thru
Browse files Browse the repository at this point in the history
Allow prefixes pass-thru.
  • Loading branch information
cmungall authored Mar 10, 2023
2 parents 605844c + 6e16a25 commit b128c79
Show file tree
Hide file tree
Showing 15 changed files with 1,980 additions and 1,664 deletions.
26 changes: 24 additions & 2 deletions ontologies.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ db/ncit.owl: download/ncit.owl


# Fetch the FMA ontology. The download goes to a temp file, its checksum is
# written to $@.sha256, and only then is the temp file renamed to the target.
# Only one source URL is fetched: an earlier second download wrote to the same
# temp file and was immediately overwritten, so it had no effect on the result.
# -f makes curl exit non-zero on an HTTP error instead of saving the error page.
# (STAMP is defined outside this excerpt.)
download/fma.owl: STAMP
	curl -L -s -f http://sig.biostr.washington.edu/share/downloads/fma/release/latest/fma.owl > $@.tmp
	sha256sum -b $@.tmp > $@.sha256
	mv $@.tmp $@

Expand Down Expand Up @@ -75,6 +75,17 @@ db/msio.owl: download/msio.owl
cp $< $@


# Fetch the MODL ontology. The download goes to a temp file, its checksum is
# written to $@.sha256, and only then is the temp file renamed to the target.
# -f makes curl exit non-zero on an HTTP error (e.g. a 404 page) so that the
# error body is never checksummed and installed as the ontology.
# (STAMP is defined outside this excerpt.)
download/modl.owl: STAMP
	curl -L -s -f https://raw.githubusercontent.com/Data-Semantics-Laboratory/modular-ontology-design-library/master/MODL.owl > $@.tmp
	sha256sum -b $@.tmp > $@.sha256
	mv $@.tmp $@

# Keep the downloaded file even if make would otherwise delete it.
.PRECIOUS: download/modl.owl

# MODL is used as downloaded: copy it into db/ unchanged.
# The copy goes through a temp file and is renamed at the end, so a failed
# copy cannot leave a truncated db/modl.owl that looks up to date.
db/modl.owl: download/modl.owl
	cp $< $@.tmp && mv $@.tmp $@


download/phenio.owl: STAMP
curl -L -s https://github.com/monarch-initiative/phenio/releases/latest/download/phenio.owl > $@.tmp
sha256sum -b $@.tmp > $@.sha256
Expand Down Expand Up @@ -130,6 +141,17 @@ db/reacto.owl: download/reacto.owl
cp $< $@


# Fetch the BCIO ontology. The download goes to a temp file, its checksum is
# written to $@.sha256, and only then is the temp file renamed to the target.
# -f makes curl exit non-zero on an HTTP error so that an error page is never
# checksummed and installed as the ontology.
# (STAMP is defined outside this excerpt.)
download/bcio.owl: STAMP
	curl -L -s -f http://humanbehaviourchange.org/ontology/bcio.owl > $@.tmp
	sha256sum -b $@.tmp > $@.sha256
	mv $@.tmp $@

# Keep the downloaded file even if make would otherwise delete it.
.PRECIOUS: download/bcio.owl

# BCIO is used as downloaded: copy it into db/ unchanged.
# The copy goes through a temp file and is renamed at the end, so a failed
# copy cannot leave a truncated db/bcio.owl that looks up to date.
db/bcio.owl: download/bcio.owl
	cp $< $@.tmp && mv $@.tmp $@


download/go.owl: STAMP
curl -L -s http://purl.obolibrary.org/obo/go/extensions/go-plus.owl > $@.tmp
sha256sum -b $@.tmp > $@.sha256
Expand Down Expand Up @@ -503,4 +525,4 @@ download/%.owl: STAMP
# Generic rule: any db/<name>.owl without an explicit recipe of its own is
# produced by running `robot merge` over the matching downloaded file.
db/%.owl: download/%.owl
	robot merge --input $< --output $@

# Names of the ontologies that have rules in this makefile.
# Defined once: an earlier assignment without modl and bcio was overwritten by
# this one and therefore had no effect.
EXTRA_ONTOLOGIES = chiro ncit fma maxo foodon chebiplus msio modl phenio comploinc bero aio reacto bcio go go-lego bao orcid cpont biolink biopax enanomapper mlo ito reactome-Homo-sapiens efo hcao edam sweetAll lov schema-dot-org cellosaurus cosmo dbpendiaont co_324 hgnc.genegroup hgnc dictybase eccode uniprot rhea swisslipid drugbank drugcentral complexportal drugmechdb rxnorm
33 changes: 15 additions & 18 deletions src/semsql/builder/build.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,24 +33,29 @@ help:
# All dbs are made from an initial template containing
# (1) prefixes
# (2) SQL Schema (primarily views)
#
# The template is assembled in $@.tmp and renamed to the target only after
# every step has succeeded:
#   - a stale $@.tmp left by an earlier failed run is removed first, so the
#     schema is always loaded into a fresh database;
#   - the schema is fed to sqlite3 by input redirection rather than through
#     `cat |`, so a failure to read the schema file aborts the recipe instead
#     of being hidden behind the exit status of the last pipeline stage;
#   - the prefix CSV is then imported into the `prefix` table.
$(TEMPLATE): $(THIS_DIR)/sql_schema/semsql.sql build_prefixes
	rm -f $@.tmp && \
	sqlite3 $@.tmp < $< && \
	echo .exit | sqlite3 -echo $@.tmp -cmd ".mode csv" -cmd ".import $(THIS_DIR)/prefixes/prefixes.csv prefix" && \
	mv $@.tmp $@
.PRECIOUS: $(TEMPLATE)
#$(TEMPLATE): $(THIS_DIR)/sql_schema/semsql.sql build_prefixes
# cat $< | sqlite3 $@.tmp && \
# echo .exit | sqlite3 -echo $@.tmp -cmd ".mode csv" -cmd ".import $(THIS_DIR)/prefixes/prefixes.csv prefix" && \
# mv $@.tmp $@
#.PRECIOUS: $(TEMPLATE)

# Build <name>-min.owl from <name>.owl with a two-step ROBOT chain:
#   1. `remove` with the axiom selectors "equivalent disjoint annotation";
#   2. `filter`, passing exclude-terms.txt via --exclude-terms.
# The result of the chain is written to the target.
%-min.owl: %.owl
	robot \
	  remove --input $< --axioms "equivalent disjoint annotation" \
	  filter --exclude-terms $(THIS_DIR)/exclude-terms.txt \
	  --output $@

# Locations of the prefix tables used by the rules below: the CSV is imported
# into the `prefix` table of each db, the YAML is passed to relation-graph.
# Both are ordinary make variables, so a value given on the make command line
# (e.g. `make <target> PREFIX_CSV_PATH=/path/to/prefixes.csv`) replaces the
# default under $(PREFIX_DIR).
PREFIX_CSV_PATH = $(PREFIX_DIR)/prefixes.csv
PREFIX_YAML_PATH = $(PREFIX_DIR)/prefixes.yaml

# -- MAIN TARGET --
# A db is constructed from
# (1) triples loaded using rdftab
# (2) A relation-graph TSV
%.db: %.owl %-$(RGSUFFIX).tsv $(TEMPLATE)
cp $(TEMPLATE) $@.tmp && \
%.db: %.owl %-$(RGSUFFIX).tsv $(PREFIX_CSV_PATH)
rm -f $@.tmp && \
cat $(THIS_DIR)/sql_schema/semsql.sql | sqlite3 $@.tmp && \
echo .exit | sqlite3 -echo $@.tmp -cmd ".mode csv" -cmd ".import $(PREFIX_CSV_PATH) prefix" && \
rdftab $@.tmp < $< && \
sqlite3 $@.tmp -cmd '.separator "\t"' ".import $*-$(RGSUFFIX).tsv entailed_edge" && \
gzip -f $*-$(RGSUFFIX).tsv && \
Expand All @@ -61,22 +66,14 @@ $(TEMPLATE): $(THIS_DIR)/sql_schema/semsql.sql build_prefixes

# -- ENTAILED EDGES --
# relation-graph is used to compute entailed edges.
#
# this currently requires a few different steps, because
# - RG currently outputs TTL
# - We need a TSV using correct prefixes/CURIEs to load into our db
#
# will be simplified in future. See:
# - https://github.com/balhoff/relation-graph/issues/123
# - https://github.com/balhoff/relation-graph/issues/25
%-$(RGSUFFIX).tsv: %-min.owl %-properties.txt $(PREFIX_DIR)/prefixes.yaml
%-$(RGSUFFIX).tsv: %-min.owl %-properties.txt $(PREFIX_YAML_PATH)
$(RG) --disable-owl-nothing true \
--ontology-file $<\
$(RG_PROPERTIES) \
--output-file $@.tmp \
--equivalence-as-subclass true \
--mode TSV \
--prefixes $(PREFIX_DIR)/prefixes.yaml \
--prefixes $(PREFIX_YAML_PATH) \
--output-individuals true \
--output-subclasses true \
--reflexive-subclasses true && \
Expand Down Expand Up @@ -105,6 +102,6 @@ $(PREFIX_DIR)/prefixes.csv: $(PREFIX_DIR)/prefixes_curated.csv $(PREFIX_DIR)/pre
cat $^ > $@

# see https://github.com/INCATools/relation-graph/issues/168
#
# Derive the YAML prefix map from the prefix CSV: drop the `prefix,` header
# row and the `obo,` row, then rewrite each remaining `key,value` line as
# `key: "value"`. Output goes to a temp file that is renamed on success.
#
# The rule is keyed on the path variables only. A second header hard-coding
# $(PREFIX_DIR)/prefixes.yaml: $(PREFIX_DIR)/prefixes.csv would add the bundled
# CSV as a prerequisite even when PREFIX_CSV_PATH points elsewhere.
$(PREFIX_YAML_PATH): $(PREFIX_CSV_PATH)
	grep -v ^prefix, $< | grep -v ^obo, | perl -npe 's@,(.*)@: "$$1"@' > $@.tmp && mv $@.tmp $@

9 changes: 7 additions & 2 deletions src/semsql/builder/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,15 @@ class DockerConfig:
memory: str = None


def make(target: str, docker_config: Optional[DockerConfig] = None):
def make(
target: str, docker_config: Optional[DockerConfig] = None, prefix_csv_path=None
):
"""
Builds a target such as a SQLite file using the build.Makefile
:param target: Make target
:param docker_config: if passed, use ODK docker with the specific config
:param prefix_csv_path:
"""
path_to_makefile = str(this_path / "build.Makefile")
if docker_config is not None:
Expand All @@ -60,6 +63,8 @@ def make(target: str, docker_config: Optional[DockerConfig] = None):
else:
pre = []
cmd = pre + ["make", target, "-f", path_to_makefile]
if prefix_csv_path:
cmd += [f"PREFIX_CSV_PATH={prefix_csv_path}"]
logging.info(f"CMD={cmd}")
subprocess.run(cmd)

Expand Down Expand Up @@ -90,7 +95,7 @@ def download_obo_sqlite(ontology: str, destination: str):
db = f"{ontology}.db"
url = f"https://s3.amazonaws.com/bbop-sqlite/{db}.gz"
logging.info(f"Downloading from {url}")
r = requests.get(url, allow_redirects=True)
r = requests.get(url, allow_redirects=True, timeout=3600)
destination_gzip = f"{destination}.gz"
open(destination_gzip, "wb").write(r.content)
with gzip.open(destination_gzip, "rb") as f_in:
Expand Down
5 changes: 3 additions & 2 deletions src/semsql/builder/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ def main(verbose: int, quiet: bool):
show_default=True,
help="Uses ODK docker image",
)
def make(path, docker):
@click.option("--prefix-csv-path", "-P", help="path to csv of prefix expansions")
def make(path, docker, **kwargs):
"""
Makes a specified target, such as a db file
Expand All @@ -47,7 +48,7 @@ def make(path, docker):
docker_config = builder.DockerConfig()
else:
docker_config = None
builder.make(path, docker_config=docker_config)
builder.make(path, docker_config=docker_config, **kwargs)


@main.command()
Expand Down
2 changes: 2 additions & 0 deletions src/semsql/builder/prefixes/prefixes.csv
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,10 @@ prefix,base
FMA,http://purl.org/sig/ont/fma/fma
MSIO,http://purl.obolibrary.org/obo/MSIO_
nmrCV,http://nmrML.org/nmrCV#NMR:
modl,https://archive.org/services/purl/purl/modular_ontology_design_library#
biolink,https://w3id.org/biolink/vocab/
loinc,https://loinc.org/
BCIO,http://humanbehaviourchange.org/ontology/BCIO_
orcid,https://orcid.org/
evs.ncit,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#
old.fix,http://purl.org/obo/owl/FIX#
Expand Down
2 changes: 2 additions & 0 deletions src/semsql/builder/prefixes/prefixes_local.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ prefix,base
FMA,http://purl.org/sig/ont/fma/fma
MSIO,http://purl.obolibrary.org/obo/MSIO_
nmrCV,http://nmrML.org/nmrCV#NMR:
modl,https://archive.org/services/purl/purl/modular_ontology_design_library#
biolink,https://w3id.org/biolink/vocab/
loinc,https://loinc.org/
BCIO,http://humanbehaviourchange.org/ontology/BCIO_
orcid,https://orcid.org/
evs.ncit,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#
old.fix,http://purl.org/obo/owl/FIX#
Expand Down
11 changes: 9 additions & 2 deletions src/semsql/builder/registry/ontologies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ ontologies:
url: http://purl.obolibrary.org/obo/ncit.owl
build_command: "robot relax -i $< merge -o $@"
fma:
# note: this is the public API key on the main bioportal site
url: "https://data.bioontology.org/ontologies/FMA/submissions/29/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb"
url: http://sig.biostr.washington.edu/share/downloads/fma/release/latest/fma.owl
prefixmap:
FMA: http://purl.org/sig/ont/fma/fma
maxo:
Expand All @@ -28,6 +27,10 @@ ontologies:
prefixmap:
MSIO: http://purl.obolibrary.org/obo/MSIO_
nmrCV: "http://nmrML.org/nmrCV#NMR:"
modl:
url: https://raw.githubusercontent.com/Data-Semantics-Laboratory/modular-ontology-design-library/master/MODL.owl
prefixmap:
modl: "https://archive.org/services/purl/purl/modular_ontology_design_library#"
phenio:
description: Monarch Phenomics Integrated Ontology
url: https://github.com/monarch-initiative/phenio/releases/latest/download/phenio.owl
Expand All @@ -51,6 +54,10 @@ ontologies:
url: https://raw.githubusercontent.com/berkeleybop/artificial-intelligence-ontology/main/aio.owl
reacto:
url: http://purl.obolibrary.org/obo/go/extensions/reacto.owl
bcio:
url: http://humanbehaviourchange.org/ontology/bcio.owl
prefixmap:
BCIO: http://humanbehaviourchange.org/ontology/BCIO_
go:
url: http://purl.obolibrary.org/obo/go/extensions/go-plus.owl
go-lego:
Expand Down
2 changes: 0 additions & 2 deletions src/semsql/builder/registry/registry_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,6 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):

# Enumerations
class FormatEnum(EnumDefinitionImpl):

n3 = PermissibleValue(text="n3", description="n3")

_defn = EnumDefinition(
Expand All @@ -335,7 +334,6 @@ class FormatEnum(EnumDefinitionImpl):


class CompressionEnum(EnumDefinitionImpl):

gzip = PermissibleValue(text="gzip", description="gzip")

_defn = EnumDefinition(
Expand Down
Loading

0 comments on commit b128c79

Please sign in to comment.