Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Setting prefix explicitly in excelparser #470

Merged
Merged
57 changes: 44 additions & 13 deletions ontopy/excelparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,18 @@

Note that correct case is mandatory.
"""
from typing import Tuple, Union, Sequence
import os
from typing import Tuple, Union
import warnings

import pandas as pd
import numpy as np
import pyparsing

import ontopy
from ontopy import get_ontology
from ontopy.utils import EMMOntoPyException, NoSuchLabelError
from ontopy.utils import ReadCatalogError, read_catalog
from ontopy.manchester import evaluate
import owlready2 # pylint: disable=C0411

Expand Down Expand Up @@ -97,20 +100,31 @@ def create_ontology_from_excel( # pylint: disable=too-many-arguments
imported ontology.

"""
# Get imported ontologies from optional "Imports" sheet
if not imports:
imports = []
web_protocol = "http://", "https://", "ftp://"

def _relative_to_absolute_paths(path):
if isinstance(path, str):
if not path.startswith(web_protocol):
path = os.path.dirname(excelpath) + "/" + str(path)
return path

try:
imports_frame = pd.read_excel(
imports = pd.read_excel(
excelpath, sheet_name=imports_sheet_name, skiprows=[1]
)
except ValueError:
pass
imports = pd.DataFrame()
else:
# Strip leading and trailing white spaces in path
imports.extend(
imports_frame["Imported ontologies"].str.strip().to_list()
# Strip leading and trailing white spaces in paths
imports.replace(r"^\s+", "", regex=True).replace(
r"\s+$", "", regex=True
)
# Set empty strings to nan
imports = imports.replace(r"^\s*$", np.nan, regex=True)
if "Imported ontologies" in imports.columns:
imports["Imported ontologies"] = imports[
"Imported ontologies"
].apply(_relative_to_absolute_paths)

# Read datafile TODO: Some magic to identify the header row
conceptdata = pd.read_excel(
Expand All @@ -131,7 +145,7 @@ def create_ontology_from_excel( # pylint: disable=too-many-arguments
def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-branches,too-many-statements,too-many-arguments
data: pd.DataFrame,
metadata: pd.DataFrame,
imports: list,
imports: pd.DataFrame,
base_iri: str = "http://emmo.info/emmo/domain/onto#",
base_iri_from_metadata: bool = True,
catalog: dict = None,
Expand Down Expand Up @@ -332,7 +346,6 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
all_added_rows.extend(added_rows)

# Add properties in a second loop

for index in all_added_rows:
row = data.loc[index]
properties = row["Relations"]
Expand Down Expand Up @@ -379,14 +392,15 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
concepts_with_errors["in_imported_ontologies"] = concepts_with_errors[
"already_defined"
].intersection(imported_concepts)

return onto, catalog, concepts_with_errors


def get_metadata_from_dataframe( # pylint: disable=too-many-locals,too-many-branches,too-many-statements
metadata: pd.DataFrame,
base_iri: str,
base_iri_from_metadata: bool = True,
imports: Sequence = (),
imports: pd.DataFrame = None,
catalog: dict = None,
) -> Tuple[ontopy.ontology.Ontology, dict]:
"""Create ontology with metadata from pd.DataFrame"""
Expand All @@ -409,12 +423,29 @@ def get_metadata_from_dataframe( # pylint: disable=too-many-locals,too-many-bra
# Add imported ontologies
catalog = {} if catalog is None else catalog
locations = set()
for location in imports:
for _, row in imports.iterrows():
# for location in imports:
location = row["Imported ontologies"]
if not pd.isna(location) and location not in locations:
imported = onto.world.get_ontology(location).load()
onto.imported_ontologies.append(imported)
catalog[imported.base_iri.rstrip("#/")] = location
try:
cat = read_catalog(location.rsplit("/", 1)[0])
catalog.update(cat)
except ReadCatalogError:
warnings.warn(f"Catalog for {imported} not found.")
locations.add(location)
# set defined prefix
if not pd.isna(row["prefix"]):
# set prefix for all ontologies with same 'base_iri_root'
if not pd.isna(row["base_iri_root"]):
onto.set_common_prefix(
iri_base=row["base_iri_root"], prefix=row["prefix"]
)
# If base_root not given, set prefix only to top ontology
else:
imported.prefix = row["prefix"]

with onto:
# Add title
Expand Down
11 changes: 9 additions & 2 deletions ontopy/ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from pathlib import Path
from collections import defaultdict
from collections.abc import Iterable
from urllib.request import HTTPError
from urllib.request import HTTPError, URLError

import rdflib
from rdflib.util import guess_format
Expand All @@ -40,6 +40,7 @@
_validate_installed_version,
LabelDefinitionError,
ThingClassDefinitionError,
EMMOntoPyException,
)

if TYPE_CHECKING:
Expand Down Expand Up @@ -617,7 +618,13 @@ def getmtime(path):
if fmt and fmt not in OWLREADY2_FORMATS:
# Convert filename to rdfxml before passing it to owlready2
graph = rdflib.Graph()
graph.parse(resolved_url, format=fmt)
try:
graph.parse(resolved_url, format=fmt)
except URLError as err:
raise EMMOntoPyException(
"URL error", err, resolved_url
) from err

with tempfile.NamedTemporaryFile() as handle:
graph.serialize(destination=handle, format="xml")
handle.seek(0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,8 @@
<catalog prefer="public" xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">
<group id="Folder Repository, directory=, recursive=true, Auto-Update=false, version=2" prefer="public" xml:base="">
<uri name="https://raw.githubusercontent.com/emmo-repo/emmo-repo.github.io/master/versions/1.0.0-beta/emmo-inferred-chemistry" uri="https://raw.githubusercontent.com/emmo-repo/emmo-repo.github.io/master/versions/1.0.0-beta/emmo-inferred-chemistry2.ttl"/>
<uri name="http://ontology.info/ontology" uri="imported_onto/ontology.ttl"/>
<uri name="http://ontology.info/ontology/0.1.0" uri="imported_onto/ontology.ttl"/>
<uri name="http://ontology.info/ontology/0.1.0/subontology" uri="imported_onto/subontology.ttl"/>
</group>
</catalog>
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
"Jesper Friis"@en,
"Sylvain Gouttebroze"@en ;
term:title "A test domain ontology"@en ;
owl:imports <https://raw.githubusercontent.com/emmo-repo/emmo-repo.github.io/master/versions/1.0.0-beta/emmo-inferred-chemistry> ;
owl:imports <http://ontology.info/ontology>,
<https://raw.githubusercontent.com/emmo-repo/emmo-repo.github.io/master/versions/1.0.0-beta/emmo-inferred-chemistry> ;
owl:versionInfo "0.01"@en .

:EMMO_0264be35-e8ad-5b35-a1a3-84b37bde22d1 a owl:Class ;
Expand All @@ -25,6 +26,10 @@
:EMMO_4b32833e-0833-56a7-903c-28a6a8191fe8 ;
core:prefLabel "FiniteTemporalPattern"@en .

:EMMO_080262b7-4f7e-582b-916e-8274c73dd629 a owl:Class ;
rdfs:subClassOf <http://ontology.info/ontology#testclass> ;
core:prefLabel "ANewTestClass"@en .

:EMMO_1c81f1eb-8b94-5e74-96de-1aeacbdb5b93 a owl:Class ;
emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "The boundary of a grain"@en ;
rdfs:subClassOf :EMMO_472ed27e-ce08-53cb-8453-56ab363275c4 ;
Expand Down Expand Up @@ -58,6 +63,10 @@
:EMMO_9fa9ca88-2891-538a-a8dd-ccb8a08b9890 ;
core:prefLabel "FiniteSpatioTemporalPattern"@en .

:EMMO_e4e653eb-72cd-5dd6-a428-f506d9679774 a owl:Class ;
rdfs:subClassOf <http://ontology.info/subontology#testclass2> ;
core:prefLabel "AnotherNewTestClass"@en .

:EMMO_e633d033-2af6-5f04-a706-dab826854fb1 a owl:Class ;
emmo:EMMO_967080e5_2f42_4eb2_a3a9_c58143e835f9 "The boundary of a subgrain"@en ;
rdfs:subClassOf owl:Thing ;
Expand Down
7 changes: 7 additions & 0 deletions tests/test_excelparser/imported_onto/catalog-v001.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<catalog prefer="public" xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">
<group id="Folder Repository, directory=, recursive=true, Auto-Update=false, version=2" prefer="public" xml:base="">
<uri name="http://ontology.info/ontology/0.1.0" uri="./ontology.ttl"/>
<uri name="http://ontology.info/ontology/0.1.0/subontology" uri="./subontology.ttl"/>
</group>
</catalog>
18 changes: 18 additions & 0 deletions tests/test_excelparser/imported_onto/ontology.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
@prefix : <http://ontology.info/ontology#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@base <http://ontology.info/ontology> .

<http://ontology.info/ontology> rdf:type owl:Ontology ;
owl:versionIRI <http://ontology.info/ontology/0.1.0> ;
owl:imports <http://ontology.info/ontology/0.1.0/subontology> ;
owl:versionInfo "0.1.0" .


:testclass rdf:type owl:Class ;
rdfs:subClassOf owl:Thing ;
skos:prefLabel "TestClass"@en .
21 changes: 21 additions & 0 deletions tests/test_excelparser/imported_onto/subontology.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
@prefix : <http://ontology.info/subontology#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@base <http://ontology.info/subontology> .

<http://ontology.info/subontology> rdf:type owl:Ontology ;
owl:versionIRI <http://ontology.info/ontology/0.1.0/subontology> .


# Annotations
skos:prefLabel rdf:type owl:AnnotationProperty .
skos:altLabel rdf:type owl:AnnotationProperty .


:testclass2 rdf:type owl:Class ;
rdfs:subClassOf owl:Thing ;
skos:prefLabel "TestClass2"@en .
Binary file added tests/test_excelparser/onto.xlsx
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,10 @@

def test_excelparser(repo_dir: "Path") -> None:
"""Basic test for creating an ontology from an Excel file."""
ontopath = (
repo_dir / "tests" / "testonto" / "excelparser" / "fromexcelonto.ttl"
)
ontopath = repo_dir / "tests" / "test_excelparser" / "fromexcelonto.ttl"

onto = get_ontology(str(ontopath)).load()
xlspath = repo_dir / "tests" / "testonto" / "excelparser" / "onto.xlsx"
xlspath = repo_dir / "tests" / "test_excelparser" / "onto.xlsx"
ontology, catalog, errors = create_ontology_from_excel(xlspath, force=True)
assert onto == ontology

Expand Down
7 changes: 4 additions & 3 deletions tests/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def test_load(repo_dir: "Path", testonto: "Ontology") -> None:
import pytest

from ontopy import get_ontology
from ontopy.ontology import HTTPError
from ontopy.ontology import EMMOntoPyException

# Check that the defaults works
emmo = get_ontology("emmo").load() # ttl format
Expand Down Expand Up @@ -36,8 +36,9 @@ def test_load(repo_dir: "Path", testonto: "Ontology") -> None:
assert onto.Electrolyte.prefLabel.first() == "Electrolyte"

with pytest.raises(
HTTPError,
match="HTTP Error 404: https://emmo.info/non-existing/ontology: Not Found",
EMMOntoPyException,
match="'URL error', <HTTPError 404: 'Not Found'>, 'http://emmo.info/non-existing/ontology'"
# match="HTTP Error 404: https://emmo.info/non-existing/ontology: Not Found",
):
get_ontology("http://emmo.info/non-existing/ontology#").load()

Expand Down
4 changes: 2 additions & 2 deletions tests/testonto/catalog-v001.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<catalog prefer="public" xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">
<group id="Folder Repository, directory=, recursive=true, Auto-Update=false, version=2" prefer="public" xml:base="">
<uri name="http://emmo.info/testonto/0.1.0" uri="testonto.ttl"/>
<uri name="http://emmo.info/testonto/0.1.0/models" uri="models.ttl"/>
<uri name="http://emmo.info/testonto/0.1.0" uri="ontology.ttl"/>
<uri name="http://emmo.info/testonto/0.1.0/models" uri="models.ttl"/>
</group>
</catalog>
Binary file removed tests/testonto/excelparser/onto.xlsx
Binary file not shown.