Skip to content

Commit

Permalink
create_from_excel/pandas return as list of concepts that are worngly …
Browse files Browse the repository at this point in the history
…defined in the excelfile (#396)

* Added returning errors in excelparser

* Added test to check that all concpets that are defined in the wrong manner are listed in the error dictionary returned.

* Documented new output from excelparser
  • Loading branch information
francescalb authored Apr 7, 2022
1 parent 7d66edb commit 5d2971f
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 7 deletions.
71 changes: 65 additions & 6 deletions ontopy/excelparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,38 @@ def create_ontology_from_excel( # pylint: disable=too-many-arguments
base_iri_from_metadata: Whether to use base IRI defined from metadata.
imports: List of imported ontologies.
catalog: Imported ontologies with (name, full path) key/value-pairs.
force: Forcibly make an ontology by skipping concepts with a prefLabel
that is erroneously defined.
force: Forcibly make an ontology by skipping concepts
that are erroneously defined or other errors in the excel sheet.
Returns:
A tuple of the created ontology and the associated catalog of ontology
names and resolvable path as dict.
A tuple with the
* created ontology
* associated catalog of ontology names and resolvable path as dict
* a dictionary with lists of concepts that raise errors, with the
following keys:
- "already_defined": These are concepts that are already in
the ontology,
either because they were already added in a
previous line of
the excelfile/pandas dataframe,
or because it is already defined
in the imported ontologies.
- "in_imported_ontologies": Concepts that are defined in the excel,
but already exist in the imported ontologies.
This is a subset of the 'already_defined'
- "wrongly_defined": Concepts that are given an invalid prefLabel
(e.g. with a space in the name).
- "missing_parents": Concepts that are missing parents.
These concepts are added directly
under owl:Thing.
- "invalid_parents": Concepts with invalidly defined parents.
These concepts are added directly
under owl:Thing.
- "nonadded_concepts": List of all concepts that are not added,
either because the prefLabel is invalid,
or because the concept has already been added
once or already exists in an imported
ontology.
"""
Expand Down Expand Up @@ -115,6 +141,8 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
) -> Tuple[ontopy.ontology.Ontology, dict]:
"""
Create an ontology from a pandas DataFrame.
Check 'create_ontology_from_excel' for complete documentation.
"""

# Remove lines with empty prefLabel
Expand All @@ -130,6 +158,10 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
onto, catalog = get_metadata_from_dataframe(
metadata, base_iri, imports=imports
)
# Get a set of imported concepts
imported_concepts = {
concept.prefLabel.first() for concept in onto.get_entities()
}

# Set given or default base_iri if base_iri_from_metadata is False.
if not base_iri_from_metadata:
Expand All @@ -140,6 +172,16 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
if not altlabel == "nan":
labels.update(altlabel.split(";"))

# Dictionary with lists of concepts that raise errors
concepts_with_errors = {
"already_defined": [],
"in_imported_ontologies": [],
"wrongly_defined": [],
"missing_parents": [],
"invalid_parents": [],
"nonadded_concepts": [],
}

onto.sync_python_names()
with onto:
remaining_rows = set(range(len(data)))
Expand All @@ -158,6 +200,7 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
f'Ignoring concept "{name}" since it is already in '
"the ontology."
)
concepts_with_errors["already_defined"].append(name)
# What to do if we want to add info to this concept?
# Should that be not allowed?
# If it should be allowed the index has to be added to
Expand All @@ -168,14 +211,16 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
f'Ignoring concept "{name}". '
f'The following error was raised: "{err}"'
)
concepts_with_errors["wrongly_defined"].append(name)
continue
except NoSuchLabelError:
pass

if pd.isna(row["subClassOf"]):
if row["subClassOf"] == "nan":
if not force:
raise ExcelError(f"{row[0]} has no subClassOf")
parent_names = [] # Should be "owl:Thing"
concepts_with_errors["missing_parents"].append(name)
else:
parent_names = str(row["subClassOf"]).split(";")

Expand All @@ -191,6 +236,9 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
f'Invalid parents for "{name}": '
f'"{parent_name}".'
)
concepts_with_errors["invalid_parents"].append(
name
)
break
raise ExcelError(
f'Invalid parents for "{name}": {exc}\n'
Expand Down Expand Up @@ -276,6 +324,7 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
" Will continue without these."
)
remaining_rows = False
concepts_with_errors["nonadded_concepts"] = unadded
else:
raise ExcelError(
f"Not able to add the following concepts: {unadded}."
Expand Down Expand Up @@ -303,6 +352,7 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
f"Property to be Evaluated: {prop}. "
f"Error is {exc}."
)
concepts_with_errors["errors_in_properties"].append(name)
except NoSuchLabelError as exc:
msg = (
f"Error in Property assignment for: {concept}. "
Expand All @@ -311,6 +361,9 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
)
if force is True:
warnings.warn(msg)
concepts_with_errors["errors_in_properties"].append(
name
)
else:
raise ExcelError(msg) from exc

Expand All @@ -319,7 +372,13 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
name_policy="uuid", name_prefix="EMMO_", class_docstring="elucidation"
)
onto.dir_label = False
return onto, catalog
concepts_with_errors = {
key: set(value) for key, value in concepts_with_errors.items()
}
concepts_with_errors["in_imported_ontologies"] = concepts_with_errors[
"already_defined"
].intersection(imported_concepts)
return onto, catalog, concepts_with_errors


def get_metadata_from_dataframe( # pylint: disable=too-many-locals,too-many-branches,too-many-statements
Expand Down
17 changes: 16 additions & 1 deletion tests/test_excelparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,20 @@ def test_excelparser(repo_dir: "Path") -> None:

onto = get_ontology(str(ontopath)).load()
xlspath = repo_dir / "tests" / "testonto" / "excelparser" / "onto.xlsx"
ontology, catalog = create_ontology_from_excel(xlspath, force=True)
ontology, catalog, errors = create_ontology_from_excel(xlspath, force=True)
assert onto == ontology

assert errors["already_defined"] == {"Atom", "Pattern"}
assert errors["in_imported_ontologies"] == {"Atom"}
assert errors["wrongly_defined"] == {"Temporal Boundary"}
assert errors["missing_parents"] == {"SpatioTemporalBoundary"}
assert errors["invalid_parents"] == {
"TemporalPattern",
"SubSubgrainBoundary",
"SubgrainBoundary",
}
assert errors["nonadded_concepts"] == {
"Atom",
"Pattern",
"Temporal Boundary",
}
Binary file modified tests/testonto/excelparser/onto.xlsx
Binary file not shown.

0 comments on commit 5d2971f

Please sign in to comment.