feat: visualize similarity metrics by predicate
Implement a visualization to assess the accuracy of different OntoGPT
configurations relative to a baseline standard for each predicate
represented by OntoGPT templates. Use a box plot to display and compare
similarity metrics across predicate values.
clnsmth authored Dec 20, 2024
1 parent 513e5e5 commit 1bd1184
Showing 2 changed files with 85 additions and 0 deletions.
74 changes: 74 additions & 0 deletions src/spinneret/benchmark.py
@@ -11,6 +11,7 @@
from daiquiri import getLogger
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from spinneret.utilities import load_workbook, compress_uri
from spinneret.workbook import delete_duplicate_annotations, delete_unannotated_rows

@@ -490,3 +491,76 @@ def is_grounded(data: list) -> bool:
    data = [d for d in data if not pd.isna(d)]

    return any("http" in s for s in data)


def plot_similarity_scores_by_predicate(
    benchmark_results: pd.DataFrame,
    test_dir_path: str,
    metric: str,
    output_file: str = None,
) -> None:
    """
    Plot predicate-level similarity scores for an OntoGPT test configuration.

    :param benchmark_results: The return value from the
        `benchmark_against_standard` function.
    :param test_dir_path: Path to the test directory containing the annotated
        test workbook files for the desired configuration. This should be a
        value from the `test_dir` column of the benchmark_results DataFrame,
        which indicates the configuration comparison to plot.
    :param metric: The metric to plot. This should be a column name from the
        benchmark_results DataFrame, e.g. "average_score", "best_score", etc.
    :param output_file: The path to save the plot to, as a PNG file.
    :return: None
    """
    # Subset the benchmark results dataframe to the rows for the requested
    # configuration, keeping only the predicate_value and metric columns
    df = benchmark_results[benchmark_results["test_dir"] == test_dir_path][
        ["predicate_value", metric]
    ]

    # Remove rows where the metric is 0 or NaN to avoid plotting them
    df = df.dropna(subset=[metric])
    df = df[df[metric] != 0]

    # Order the "predicate_value" column to ensure the plot's x-axis is ordered
    # correctly
    df["predicate_value"] = pd.Categorical(
        df["predicate_value"],
        [
            "env_broad_scale",
            "env_local_scale",
            "contains process",
            "environmental material",
            "contains measurements of type",
            "uses standard",
            "usesMethod",
            "research topic",
        ],
    )

    plt.figure(figsize=(10, 6))
    grouped_data_long = df.groupby("predicate_value")[metric].apply(list)
    plt.boxplot(
        grouped_data_long.values, labels=grouped_data_long.index, showmeans=True
    )

    # Add individual data points (jittered)
    for i, group_data in enumerate(grouped_data_long):
        x = np.random.normal(i + 1, 0.08, size=len(group_data))  # Jitter x-values
        plt.plot(x, group_data, "o", alpha=0.25, color="grey")

    configuration = os.path.basename(test_dir_path)

    plt.xlabel("Predicate")
    plt.ylabel("Score")
    title = (
        f"Similarity Score '{metric}' Against Benchmark Standard for "
        f"Configuration '{configuration}'"
    )
    plt.title(title)
    plt.xticks(rotation=-20)
    plt.tight_layout()
    if output_file:
        plt.savefig(output_file, dpi=300)
    plt.show()
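
For context, a minimal usage sketch follows (an editorial addition, not part of the diff). The hand-built DataFrame stands in for the output of `benchmark_against_standard`; it contains only the columns the new function reads (`test_dir`, `predicate_value`, and the metric), and the score values are illustrative.

import pandas as pd

from spinneret.benchmark import plot_similarity_scores_by_predicate

# Stand-in for benchmark_against_standard() output; only the columns used by
# the plotting function are included, and the scores are made up for the example.
benchmark_results = pd.DataFrame(
    {
        "test_dir": ["tests/data/benchmark/test_a"] * 8,
        "predicate_value": [
            "env_broad_scale",
            "env_local_scale",
            "contains process",
            "environmental material",
            "contains measurements of type",
            "uses standard",
            "usesMethod",
            "research topic",
        ],
        "average_score": [0.82, 0.64, 0.71, 0.58, 0.49, 0.77, 0.66, 0.91],
    }
)

plot_similarity_scores_by_predicate(
    benchmark_results=benchmark_results,
    test_dir_path="tests/data/benchmark/test_a",
    metric="average_score",
    output_file="similarity_by_predicate.png",  # optional; omit to only display
)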
11 changes: 11 additions & 0 deletions tests/test_benchmark.py
@@ -18,6 +18,7 @@
    get_shared_ontology,
    get_grounding_rates,
    is_grounded,
    plot_similarity_scores_by_predicate,
)
from spinneret.utilities import is_url

@@ -287,3 +288,13 @@ def test_is_grounded():

    # But lists with strings starting with "http" are grounded
    assert is_grounded(["http://example.com"])


@pytest.mark.skip(reason="Manual inspection required")
def test_plot_similarity_scores_by_predicate(termset_similarity_score_dataframe):
    """Test the plot_similarity_scores_by_predicate function"""
    plot_similarity_scores_by_predicate(
        benchmark_results=termset_similarity_score_dataframe,
        test_dir_path="tests/data/benchmark/test_a",
        metric="average_score",
    )
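
If a fully automated check is ever wanted, one possible non-interactive variant is sketched below; it is not part of this commit, and the test name and Agg-backend approach are assumptions. Because the function saves the figure before showing it, the test can simply assert that the PNG exists.

import matplotlib

def test_plot_similarity_scores_by_predicate_writes_png(
    termset_similarity_score_dataframe, tmp_path
):
    """Sketch: check the plot is written to disk without manual inspection"""
    matplotlib.use("Agg")  # headless backend; plt.show() warns but does not block
    output = tmp_path / "similarity_by_predicate.png"
    plot_similarity_scores_by_predicate(
        benchmark_results=termset_similarity_score_dataframe,
        test_dir_path="tests/data/benchmark/test_a",
        metric="average_score",
        output_file=str(output),
    )
    assert output.exists()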
