Merge branch 'dev' of https://github.com/hlasse/TextDescriptives into dev
HLasse · Jan 2, 2023 · 6a189b1 · 6a189b1
2 parents c1a7978 + 531866b
commit 6a189b1
Showing 1 changed file with 14 additions and 8 deletions.
diff --git a/src/textdescriptives/components/coherence.py b/src/textdescriptives/components/coherence.py
@@ -18,21 +18,27 @@ def n_order_coherence(doc: Doc, order: int) -> List[float]:
     Returns:
         A list of floats representing the semantic similarity between sentences
     """
+    if doc.vector.size == 0:
+        raise ValueError(
+            "Sentence vectors are not available. Thus it is not possible to "
+            + "calculate the coherence between sentences. Please add a component "
+            + "that includes word vectors or sentence embeddings."
+            + "See https://spacy.io/usage/vectors-similarity for more details.",
+        )
+    if not doc.has_annotation("SENT_START"):
+        raise ValueError(
+            "A sentence boundary detector has not been run on this Doc, which is "
+            + "required to calculate coherence. Have you added a model with a "
+            + "sentencizer and word vectors to the pipeline?",
+        )
+
     sents = list(doc.sents)
     if len(sents) < order + 1:
         return [np.nan]
     similarities: List[float] = []
     for i, sent in enumerate(sents):
         if i == len(sents) - order:
             break
-        # check if sentence vector is empty
-        if sent.vector.size == 0:
-            raise ValueError(
-                "Sentence vectors are not available. Thus it is not possible to "
-                + "calculate the coherence between sentences. Please add a component "
-                + "that includes word vectors or sentence embeddings."
-                + "See https://spacy.io/usage/vectors-similarity for more details.",
-            )
         similarities.append(sent.similarity(sents[i + order]))
     return similarities