misc: improvement from review

HLasse · Dec 15, 2022 · b75c219 · b75c219
1 parent 153764b
commit b75c219
Show file tree

Hide file tree

Showing 2 changed files with 42 additions and 52 deletions.
diff --git a/textdescriptives/components/coherence.py b/textdescriptives/components/coherence.py
@@ -57,24 +57,38 @@ def coherence(self, doc: Doc) -> None:
             },
         )
 
-    def _first_order_coherence(self, doc: Doc) -> List[float]:
+    @staticmethod
+    def _first_order_coherence(doc: Doc) -> List[float]:
         """Calculate first order coherence for a `Doc`, i.e. the semantic similarity
         between consecutive sentences."""
-        return self._n_order_coherence(doc=doc, order=1)
+        return n_order_coherence(doc=doc, order=1)
 
-    def _second_order_coherence(self, doc: Doc) -> List[float]:
+    @staticmethod
+    def _second_order_coherence(doc: Doc) -> List[float]:
         """Calculate second order coherence for a `Doc`, i.e. the semantic similarity
         between sentences that are two sentences apart."""
-        return self._n_order_coherence(doc, order=2)
-
-    def _n_order_coherence(self, doc: Doc, order: int):
-        """Calculate coherence for a `Doc` for a given order."""
-        sents = list(doc.sents)
-        if len(sents) < order + 1:
-            return np.nan
-        similarities: List[float] = []
-        for i, sent in enumerate(sents):
-            if i == len(sents) - order:
-                break
-            similarities.append(sent.similarity(sents[i + order]))
-        return similarities
+        return n_order_coherence(doc, order=2)
+
+
+def n_order_coherence(doc: Doc, order: int) -> List[float]:
+    """Calculate coherence for a `Doc` for a given order.
+
+    Args:
+        doc: A `Doc` object.
+        order: The order of coherence to calculate. For example, order=1
+            calculates the semantic similarity between consecutive sentences,
+            and order=2 calculates the semantic similarity between sentences
+            that are two sentences apart.
+
+    Returns:
+        A list of sentence similarities, or `[np.nan]` if there are too few sentences.
+    """
+    sents = list(doc.sents)
+    if len(sents) < order + 1:
+        return [np.nan]
+    similarities: List[float] = []
+    for i, sent in enumerate(sents):
+        if i == len(sents) - order:
+            break
+        similarities.append(sent.similarity(sents[i + order]))
+    return similarities
diff --git a/textdescriptives/tests/test_coherence.py b/textdescriptives/tests/test_coherence.py
@@ -6,35 +6,18 @@
 
 
 @pytest.fixture(scope="function")
-def nlp_small():
+def nlp():
     nlp = spacy.load("en_core_web_sm")
     nlp.add_pipe("textdescriptives.coherence")
     return nlp
 
 
-@pytest.fixture(scope="function")
-def nlp_large():
-    nlp = spacy.load("en_core_web_lg")
-    nlp.add_pipe("textdescriptives.coherence")
-    return nlp
-
-
-def test_coherence_integration(nlp_small):
-    assert "textdescriptives.coherence" == nlp_small.pipe_names[-1]
-
-
-def test_coherence_small_model(nlp_small):
-    doc = nlp_small(
-        "This is a short and simple sentence. Here is yet another one. We need quite a few before these coherences metrics make sense. Rambling, on and on."
-    )
+def test_coherence_integration(nlp):
+    assert "textdescriptives.coherence" == nlp.pipe_names[-1]
 
-    assert doc._.coherence
-    assert doc._.first_order_coherence_values
-    assert doc._.second_order_coherence_values
 
-
-def test_coherence_large_model(nlp_large):
-    doc = nlp_large(
+def test_coherence(nlp):
+    doc = nlp(
         "This is a short and simple sentence. Here is yet another one. We need quite a few before these coherences metrics make sense. Rambling, on and on."
     )
 
@@ -43,25 +26,18 @@ def test_coherence_large_model(nlp_large):
     assert doc._.second_order_coherence_values
 
 
-def test_coherence_small_model_single_sentence(nlp_small):
-    doc = nlp_small("This is a short and simple sentence.")
-
-    assert np.isnan(doc._.first_order_coherence_values).all()
-    assert np.isnan(doc._.second_order_coherence_values).all()
-
-
-def test_coherence_large_model_single_sentence(nlp_large):
-    doc = nlp_large("This is a short and simple sentence.")
+def test_coherence_single_sentence(nlp):
+    doc = nlp("This is a short and simple sentence.")
 
     assert np.isnan(doc._.first_order_coherence_values).all()
     assert np.isnan(doc._.second_order_coherence_values).all()
 
 
-def test_coherence_difference(nlp_large):
-    coherent_doc = nlp_large(
+def test_coherence_difference(nlp):
+    coherent_doc = nlp(
         "We will now talk about animals. Dogs are animals. Cats are animals. Birds are animals. Fish are animals."
     )
-    incoherent_doc = nlp_large(
+    incoherent_doc = nlp(
         "Let's talk about a bunch of things. Houses made of pancakes and dogs talking like humans. Look, the snow is falling."
     )
     assert (
@@ -74,8 +50,8 @@ def test_coherence_difference(nlp_large):
     )
 
 
-def test_coherence_multi_process(nlp_small):
-    docs = nlp_small.pipe(
+def test_coherence_multi_process(nlp):
+    docs = nlp.pipe(
         [
             "This is a short and simple sentence. Here is yet another one. We need quite a few before these coherences metrics make sense. Rambling, on and on.",
             "And another one. That's it. No more.",