Skip to content

Commit

Permalink
chore: pre-commit
Browse files Browse the repository at this point in the history
  • Loading branch information
HLasse committed Feb 14, 2023
1 parent 4f37b6b commit 2e4d069
Show file tree
Hide file tree
Showing 15 changed files with 93 additions and 107 deletions.
3 changes: 1 addition & 2 deletions src/textdescriptives/about.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""About textdescriptives, version number is specified in the setup.cfg
file."""
""" About textdescriptives, version number is specified in the setup.cfg file."""

# if python >= 3.8, use importlib.metadata otherwise use pkg_resources
try:
Expand Down
17 changes: 8 additions & 9 deletions src/textdescriptives/components/coherence.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def n_order_coherence(doc: Doc, order: int) -> List[float]:


class Coherence:
"""Spacy v.3.0 component that adds attributes with coherence to `Doc` and
`Span` objects."""
"""Spacy v.3.0 component that adds attributes with coherence to `Doc` and `Span`
objects."""

def __init__(self, nlp: Language):
"""Initialise component."""
Expand All @@ -60,23 +60,22 @@ def __init__(self, nlp: Language):

@staticmethod
def _first_order_coherence(doc: Doc) -> List[float]:
"""Calculate first order coherence for a `Doc`, i.e. the semantic
similarity between consecutive sentences."""
"""Calculate first order coherence for a `Doc`, i.e. the semantic similarity
between consecutive sentences."""
return n_order_coherence(doc=doc, order=1)

@staticmethod
def _second_order_coherence(doc: Doc) -> List[float]:
"""Calculate second order coherence for a `Doc`, i.e. the semantic
similarity between sentences that are two sentences apart."""
"""Calculate second order coherence for a `Doc`, i.e. the semantic similarity
between sentences that are two sentences apart."""
return n_order_coherence(doc, order=2)

def coherence(self, doc: Doc) -> None:
"""Calculate mean semantic coherence for a `Doc` and set the coherence
attribute.
Coherence is calculated by taking the mean of the similarity
between sentence embeddings. See the documentation for more
details.
Coherence is calculated by taking the mean of the similarity between sentence
embeddings. See the documentation for more details.
"""
first_order_coherence = self._first_order_coherence(doc)
second_order_coherence = self._second_order_coherence(doc)
Expand Down
36 changes: 17 additions & 19 deletions src/textdescriptives/components/dependency_distance.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Calculation of statistics related to dependency distance."""
""" Calculation of statistics related to dependency distance."""
from typing import Callable

import numpy as np
Expand All @@ -7,13 +7,12 @@


class DependencyDistance:
"""spaCy v.3.0 component that adds attributes to `Doc`, `Span`, and `Token`
objects relating to dependency distance.
"""spaCy v.3.0 component that adds attributes to `Doc`, `Span`, and `Token` objects
relating to dependency distance.
Dependency distance can be used as a measure of syntactic
complexity, and measures the distance from a word to its head word.
For `Doc` objects, dependency distance is calculated on the sentence
level.
Dependency distance can be used as a measure of syntactic complexity, and measures
the distance from a word to its head word. For `Doc` objects, dependency distance is
calculated on the sentence level.
"""

def __init__(self, nlp: Language):
Expand All @@ -26,9 +25,9 @@ def __init__(self, nlp: Language):
Doc.set_extension("dependency_distance", getter=self.doc_dependency)

def token_dependency(self, token: Token) -> dict:
"""Calculate token level dependency distance, i.e. the distance from a
token to its head token. Also returns a boolean indicating whether the
dependency relation is adjacent to the token.
"""Calculate token level dependency distance, i.e. the distance from a token to
its head token. Also returns a boolean indicating whether the dependency
relation is adjacent to the token.
Returns:
dict: Dictionary with the following keys:
Expand All @@ -45,9 +44,9 @@ def token_dependency(self, token: Token) -> dict:
return {"dependency_distance": dep_dist, "adjacent_dependency": ajd_dep}

def span_dependency(self, span: Span) -> dict:
"""Aggregates token level dependency distance on the span level by
taking the mean of the dependency distance and the proportion of
adjacent dependency relations.
"""Aggregates token level dependency distance on the span level by taking the
mean of the dependency distance and the proportion of adjacent dependency
relations.
Returns:
dict: Dictionary with the following keys: dependency_distance_mean:
Expand All @@ -63,9 +62,9 @@ def span_dependency(self, span: Span) -> dict:
}

def doc_dependency(self, doc: Doc) -> dict:
"""Aggregates token level dependency distance on the document level by
taking the mean of the dependency distance and the proportion of
adjacent dependency relations on the sentence level.
"""Aggregates token level dependency distance on the document level by taking
the mean of the dependency distance and the proportion of adjacent dependency
relations on the sentence level.
Returns:
dict: Dictionary with the following keys:
Expand Down Expand Up @@ -112,9 +111,8 @@ def create_dependency_distance_component(
nlp: Language,
name: str,
) -> Callable[[Doc], Doc]:
"""Create spaCy language factory that allows DependencyDistance attributes
to be added to a pipe using
nlp.add_pipe("textdescriptives/dependency_distance")
"""Create spaCy language factory that allows DependencyDistance attributes to be
added to a pipe using nlp.add_pipe("textdescriptives/dependency_distance")
Adding this component to a pipeline sets the following attributes:
- `token._.dependency_distance`
Expand Down
21 changes: 10 additions & 11 deletions src/textdescriptives/components/descriptive_stats.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Calculation of descriptive statistics."""
""" Calculation of descriptive statistics."""
from typing import Callable, Dict, Union

import numpy as np
Expand All @@ -9,11 +9,11 @@


class DescriptiveStatistics:
"""spaCy v.3.0 component that adds attributes with descriptive statistics to
`Doc` and `Span` objects.
"""spaCy v.3.0 component that adds attributes with descriptive statistics to `Doc`
and `Span` objects.
The attributes relate to token and sentence length, number of
syllables, and counts of tokens and sentences.
The attributes relate to token and sentence length, number of syllables, and counts
of tokens and sentences.
"""

def __init__(self, nlp: Language):
Expand All @@ -40,8 +40,7 @@ def __init__(self, nlp: Language):
Doc.set_extension(extension_name, getter=getter_fun)

def token_length(self, doc: Union[Doc, Span]) -> dict:
"""Calculate mean, median and std of token length for a `Doc` or
`Span`.
"""Calculate mean, median and std of token length for a `Doc` or `Span`.
Returns:
dict: token_length_mean, token_length_median, token_length_std
Expand Down Expand Up @@ -88,8 +87,8 @@ def sentence_length(self, doc: Doc) -> dict:
}

def syllables(self, doc: Doc) -> dict:
"""Calculate mean, median and std of syllables per token for a `Doc`.
Uses `Pyphen` for hyphenation.
"""Calculate mean, median and std of syllables per token for a `Doc`. Uses
`Pyphen` for hyphenation.
Returns:
dict: syllables_per_token_mean, syllables_per_token_median,
Expand All @@ -109,8 +108,8 @@ def syllables(self, doc: Doc) -> dict:
}

def counts(self, doc: Union[Doc, Span], ignore_whitespace: bool = True) -> dict:
"""Calculate counts of tokens, unique tokens, and characters for a
`Doc` or `Span`. Adds number of sentences for `Doc` objects.
"""Calculate counts of tokens, unique tokens, and characters for a `Doc` or
`Span`. Adds number of sentences for `Doc` objects.
Args:
ignore_whitespace: if True, whitespace is not counted as a character when
Expand Down
14 changes: 7 additions & 7 deletions src/textdescriptives/components/information_theory.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Calculate the entropy and perplexity of a corpus."""
""" Calculate the entropy and perplexity of a corpus."""

from typing import Callable, Dict, Union

Expand Down Expand Up @@ -31,8 +31,8 @@ def entropy(log_probs=np.ndarray) -> float:
def perplexity(entropy: float) -> float:
"""Calculates the perplexity.
Calculated as exp(H(p)), where H is the entropy using a base e and p
is the probabilities of a given word.
Calculated as exp(H(p)), where H is the entropy using a base e and p is the
probabilities of a given word.
"""
return np.exp(entropy)

Expand All @@ -46,8 +46,8 @@ def entropy_getter(doc: Union[Doc, Span], log_prob_attr: str = "prob") -> float:
def perplexity_getter(doc: Union[Doc, Span]) -> float:
"""Calculates the perplexity of a doc.
Calculated as exp(H(p)), where H is the entropy using a base e and p
is the probabilities of a given word.
Calculated as exp(H(p)), where H is the entropy using a base e and p is the
probabilities of a given word.
"""
# check if it has the attribute entropy
if hasattr(doc._, "entropy"):
Expand Down Expand Up @@ -82,8 +82,8 @@ def set_docspan_extension(


class InformationTheory:
"""SpaCy component for adding information theoretic metrics such as entropy
and perplexity."""
"""SpaCy component for adding information theoretic metrics such as entropy and
perplexity."""

def __init__(self, nlp: Language, name: str, force: bool) -> None:
self.name = name
Expand Down
10 changes: 5 additions & 5 deletions src/textdescriptives/components/pos_proportions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Calculation of statistics that require a pos-tagger in the pipeline."""
""" Calculation of statistics that require a pos-tagger in the pipeline."""

from typing import Callable, Counter, Union

Expand All @@ -7,8 +7,8 @@


class POSProportions:
"""spaCy v.3.0 component that adds attributes for POS statistics to `Doc`
and `Span` objects."""
"""spaCy v.3.0 component that adds attributes for POS statistics to `Doc` and
`Span` objects."""

def __init__(self, nlp: Language, use_pos: bool):
"""Initialise components.
Expand All @@ -26,8 +26,8 @@ def __init__(self, nlp: Language, use_pos: bool):
Span.set_extension("pos_proportions", getter=self.pos_proportions)

def pos_proportions(self, text: Union[Doc, Span]) -> dict:
"""Calculates the proportion of tokens in a `Doc`|`Span` that are
tagged with each POS tag.
"""Calculates the proportion of tokens in a `Doc`|`Span` that are tagged with
each POS tag.
Returns:
Dict containing {pos_prop_POSTAG: proportion of all tokens tagged with
Expand Down
21 changes: 9 additions & 12 deletions src/textdescriptives/components/quality.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Component for calculating quality metrics."""
""" Component for calculating quality metrics."""
from collections import Counter, defaultdict
from typing import Callable, Dict, List, Mapping, Optional, Tuple, Union

Expand Down Expand Up @@ -37,8 +37,8 @@ def mean_word_length(span: Union[Doc, Span]) -> float:


def alpha_ratio(span: Union[Doc, Span]) -> float:
"""The percentage of spacy tokens in this document which contain at least
one alphabetic character.
"""The percentage of spacy tokens in this document which contain at least one
alphabetic character.
Args:
span (Union[Doc, Span]): A spaCy Doc or Span object
Expand All @@ -63,8 +63,7 @@ def proportion_bullet_points( # pylint: disable=dangerous-default-value
span: Union[Doc, Span],
bullet_point: set = {"-", "*"},
) -> float:
"""Calculate the proportion of lines which start with a bullet point in a
span.
"""Calculate the proportion of lines which start with a bullet point in a span.
Args:
span (Union[Doc, Span]): A spaCy Doc or Span object
Expand Down Expand Up @@ -240,9 +239,9 @@ def duplicate_ngram_fraction(
span: Union[Span, Doc],
ngram_range: Tuple[int, int],
) -> Dict[int, float]:
"""Calculates the character fraction of duplicate n-gram over the overall
text, taking care not to count overlapping n-grams twice. This does not
include spaces between the n-grams.
"""Calculates the character fraction of duplicate n-gram over the overall text,
taking care not to count overlapping n-grams twice. This does not include spaces
between the n-grams.
Args:
span (Union[Span, Doc]): A spaCy Span or Doc object.
Expand Down Expand Up @@ -352,11 +351,9 @@ def oov_ratio(span: Union[Span, Doc], vocab: Optional[Mapping] = None) -> float:


class Quality:
"""spaCy component for adding text quality metrics to the `Doc` and `Span`
objects.
"""spaCy component for adding text quality metrics to the `Doc` and `Span` objects.
Extracts metrics and returns them as a dictionary as the ._.quality
attribute.
Extracts metrics and returns them as a dictionary as the ._.quality attribute.
"""

def __init__( # pylint: disable=dangerous-default-value
Expand Down
13 changes: 6 additions & 7 deletions src/textdescriptives/components/quality_data_classes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Data classes used for the quality component."""
""" Data classes used for the quality component."""
from typing import Any, Dict, Optional, Tuple, Union

from pydantic import BaseModel, Extra, Field
Expand All @@ -7,10 +7,9 @@


class ThresholdsOutput(BaseModel):
"""An output which contains three items. 1) a threshold which is either
an interval or an accepted boolean value. 2) a value which is the value of
the metric. 3) a boolean which is True if the value is within the
thresholds.
"""An output which contains three items. 1) a threshold which is either an
interval or an accepted boolean value. 2) a value which is the value of the metric.
3) a boolean which is True if the value is within the thresholds.
Example:
>>> t_out = ThresholdsOutput(threshold=(0, 2), value=2)
Expand Down Expand Up @@ -257,8 +256,8 @@ def __repr_str__(self, join_str: str) -> str:
)

def to_flat_value_dict(self) -> Dict[str, Any]:
"""Creates a flat dictionary representation of the object to allow for
easy conversion to a pandas DataFrame."""
"""Creates a flat dictionary representation of the object to allow for easy
conversion to a pandas DataFrame."""
flat_dict = {"passed_quality_check": self.passed}

for k, v in self.__dict__.items():
Expand Down
Loading

0 comments on commit 2e4d069

Please sign in to comment.