Skip to content

Commit

Permalink
fix: Fix error caused by running all tests at once
Browse files Browse the repository at this point in the history
  • Loading branch information
KennethEnevoldsen committed Jan 23, 2023
1 parent bd97890 commit 5e46202
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 41 deletions.
1 change: 1 addition & 0 deletions src/textdescriptives/components/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from .pos_proportions import POSProportions # noqa: F401
from .quality import Quality # noqa: F401
from .readability import Readability # noqa: F401
from .information_theory import InformationTheory # noqa: F401
8 changes: 5 additions & 3 deletions src/textdescriptives/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,13 @@ def extract_dict(
extracted_metrics["text"] = docs.text
for component in metrics:
if component == "quality":
extracted_metrics.update(__get_quality(docs))
metric = __get_quality(docs)
elif component == "descriptive_stats":
extracted_metrics.update(__get_descriptive_stats_dict(docs))
metric = __get_descriptive_stats_dict(docs)
else:
extracted_metrics.update(getattr(docs._, component))
metric = getattr(docs._, component)
if metric:
extracted_metrics.update(metric)

return [extracted_metrics]

Expand Down
24 changes: 1 addition & 23 deletions src/textdescriptives/load_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,7 @@ def __call__(self, doc: Doc):
return doc


@Language.factory(
"textdescriptives/all",
assigns=[
"doc._.readability",
"doc._.dependency_distance",
"doc._.token_length",
"doc._.sentence_length",
"doc._.n_sentences",
"doc._.n_tokens",
"doc._.descriptive_stats",
"doc._.pos_proportions",
"doc._.quality",
"doc._.syllables",
"doc._.counts",
"doc._.coherence",
"doc._.first_order_coherence_values",
"doc._.second_order_coherence_values",
"doc._.passed_quality_check",
"doc._._n_sentences",
"doc._._n_tokens",
"doc._._n_syllables",
], # intentionally not assigning span attributes
)
@Language.factory("textdescriptives/all")
def create_textdescriptives_component(nlp: Language, name: str):
components = [
k
Expand Down
39 changes: 24 additions & 15 deletions src/textdescriptives/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,21 @@
from wasabi import msg


def get_valid_metrics() -> set:
    """Get valid metrics for extractor.

    Returns:
        set: Set of valid metric names, i.e. the suffixes of every spaCy
        Language factory registered under the "textdescriptives/" prefix.
    """
    # Collect textdescriptives component names from the registered spaCy
    # Language factories ("textdescriptives/<name>" -> "<name>").
    prefix = "textdescriptives"
    valid_metrics = set()
    for factory_name in Language.factories.keys():
        if factory_name.startswith(prefix):
            valid_metrics.add(factory_name.split("/")[1])
    return valid_metrics


def get_doc_assigns(metric: str) -> List[str]:
"""Get doc extension attributes for a given metric.
Expand All @@ -16,6 +31,15 @@ def get_doc_assigns(metric: str) -> List[str]:
"""
    # extract the assign names from the factory meta (this assumes that doc._.
    # only includes elements which are also extracted as part of the dataframe)
if metric == "all":
return [
col[6:]
for component in get_valid_metrics()
for col in Language.get_factory_meta(
f"textdescriptives/{component}",
).assigns
if col.startswith("doc._.")
]
return [
col[6:]
for col in Language.get_factory_meta(f"textdescriptives/{metric}").assigns
Expand Down Expand Up @@ -49,21 +73,6 @@ def get_token_assigns(metric: str) -> List[str]:
]


def get_valid_metrics() -> set:
    """Get valid metrics for extractor.

    Returns:
        set: Set of valid metric names, i.e. the suffixes of every spaCy
        Language factory registered under the "textdescriptives/" prefix.
    """

    # extract textdescriptives component names from the registered spaCy
    # Language factories ("textdescriptives/<name>" -> "<name>")
    return {
        k.split("/")[1]
        for k in Language.factories.keys()
        if k.startswith("textdescriptives")
    }


def load_sms_data():
"""Load the sms dataset
https://archive.ics.uci.edu/ml/datasets/SMS+Spam+Collection."""
Expand Down
3 changes: 3 additions & 0 deletions tests/test_information.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import pytest
import spacy

import textdescriptives as td
from textdescriptives.components.information_theory import (
entropy_getter,
per_word_perplexity_getter,
perplexity_getter,
set_lexeme_prob_table,
)
from textdescriptives.utils import _remove_textdescriptives_extensions


@pytest.fixture
Expand All @@ -31,6 +33,7 @@ def test_unigram_information_metrics(nlp): # noqa F811


def test_extract_df(nlp): # noqa F811
_remove_textdescriptives_extensions()
nlp.add_pipe("textdescriptives/information_theory")
doc = nlp("This is a very likely sentence.")

Expand Down

0 comments on commit 5e46202

Please sign in to comment.