-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathtest_extractor.py
61 lines (46 loc) · 1.88 KB
/
test_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import textdescriptives as td
import spacy
import pytest
@pytest.fixture(scope="function")
def nlp():
    """Build a spaCy pipeline with the ``textdescriptives`` component added.

    Loads the ``en_core_web_sm`` model and appends the ``textdescriptives``
    pipe. The fixture is function-scoped, so it is rebuilt for each test that
    requests it.
    """
    pipeline = spacy.load("en_core_web_sm")
    pipeline.add_pipe("textdescriptives")
    return pipeline
def test_extract_df_single_doc(nlp):
    """Smoke-test ``td.extract_df`` on a single processed document.

    Calls ``extract_df`` once without a metric selection and once per named
    metric. There are no assertions: the test passes when no call raises.
    """
    parsed = nlp("This is just a cute little text. Actually, it's two sentences.")

    # Default call, no explicit metric selection.
    td.extract_df(parsed)

    # Each metric requested on its own.
    metric_names = ("descriptive_stats", "readability", "dependency_distance")
    for name in metric_names:
        td.extract_df(parsed, metrics=name)
def test_extract_df_pipe(nlp):
    """Smoke-test ``td.extract_df`` on the output of ``nlp.pipe``.

    Feeds two short texts through ``nlp.pipe`` and hands the result straight
    to ``extract_df``. The test passes when the call does not raise.
    """
    sentences = [
        "I wonder how well the function works on multiple documents",
        "Very exciting to see, don't you think?",
    ]
    td.extract_df(nlp.pipe(sentences))
def test_extract_df_subsetters(nlp):
    """Check that the column subsetters exposed on ``td`` do not overlap.

    Extracts a frame without the text column, then selects columns with
    ``td.readability_cols``, ``td.dependency_cols`` and
    ``td.descriptive_stats_cols`` and asserts that a column from another
    group is absent from each selection.
    """
    parsed = nlp("This is just a cute little text. Actually, it's two sentences.")
    frame = td.extract_df(parsed, include_text=False)

    readability_columns = frame[td.readability_cols].columns
    assert "token_length_mean" not in readability_columns

    dependency_columns = frame[td.dependency_cols].columns
    assert "token_length_mean" not in dependency_columns

    descriptive_columns = frame[td.descriptive_stats_cols].columns
    assert "lix" not in descriptive_columns
def test_extract_df_error(nlp):
    """Check that ``td.extract_df`` raises on invalid arguments.

    Three invalid calls are exercised: a plain string instead of a processed
    document, an unknown metric name, and a non-string ``metrics`` value.
    """
    doc = nlp("Very brief text")

    # NOTE(review): ``Exception`` is very broad; narrow it once the concrete
    # exception types raised by ``extract_df`` are confirmed.

    # A raw string is not a processed document.
    with pytest.raises(Exception):
        td.extract_df("This is just a string")

    # An unrecognised metric name.
    with pytest.raises(Exception):
        td.extract_df(doc, metrics="not a metric")

    # A ``metrics`` argument of the wrong type.
    with pytest.raises(Exception):
        td.extract_df(doc, metrics=True)
def test_extract_dict_single_doc(nlp):
    """Smoke-test ``td.extract_dict`` on a single processed document.

    Calls ``extract_dict`` once without a metric selection and once per named
    metric. There are no assertions: the test passes when no call raises.
    """
    parsed = nlp("This is just a cute little text. Actually, it's two sentences.")

    # Default call, no explicit metric selection.
    td.extract_dict(parsed)

    # Each metric requested on its own.
    metric_names = ("descriptive_stats", "readability", "dependency_distance")
    for name in metric_names:
        td.extract_dict(parsed, metrics=name)
def test_extract_dict_pipe(nlp):
    """Check ``td.extract_dict`` on the output of ``nlp.pipe``.

    Two texts are piped through the model; the ``"token_length_mean"`` entry
    of the extracted result is expected to hold one value per document.

    This test was previously named ``test_extract_df_pipe``, which duplicated
    the name of the ``extract_df`` pipe test defined earlier in the module.
    The later definition replaced the earlier one, so the earlier test was
    never collected.
    """
    text = [
        "I wonder how well the function works on multiple documents",
        "Very exciting to see, don't you think?",
    ]
    docs = nlp.pipe(text)
    assert len(td.extract_dict(docs)["token_length_mean"]) == 2