Skip to content

Commit

Permalink
convert run_analysis to run_single_analysis, such that the function o…
Browse files Browse the repository at this point in the history
…nly takes in one document and update docstrings and added test
  • Loading branch information
phuang00 committed Jul 22, 2021
1 parent 0f817ba commit 2003ce6
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 29 deletions.
58 changes: 30 additions & 28 deletions backend/app/analysis/frequency.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from collections import Counter


def _get_gender_word_frequencies_relative(gender_word_counts):
"""
A private helper function that examines identifier counts keyed to Gender instances,
Expand Down Expand Up @@ -30,33 +31,34 @@ def _get_gender_word_frequencies_relative(gender_word_counts):

return output

def _run_analysis(texts, genders):

def run_single_analysis(doc_obj, genders):
"""
A helper method for running the primary frequency analysis on a single document.
This method generates three dictionaries: one (count) keying Document instances
to Gender instances to Counter instances representing the total number of instances
of each Gender's pronouns in a given Document; one (frequency) keying Document instances
to Gender instances to dictionaries of the shape {str:float} representing the total number
of instances of each Gender's pronouns over the total word count of that Document; and
one (relative) keying Document instances to Gender instances to dictionaries of the shape
{str:float} representing the relative percentage of Gender pronouns across all Gender
instances in a given Document instance.
:param texts: a list of Document instances to analyze
:param genders: a list of Gender instances whose pronouns are counted
:return: a tuple containing three dictionaries
This method generates a dictionary with three entries: a Counter (count) keying
each Gender's label to a Counter instance representing the total number of instances
of each of that Gender's pronouns in the given Document; a dictionary (frequency)
keying each Gender's label to a dictionary of the shape {str:float} representing the
total number of instances of each of that Gender's pronouns over the total word count
of the Document; and a dictionary (relative) keying each Gender's label to a dictionary
of the shape {str:float} representing the relative percentage of Gender pronouns
across all Gender instances in the given Document.
:param doc_obj: an instance of the Document model
:param genders: a list of Gender objects
:return: a dictionary containing the frequency analyses of the Document instance
"""
count = {}
frequencies = {}
relatives = {}

for document in texts:
count[document] = Counter()
frequencies[document] = {}
relatives[document] = {}
for gender in genders:
count[document][gender] = document.get_count_of_words(gender.pronouns)
frequencies[document][gender] = document.get_word_freqs(gender.pronouns)
relatives[document] = _get_gender_word_frequencies_relative(count[document])

return count, frequencies, relatives
count = Counter()
frequency = {}

for gender in genders:
count[gender.label] = doc_obj.get_count_of_words(gender.pronouns)
frequency[gender.label] = doc_obj.get_word_freqs(gender.pronouns)
relative = _get_gender_word_frequencies_relative(count)

output = {
'count': count,
'frequency': frequency,
'relative': relative
}

return output
38 changes: 37 additions & 1 deletion backend/app/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
Corpus,
Gender,
)
from .analysis import proximity
from .analysis import (
proximity,
frequency
)


class PronounSeriesTestCase(TestCase):
Expand Down Expand Up @@ -196,11 +199,44 @@ def test_update_metadata(self):
self.assertEqual(doc.new_attributes['cookies'], 'chocolate chip')
self.assertEqual(doc.word_count, 9)


class FrequencyTestCase(TestCase):
    """
    Test cases for the frequency analysis.

    The fixture is a single short Document; the test checks the count,
    frequency and relative sections returned by
    ``frequency.run_single_analysis`` for that Document.
    """

    def setUp(self):
        """Create the one Document that the frequency test analyzes."""
        passage = """She took a lighter out of her purse and handed it over to him.
He lit his cigarette and took a deep drag from it, and then began
his speech which ended in a proposal. Her tears drowned the ring."""
        Document.objects.create_document(title='doc1', year=2021, text=passage)

    def test_single_frequency(self):
        """Compare the full analysis output against hand-written expected values."""
        document = Document.objects.get(title='doc1')
        # The Gender rows are looked up, not created, by this test case.
        genders = [
            Gender.objects.get(pk=1, label='Male'),
            Gender.objects.get(pk=2, label='Female'),
            Gender.objects.get(pk=3, label='Nonbinary'),
        ]

        actual = frequency.run_single_analysis(document, genders)

        # Raw pronoun occurrences per gender label.
        expected_count = Counter({
            'Male': Counter({'his': 2, 'him': 1, 'he': 1, 'himself': 0}),
            'Female': Counter({'her': 2, 'she': 1, 'herself': 0, 'hers': 0}),
            'Nonbinary': Counter({
                'theirs': 0,
                'themself': 0,
                'them': 0,
                'their': 0,
                'they': 0,
            }),
        })

        # Expected per-pronoun frequency values for each gender label.
        expected_frequency = {
            'Male': {'his': 0.05, 'him': 0.025, 'he': 0.025, 'himself': 0.0},
            'Female': {'herself': 0.0, 'she': 0.025, 'her': 0.05, 'hers': 0.0},
            'Nonbinary': {
                'theirs': 0.0,
                'themself': 0.0,
                'them': 0.0,
                'their': 0.0,
                'they': 0.0,
            },
        }

        # Expected per-pronoun relative values for each gender label.
        expected_relative = {
            'Male': {
                'his': 0.2857142857142857,
                'him': 0.14285714285714285,
                'he': 0.14285714285714285,
                'himself': 0.0,
            },
            'Female': {
                'herself': 0.0,
                'she': 0.14285714285714285,
                'her': 0.2857142857142857,
                'hers': 0.0,
            },
            'Nonbinary': {
                'theirs': 0.0,
                'themself': 0.0,
                'them': 0.0,
                'their': 0.0,
                'they': 0.0,
            },
        }

        expected = {
            'count': expected_count,
            'frequency': expected_frequency,
            'relative': expected_relative,
        }
        self.assertEqual(expected, actual)


class CorpusTestCase(TestCase):
Expand Down

0 comments on commit 2003ce6

Please sign in to comment.