Convert run_analysis to run_single_analysis so that the function only takes in one document; update docstrings and add a test
dhmit · Jul 22, 2021 · 2003ce6 · 2003ce6
1 parent 0f817ba
commit 2003ce6
Show file tree

Hide file tree

Showing 2 changed files with 67 additions and 29 deletions.
diff --git a/backend/app/analysis/frequency.py b/backend/app/analysis/frequency.py
@@ -1,5 +1,6 @@
 from collections import Counter
 
+
 def _get_gender_word_frequencies_relative(gender_word_counts):
     """
     A private helper function that examines identifier counts keyed to Gender instances,
@@ -30,33 +31,34 @@ def _get_gender_word_frequencies_relative(gender_word_counts):
 
     return output
 
-def _run_analysis(texts, genders):
+
+def run_single_analysis(doc_obj, genders):
     """
-    A private helper method for running the primary frequency analysis.
-    This method generates three dictionaries: one (count) keying Document instances
-    to Gender instances to Counter instances representing the total number of instances
-    of each Gender's pronouns in a given Document; one (frequency) keying Document instances
-    to Gender instances to dictionaries of the shape {str:float} representing the total number
-    of instances of each Gender's pronouns over the total word count of that Document; and
-    one (relative) keying Document instances to Gender instances to dictionaries of the shape
-    {str:float} representing the relative percentage of Gender pronouns across all Gender
-    instances in a given Document instance.
-
-    :param texts: a list of strings presenting the documents
-    :param genders: a list of strings presenting the pronouns
-    :return: :return: a tuple containing three dictionaries
+    Run the frequency analysis on a single Document. This method generates a dictionary
+    with three entries, each keyed by Gender label: a Counter ('count') mapping each label
+    to the number of instances of each of that Gender's pronouns in the given Document; a
+    dictionary ('frequency') mapping each label to a dictionary of the shape {str:float}
+    representing the number of instances of each of that Gender's pronouns over the total
+    word count of the Document; and a dictionary ('relative') mapping each label to a
+    dictionary of the shape {str:float} representing the relative percentage of Gender
+    pronouns across all of the given Gender instances in the Document.
+
+    :param doc_obj: an instance of the Document model
+    :param genders: a list of Gender objects
+    :return: a dictionary containing the frequency analyses of the Document instance
     """
-    count = {}
-    frequencies = {}
-    relatives = {}
-
-    for document in texts:
-        count[document] = Counter()
-        frequencies[document] = {}
-        relatives[document] = {}
-        for gender in genders:
-            count[document][gender] = document.get_count_of_words(gender.pronouns)
-            frequencies[document][gender] = document.get_word_freqs(gender.pronouns)
-        relatives[document] = _get_gender_word_frequencies_relative(count[document])
-
-    return count, frequencies, relatives
+    count = Counter()
+    frequency = {}
+
+    for gender in genders:
+        count[gender.label] = doc_obj.get_count_of_words(gender.pronouns)
+        frequency[gender.label] = doc_obj.get_word_freqs(gender.pronouns)
+    relative = _get_gender_word_frequencies_relative(count)
+
+    output = {
+        'count': count,
+        'frequency': frequency,
+        'relative': relative
+    }
+
+    return output
diff --git a/backend/app/tests.py b/backend/app/tests.py
@@ -12,7 +12,10 @@
     Corpus,
     Gender,
 )
-from .analysis import proximity
+from .analysis import (
+    proximity,
+    frequency
+)
 
 
 class PronounSeriesTestCase(TestCase):
@@ -196,11 +199,44 @@ def test_update_metadata(self):
         self.assertEqual(doc.new_attributes['cookies'], 'chocolate chip')
         self.assertEqual(doc.word_count, 9)
 
+
 class FrequencyTestCase(TestCase):
     """
     Test cases for the frequency analysis
     """
+    def setUp(self):
+        text1 = """She took a lighter out of her purse and handed it over to him.
+            He lit his cigarette and took a deep drag from it, and then began
+            his speech which ended in a proposal. Her tears drowned the ring."""
+        Document.objects.create_document(title='doc1', year=2021, text=text1)
 
+    def test_single_frequency(self):
+        doc1 = Document.objects.get(title='doc1')
+        male = Gender.objects.get(pk=1, label='Male')
+        female = Gender.objects.get(pk=2, label='Female')
+        they = Gender.objects.get(pk=3, label='Nonbinary')
+        result = frequency.run_single_analysis(doc1, [male, female, they])
+        expected = {
+            'count': Counter({
+                'Male': Counter({'his': 2, 'him': 1, 'he': 1, 'himself': 0}),
+                'Female': Counter({'her': 2, 'she': 1, 'herself': 0, 'hers': 0}),
+                'Nonbinary': Counter({'theirs': 0, 'themself': 0, 'them': 0, 'their': 0, 'they': 0})}),
+            'frequency': {
+                'Male': {'his': 0.05, 'him': 0.025, 'he': 0.025, 'himself': 0.0},
+                'Female': {'herself': 0.0, 'she': 0.025, 'her': 0.05, 'hers': 0.0},
+                'Nonbinary': {'theirs': 0.0, 'themself': 0.0, 'them': 0.0, 'their': 0.0, 'they': 0.0}},
+            'relative': {
+                'Male': {
+                    'his': 0.2857142857142857,
+                    'him': 0.14285714285714285,
+                    'he': 0.14285714285714285,
+                    'himself': 0.0},
+                'Female': {
+                    'herself': 0.0,
+                    'she': 0.14285714285714285,
+                    'her': 0.2857142857142857, 'hers': 0.0},
+                'Nonbinary': {'theirs': 0.0, 'themself': 0.0, 'them': 0.0, 'their': 0.0, 'they': 0.0}}}
+        self.assertEqual(expected, result)
 
 
 class CorpusTestCase(TestCase):