Improve Corpus model (#40)

This PR improves the Corpus model by introducing some of the optimizations for Document iteration described in PR #26. It also adds console (`__repr__`) and string (`__str__`) representations for the Corpus and Document models and updates some of their docstrings. Furthermore, this PR includes the `id` field in the serialization of `PronounSeries` and `Gender` instances.
dhmit · Jul 15, 2021 · 9b828d9 · 9b828d9
1 parent 73eefec
commit 9b828d9
Show file tree

Hide file tree

Showing 2 changed files with 45 additions and 34 deletions.
diff --git a/backend/app/models.py b/backend/app/models.py
@@ -95,13 +95,6 @@ def __str__(self):
 
         return self.identifier + '-series'
 
-    def __hash__(self):
-        """
-        Makes the `PronounSeries` class hashable
-        """
-
-        return self.identifier.__hash__()
-
     def __eq__(self, other):
         """
         Determines whether two `PronounSeries` are equal. Note that they are only equal if
@@ -157,10 +150,10 @@ def __repr__(self):
         """
         :return: A console-friendly representation of the gender
         >>> Gender('Female')
-        <Female>
+        <Female (id=1)>
         """
 
-        return f'<{self.label}>'
+        return f'<{self.label} (id={self.pk})>'
 
     def __str__(self):
         """
@@ -171,13 +164,6 @@ def __str__(self):
 
         return self.label
 
-    def __hash__(self):
-        """
-        Allows the Gender object to be hashed
-        """
-
-        return self.label.__hash__()
-
     def __eq__(self, other):
         """
         Performs a check to see whether two `Gender` objects are equivalent. This is true if and
@@ -270,8 +256,8 @@ def obj(self):
 
 class Document(models.Model):
     """
-    This model will hold the full text and
-    metadata (author, title, publication date, etc.) of a document
+    This model holds the full text and
+    metadata (author, title, publication date, etc.) of a document.
     """
     author = models.CharField(max_length=255, blank=True)
     year = models.IntegerField(null=True, blank=True)
@@ -285,6 +271,19 @@ class Document(models.Model):
 
     objects = DocumentManager()
 
+    def __repr__(self):
+        """
+        :return: A console-friendly representation of a `Document` object.
+        """
+        return f'<Document {self.pk}>'
+
+    def __str__(self):
+        """
+        :return: A string representation of a `Document` object.
+        """
+        title = self.title if self.title else '(No title)'
+        return f'Document {self.pk}: {title}'
+
     def _clean_quotes(self):
         """
         Scans through the text and replaces all of the smart quotes and apostrophes with their
@@ -303,7 +302,7 @@ def get_tokenized_text_wc_and_pos(self):
         and converting everything to lowercase.
 
         :param self: The Document to tokenize
-        :return: none
+        :return: None
         """
         self._clean_quotes()
         tokens = nltk.word_tokenize(self.text)
@@ -493,8 +492,8 @@ def update_metadata(self, new_metadata):
 
 class Corpus(models.Model):
     """
-    This model will hold associations to other Documents and their
-    metadata (author, title, publication date, etc.)
+    This model holds associations to other Documents and their
+    metadata (author, title, publication date, etc.).
     """
     title = models.CharField(max_length=30)
     description = models.CharField(max_length=500, blank=True)
@@ -503,28 +502,40 @@ class Corpus(models.Model):
     class Meta:
         verbose_name_plural = "Corpora"
 
+    def __repr__(self):
+        """
+        :return: A console-friendly representation of a `Corpus` object.
+        """
+        return f'<Corpus {self.pk}: {self.title}>'
+
     def __str__(self):
-        """Returns the title of the corpus"""
+        """
+        Specifies the `Corpus`'s title as its string representation.
+        :return: A string representation of a `Corpus` object.
+        """
         return self.title
 
     def __len__(self):
-        """Returns the number of documents associated with this corpus"""
-        return len(self.document_set.all())
+        """
+        :return: The number of documents associated with this `Corpus` object as an int.
+        """
+        return self.documents.count()
 
     def __iter__(self):
-        """Yields each document associated with the corpus"""
-        for this_document in self.document_set.all():
-            yield this_document
+        """
+        Yields each `Document` associated with the `Corpus` object.
+        """
+        for doc_id in self.documents.values_list('pk', flat=True):
+            yield self.documents.get(pk=doc_id)
 
     def __eq__(self, other):
-        """Returns true if both of the corpora are associated with the same documents"""
+        """
+        :return: True if both of the corpora are associated with the same `Document`s.
+        """
         if not isinstance(other, Corpus):
             raise NotImplementedError("Only a Corpus can be compared to another Corpus.")
 
         if len(self) != len(other):
             return False
 
-        if set(self.document_set.all()) == set(other.document_set.all()):
-            return True
-        else:
-            return False
+        return list(self.documents.values_list('pk', flat=True)) == list(other.documents.values_list('pk', flat=True))
diff --git a/backend/app/serializers.py b/backend/app/serializers.py
@@ -20,7 +20,7 @@ class PronounSeriesSerializer(serializers.ModelSerializer):
 
     class Meta:
         model = PronounSeries
-        fields = ['identifier', 'subj', 'obj', 'pos_det', 'pos_pro', 'reflex', 'all_pronouns']
+        fields = ['id', 'identifier', 'subj', 'obj', 'pos_det', 'pos_pro', 'reflex', 'all_pronouns']
 
 
 class GenderSerializer(serializers.ModelSerializer):
@@ -31,7 +31,7 @@ class GenderSerializer(serializers.ModelSerializer):
 
     class Meta:
         model = Gender
-        fields = ['label', 'pronoun_series', 'pronouns', 'subj', 'obj']
+        fields = ['id', 'label', 'pronoun_series', 'pronouns', 'subj', 'obj']
 
 
 class DocumentSerializer(serializers.ModelSerializer):