Skip to content

Commit

Permalink
Improve Corpus model (#40)
Browse files Browse the repository at this point in the history
This PR focuses on improving the Corpus model by introducing some of the optimizations for Document iteration described in PR #26. It also adds console and string representations for the Corpus and Document models and updates some of their docstrings. Furthermore, this PR includes the id field in the serialization of `PronounSeries` and `Gender` instances.
  • Loading branch information
joshfeli authored Jul 15, 2021
1 parent 73eefec commit 9b828d9
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 34 deletions.
75 changes: 43 additions & 32 deletions backend/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,6 @@ def __str__(self):

return self.identifier + '-series'

def __hash__(self):
"""
Makes the `PronounSeries` class hashable
"""

return self.identifier.__hash__()

def __eq__(self, other):
"""
Determines whether two `PronounSeries` are equal. Note that they are only equal if
Expand Down Expand Up @@ -157,10 +150,10 @@ def __repr__(self):
"""
:return: A console-friendly representation of the gender
>>> Gender('Female')
<Female>
<Female (id=1)>
"""

return f'<{self.label}>'
return f'<{self.label} (id={self.pk})>'

def __str__(self):
"""
Expand All @@ -171,13 +164,6 @@ def __str__(self):

return self.label

def __hash__(self):
"""
Allows the Gender object to be hashed
"""

return self.label.__hash__()

def __eq__(self, other):
"""
Performs a check to see whether two `Gender` objects are equivalent. This is true if and
Expand Down Expand Up @@ -270,8 +256,8 @@ def obj(self):

class Document(models.Model):
"""
This model will hold the full text and
metadata (author, title, publication date, etc.) of a document
This model holds the full text and
metadata (author, title, publication date, etc.) of a document.
"""
author = models.CharField(max_length=255, blank=True)
year = models.IntegerField(null=True, blank=True)
Expand All @@ -285,6 +271,19 @@ class Document(models.Model):

objects = DocumentManager()

def __repr__(self):
"""
:return: A console-friendly representation of a `Document` object.
"""
return f'<Document {self.pk}>'

def __str__(self):
"""
:return: A string representation of a `Document` object.
"""
title = self.title if self.title else '(No title)'
return f'Document {self.pk}: {title}'

def _clean_quotes(self):
"""
Scans through the text and replaces all of the smart quotes and apostrophes with their
Expand All @@ -303,7 +302,7 @@ def get_tokenized_text_wc_and_pos(self):
and converting everything to lowercase.
:param self: The Document to tokenize
:return: none
:return: None
"""
self._clean_quotes()
tokens = nltk.word_tokenize(self.text)
Expand Down Expand Up @@ -493,8 +492,8 @@ def update_metadata(self, new_metadata):

class Corpus(models.Model):
"""
This model will hold associations to other Documents and their
metadata (author, title, publication date, etc.)
This model holds associations to other Documents and their
metadata (author, title, publication date, etc.).
"""
title = models.CharField(max_length=30)
description = models.CharField(max_length=500, blank=True)
Expand All @@ -503,28 +502,40 @@ class Corpus(models.Model):
class Meta:
verbose_name_plural = "Corpora"

def __repr__(self):
"""
:return: A console-friendly representation of a `Corpus` object.
"""
return f'<Corpus {self.pk}: {self.title}>'

def __str__(self):
"""Returns the title of the corpus"""
"""
Specifies the `Corpus`'s title as its string representation.
:return: A string representation of a `Corpus` object.
"""
return self.title

def __len__(self):
"""Returns the number of documents associated with this corpus"""
return len(self.document_set.all())
"""
:return: The number of documents associated with this `Corpus` object as an int.
"""
return self.documents.count()

def __iter__(self):
"""Yields each document associated with the corpus"""
for this_document in self.document_set.all():
yield this_document
"""
Yields each `Document` associated with the `Corpus` object.
"""
for doc_id in self.documents.values_list('pk', flat=True):
yield self.documents.get(pk=doc_id)

def __eq__(self, other):
"""Returns true if both of the corpora are associated with the same documents"""
"""
:return: True if both of the corpora are associated with the same `Document`s.
"""
if not isinstance(other, Corpus):
raise NotImplementedError("Only a Corpus can be compared to another Corpus.")

if len(self) != len(other):
return False

if set(self.document_set.all()) == set(other.document_set.all()):
return True
else:
return False
return list(self.documents.values_list('pk', flat=True)) == list(other.documents.values_list('pk', flat=True))
4 changes: 2 additions & 2 deletions backend/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class PronounSeriesSerializer(serializers.ModelSerializer):

class Meta:
model = PronounSeries
fields = ['identifier', 'subj', 'obj', 'pos_det', 'pos_pro', 'reflex', 'all_pronouns']
fields = ['id', 'identifier', 'subj', 'obj', 'pos_det', 'pos_pro', 'reflex', 'all_pronouns']


class GenderSerializer(serializers.ModelSerializer):
Expand All @@ -31,7 +31,7 @@ class GenderSerializer(serializers.ModelSerializer):

class Meta:
model = Gender
fields = ['label', 'pronoun_series', 'pronouns', 'subj', 'obj']
fields = ['id', 'label', 'pronoun_series', 'pronouns', 'subj', 'obj']


class DocumentSerializer(serializers.ModelSerializer):
Expand Down

0 comments on commit 9b828d9

Please sign in to comment.