Skip to content

Commit

Permalink
Merge pull request #18 from dhmit/create_corpus_model
Browse files Browse the repository at this point in the history
Create corpus model
  • Loading branch information
phuang00 authored Jul 10, 2021
2 parents f90c87d + 30e7c29 commit 54d8bed
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 12 deletions.
1 change: 1 addition & 0 deletions backend/app/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
models.Document,
models.PronounSeries,
models.Gender,
models.Corpus
]

for model in models_to_register:
Expand Down
25 changes: 25 additions & 0 deletions backend/app/migrations/0007_corpus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Generated by Django 3.1.5 on 2021-07-10 21:17

from django.db import migrations, models


# Schema migration 0007: adds the Corpus model with a title, an optional
# description, and a many-to-many link to app.Document.
class Migration(migrations.Migration):

# Must be applied after migration 0006_initial_data of the 'app' app.
dependencies = [
('app', '0006_initial_data'),
]

operations = [
migrations.CreateModel(
name='Corpus',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('title', models.CharField(max_length=30)),
# blank=True: the description may be left empty.
('description', models.CharField(blank=True, max_length=500)),
('documents', models.ManyToManyField(to='app.Document')),
],
options={
# Plural display name is "Corpora" rather than an auto-derived plural.
'verbose_name_plural': 'Corpora',
},
),
]
39 changes: 39 additions & 0 deletions backend/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,3 +489,42 @@ def update_metadata(self, new_metadata):
self.text = new_metadata['text']
self.get_tokenized_text_wc_and_pos()
self.save()


class Corpus(models.Model):
    """
    A titled collection of Documents.

    This model holds associations to other Documents and their
    metadata (author, title, publication date, etc.)

    A Corpus supports ``len(corpus)``, iteration over its documents, and
    ``==`` comparison with another Corpus by document membership.

    NOTE: because ``__len__`` is defined and ``__bool__`` is not, a corpus
    with no documents is falsy.
    """
    title = models.CharField(max_length=30)
    description = models.CharField(max_length=500, blank=True)
    # The many-to-many field is declared on this model, so the related
    # manager on a Corpus instance is ``documents`` (``corpus_set`` is the
    # reverse accessor on Document). There is no ``document_set`` here.
    documents = models.ManyToManyField(Document)

    class Meta:
        verbose_name_plural = "Corpora"

    # Overriding __eq__ in a class body makes Python set __hash__ to None
    # unless it is defined too, which would leave instances unusable in sets
    # and as dict keys. Restore the base model hash. Two distinct corpora
    # that hold the same documents compare equal but still hash differently.
    __hash__ = models.Model.__hash__

    def __str__(self):
        """Returns the title of the corpus"""
        return self.title

    def __len__(self):
        """Returns the number of documents associated with this corpus"""
        return self.documents.count()

    def __iter__(self):
        """Yields each document associated with the corpus"""
        yield from self.documents.all()

    def __eq__(self, other):
        """
        Returns true if both of the corpora are associated with the same documents

        :param other: the object to compare against; must be a Corpus
        :return: True when both corpora hold exactly the same documents
        :raises NotImplementedError: if ``other`` is not a Corpus
        """
        if not isinstance(other, Corpus):
            raise NotImplementedError("Only a Corpus can be compared to another Corpus.")

        # Same object: trivially equal, and no database access is needed.
        if self is other:
            return True

        # Cheap size check before loading both document sets.
        if len(self) != len(other):
            return False

        return set(self.documents.all()) == set(other.documents.all())
11 changes: 11 additions & 0 deletions backend/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
PronounSeries,
Gender,
Document,
Corpus
)


Expand Down Expand Up @@ -47,6 +48,16 @@ class SimpleDocumentSerializer(serializers.ModelSerializer):
"""
Serializes a Document object (does not include the text itself)
"""

class Meta:
model = Document
fields = ['id', 'author', 'title', 'year', 'word_count']

class CorpusSerializer(serializers.ModelSerializer):
    """
    Serializes a Corpus object: its id, title and description.
    The corpus's documents are not part of the output.
    """

    class Meta:
        model = Corpus
        fields = [
            'id',
            'title',
            'description',
        ]
30 changes: 18 additions & 12 deletions backend/app/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from .models import (
PronounSeries,
Document,
Corpus
)


Expand Down Expand Up @@ -186,19 +187,24 @@ def test_update_metadata(self):
self.assertEqual(doc.word_count, 9)


class MainTests(TestCase):
class CorpusTestCase(TestCase):
"""
Backend TestCase
Test Cases for the Corpus Model
"""

# def setUp(self):
# super().setUp()
# do any setup here
def setUp(self):
    """Creates one corpus without documents and three short documents."""
    Corpus.objects.create(title='corpus1', description='testing corpus save')

    # Keyword arguments for each document; only doc1 is given a year.
    document_fixtures = (
        {'title': 'doc1', 'year': 2021, 'text': 'The quick brown fox jumped over the lazy dog.'},
        {'title': 'doc2', 'text': 'She really likes to eat chocolate!'},
        {'title': 'doc3', 'text': 'Do you like ice cream as much as I do?'},
    )
    for fixture in document_fixtures:
        Document.objects.create_document(**fixture)

def test_add_document_to_corpus(self):
    """
    Documents can be attached to a corpus from either side of the
    many-to-many relation.
    """
    corpus1 = Corpus.objects.get(title='corpus1')
    doc1 = Document.objects.get(title='doc1')
    doc2 = Document.objects.get(title='doc2')
    doc3 = Document.objects.get(title='doc3')

    # Attach from the Document side through the reverse accessor.
    doc1.corpus_set.add(corpus1)
    # This test is about membership, not row order, so compare the
    # contents without regard to the order the database returns them in.
    self.assertCountEqual(corpus1.documents.all(), [doc1])

    # Attach from the Corpus side through the forward accessor.
    corpus1.documents.add(doc2, doc3)
    self.assertCountEqual(corpus1.documents.all(), [doc1, doc2, doc3])

def test_sample(self):
"""
Remove me once we have real tests here.
"""
two = 2
another_two = 2
self.assertEqual(two + another_two, 4)

0 comments on commit 54d8bed

Please sign in to comment.