From 069725f367a66939bf0cae74bcf499eac525cfc0 Mon Sep 17 00:00:00 2001 From: Rebecca Cremona Date: Thu, 26 Oct 2023 16:41:16 -0400 Subject: [PATCH 1/6] Install factory-boy and pyhumps for nice fixtures. --- perma_web/requirements.in | 2 ++ perma_web/requirements.txt | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/perma_web/requirements.in b/perma_web/requirements.in index cac189499..0615500ae 100644 --- a/perma_web/requirements.in +++ b/perma_web/requirements.in @@ -79,6 +79,7 @@ PyNaCl # encryption beautifulsoup4 # parses html of responses coverage # record code coverage django-admin-smoke-tests # basic tests for the Django admin +factory-boy # create django model fixtures on demand fakeredis[lua] # simulate redis backend for tests flake8 # code linting hypothesis # run tests with lots of generated input @@ -88,6 +89,7 @@ pytest-django # tools for django in pytest pytest-django-ordering # runs transaction-wrapped tests before table truncating tests pytest-xdist # run tests in parallel pytest # test runner +pyhumps # a helper for dealing with camel case and snake case pytest-playwright # integration tests pytest-django-liveserver-ssl # integration tests w/SSL django-extensions # runserver_plus for SSL diff --git a/perma_web/requirements.txt b/perma_web/requirements.txt index e31c52d60..e35e5462d 100644 --- a/perma_web/requirements.txt +++ b/perma_web/requirements.txt @@ -342,6 +342,14 @@ executing==0.8.3 \ --hash=sha256:c6554e21c6b060590a6d3be4b82fb78f8f0194d809de5ea7df1c093763311501 \ --hash=sha256:d1eef132db1b83649a3905ca6dd8897f71ac6f8cac79a7e58a1a09cf137546c9 # via stack-data +factory-boy==3.3.0 \ + --hash=sha256:a2cdbdb63228177aa4f1c52f4b6d83fab2b8623bf602c7dedd7eb83c0f69c04c \ + --hash=sha256:bc76d97d1a65bbd9842a6d722882098eb549ec8ee1081f9fb2e8ff29f0c300f1 + # via -r requirements.in +faker==19.12.0 \ + --hash=sha256:5990380a8ee81cf189d6b85d5fdc1fb43772cb136aca0385a08ff24ef01b9fdf \ + --hash=sha256:91438f6b1713274ec3f24970ba303617be86ce5caf6f6a0776f1d04777b6ff5f + # via factory-boy fakeredis[lua]==2.10.2 \ --hash=sha256:6377c27bc557be46089381d43fd670aece46672d091a494f73ab4c96c34022b3 \ --hash=sha256:e2a95fbda7b11188c117d68b0f9eecc00600cb449ccf3362a15fc03cf9e2477d @@ -849,6 +857,10 @@ pygments==2.15.1 \ --hash=sha256:8ace4d3c1dd481894b2005f560ead0f9f19ee64fe983366be1a21e171d12775c \ --hash=sha256:db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1 # via ipython +pyhumps==3.8.0 \ + --hash=sha256:060e1954d9069f428232a1adda165db0b9d8dfdce1d265d36df7fbff540acfd6 \ + --hash=sha256:498026258f7ee1a8e447c2e28526c0bea9407f9a59c03260aee4bd6c04d681a3 + # via -r requirements.in pynacl==1.5.0 \ --hash=sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858 \ --hash=sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d \ @@ -932,6 +944,7 @@ python-dateutil==2.8.2 \ --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 # via # botocore + # faker # timegate python-slugify==6.1.2 \ --hash=sha256:272d106cb31ab99b3496ba085e3fea0e9e76dcde967b5e9992500d1f785ce4e1 \ From b945caed572c2433aae6e4c9767433e6f609e761 Mon Sep 17 00:00:00 2001 From: Rebecca Cremona Date: Thu, 26 Oct 2023 16:58:22 -0400 Subject: [PATCH 2/6] Add basic LinkUser, CaptureJob, and Link factory fixtures. --- perma_web/conftest.py | 129 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/perma_web/conftest.py b/perma_web/conftest.py index c93a62323..b2bc34762 100644 --- a/perma_web/conftest.py +++ b/perma_web/conftest.py @@ -149,3 +149,132 @@ def logged_in_user(page, urls, user): password.type(user.password) page.locator("button.btn.login").click() return page + + +### ### +### New Fixtures ### +### ### + +# As we modernize the test suite, we can start putting new fixtures here. +# The separation should make it easier to work out, going forward, what can be deleted. + +import factory +from factory.django import DjangoModelFactory +import humps + +from datetime import timezone as tz +from django.db.models import signals + +from perma.models import LinkUser, Link, CaptureJob + + +### internal helpers ### + +# functions used within this file to set up fixtures + +def register_factory(cls): + """ + Decorator to take a factory class and inject test fixtures. For example, + @register_factory + class UserFactory + will inject the fixtures "user_factory" (equivalent to UserFactory) and "user" (equivalent to UserFactory()). + This is basically the same as the @register decorator provided by the pytest_factoryboy package, + but because it's simpler it seems to work better with RelatedFactory and SubFactory. + """ + snake_case_name = humps.decamelize(cls.__name__) + + @pytest.fixture + def factory_fixture(db): + return cls + + @pytest.fixture + def instance_fixture(db): + return cls() + + globals()[snake_case_name] = factory_fixture + globals()[snake_case_name.rsplit('_factory', 1)[0]] = instance_fixture + + return cls + + +### model factories ### + +@register_factory +class LinkUserFactory(DjangoModelFactory): + class Meta: + model = LinkUser + + first_name = factory.Faker('first_name') + last_name = factory.Faker('last_name') + email = factory.Sequence(lambda n: 'user%s@example.com' % n) + is_active = True + is_confirmed=True + + password = factory.PostGenerationMethodCall('set_password', 'pass') + + +@register_factory +class CaptureJobFactory(DjangoModelFactory): + class Meta: + model = CaptureJob + exclude = ('create_link',) + + created_by = factory.SubFactory(LinkUserFactory) + submitted_url = factory.Faker('url') + + create_link = True + link = factory.Maybe( + 'create_link', + yes_declaration=factory.RelatedFactory( + 'conftest.LinkFactory', + factory_related_name='capture_job', + create_pending_capture_job=False, + created_by=factory.SelfAttribute('..created_by'), + submitted_url=factory.SelfAttribute('..submitted_url'), + ), + no_declaration=None + ) + + +@register_factory +class InvalidCaptureJobFactory(CaptureJobFactory): + submitted_url = 'not-a-valid-url' + status = 'invalid' + message = {'url': ['Not a valid URL.']} + create_link = False + + +@register_factory +class PendingCaptureJobFactory(CaptureJobFactory): + status = 'pending' + + +@register_factory +class InProgressCaptureJobFactory(PendingCaptureJobFactory): + status = 'in_progress' + capture_start_time = factory.Faker('future_datetime', end_date='+1m', tzinfo=tz.utc) + step_count = factory.Faker('pyfloat', min_value=1, max_value=10) + step_description = factory.Faker('text', max_nb_chars=15) + + +@register_factory +@factory.django.mute_signals(signals.pre_save) +class LinkFactory(DjangoModelFactory): + class Meta: + model = Link + exclude = ('create_pending_capture_job', 'created_by') + + created_by = factory.SubFactory(LinkUserFactory) + submitted_url = factory.Faker('url') + + create_pending_capture_job = True + capture_job = factory.Maybe( + 'create_pending_capture_job', + yes_declaration=factory.SubFactory( + PendingCaptureJobFactory, + created_by=factory.SelfAttribute('..created_by'), + submitted_url=factory.SelfAttribute('..submitted_url'), + create_link=False + ), + no_declaration=None + ) From abc7e29abcb93494b09d6f3d1dadd71cadc238c8 Mon Sep 17 00:00:00 2001 From: Rebecca Cremona Date: Thu, 26 Oct 2023 17:00:02 -0400 Subject: [PATCH 3/6] Migrate test_hard_timeout --- perma_web/perma/tests/test_capture_job.py | 47 ++++++++++++----------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/perma_web/perma/tests/test_capture_job.py b/perma_web/perma/tests/test_capture_job.py index 3180c101d..50fa1060b 100644 --- a/perma_web/perma/tests/test_capture_job.py +++ b/perma_web/perma/tests/test_capture_job.py @@ -22,6 +22,30 @@ def create_capture_job(user, human=True): return capture_job +def test_hard_timeout(pending_capture_job): + + # simulate a failed run_next_capture() + job = CaptureJob.get_next_job(reserve=True) + + # capture_start_time should be set accurately on the server side + assert (job.capture_start_time - timezone.now()).total_seconds() < 60 + + # clean_up_failed_captures shouldn't affect job, since timeout hasn't passed + clean_up_failed_captures() + job.refresh_from_db() + assert job.status == "in_progress" + + # once job is sufficiently old, clean_up_failed_captures should mark it as failed + job.capture_start_time -= timedelta(seconds=settings.CELERY_TASK_TIME_LIMIT+60) + job.save() + clean_up_failed_captures() + job.refresh_from_db() + assert job.status == "failed" + + # failed jobs will have a message indicating failure reason + assert json.loads(job.message)[api_settings.NON_FIELD_ERRORS_KEY][0] == "Timed out." + + class CaptureJobTestCase(TransactionTestCase): fixtures = [ @@ -99,26 +123,3 @@ def test_race_condition_not_prevented(self): self.assertRaisesRegex(AssertionError, r'^Items in the', self.test_race_condition_prevented) CaptureJob.TEST_ALLOW_RACE = False - def test_hard_timeout(self): - create_capture_job(self.user_one) - - # simulate a failed run_next_capture() - job = CaptureJob.get_next_job(reserve=True) - - # capture_start_time should be set accurately on the server side - self.assertLess((job.capture_start_time - timezone.now()).total_seconds(), 60) - - # clean_up_failed_captures shouldn't affect job, since timeout hasn't passed - clean_up_failed_captures() - job.refresh_from_db() - self.assertEqual(job.status, "in_progress") - - # once job is sufficiently old, clean_up_failed_captures should mark it as failed - job.capture_start_time -= timedelta(seconds=settings.CELERY_TASK_TIME_LIMIT+60) - job.save() - clean_up_failed_captures() - job.refresh_from_db() - self.assertEqual(job.status, "failed") - - # failed jobs will have a message indicating failure reason - self.assertEqual(json.loads(job.message)[api_settings.NON_FIELD_ERRORS_KEY][0], "Timed out.") From 7448d48fa47c67165311965751df40971920e31f Mon Sep 17 00:00:00 2001 From: Rebecca Cremona Date: Thu, 26 Oct 2023 17:01:03 -0400 Subject: [PATCH 4/6] Migrate race condition tests. --- perma_web/perma/tests/test_capture_job.py | 64 +++++++++++++---------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/perma_web/perma/tests/test_capture_job.py b/perma_web/perma/tests/test_capture_job.py index 50fa1060b..eb1233cd3 100644 --- a/perma_web/perma/tests/test_capture_job.py +++ b/perma_web/perma/tests/test_capture_job.py @@ -1,8 +1,10 @@ +from concurrent.futures import ThreadPoolExecutor from datetime import timedelta import json -from multiprocessing.pool import ThreadPool +import pytest from django.conf import settings +from django.db import connections from django.test import TransactionTestCase from django.utils import timezone from rest_framework.settings import api_settings @@ -22,6 +24,40 @@ def create_capture_job(user, human=True): return capture_job +@pytest.mark.django_db(transaction=True) +def test_race_condition_prevented(pending_capture_job_factory): + """ Fetch two jobs at the same time in threads and make sure same job isn't returned to both. """ + jobs = [ + pending_capture_job_factory(), + pending_capture_job_factory() + ] + + def get_next_job(i): + job = CaptureJob.get_next_job(reserve=True) + for connection in connections.all(): + connection.close() + return job + + CaptureJob.TEST_PAUSE_TIME = .1 + with ThreadPoolExecutor(max_workers=2) as e: + fetched_jobs = e.map(get_next_job, range(2)) + CaptureJob.TEST_PAUSE_TIME = 0 + + assert set(jobs) == set(fetched_jobs) + + +@pytest.mark.django_db(transaction=True) +def test_race_condition_not_prevented(pending_capture_job_factory): + """ + Make sure that test_race_condition_prevented is passing for the right reason -- + should fail if race condition protection is disabled. + """ + CaptureJob.TEST_ALLOW_RACE = True + with pytest.raises(AssertionError, match="Extra items in the left set"): + test_race_condition_prevented(pending_capture_job_factory) + CaptureJob.TEST_ALLOW_RACE = False + + def test_hard_timeout(pending_capture_job): # simulate a failed run_next_capture() @@ -97,29 +133,3 @@ def test_job_queue_order(self): expected_next_jobs = [jobs[i] for i in expected_order] next_jobs = [CaptureJob.get_next_job(reserve=True) for i in range(len(jobs))] self.assertListEqual(next_jobs, expected_next_jobs) - - def test_race_condition_prevented(self): - """ Fetch two jobs at the same time in threads and make sure same job isn't returned to both. """ - jobs = [ - create_capture_job(self.user_one), - create_capture_job(self.user_one) - ] - - def get_next_job(i): - return CaptureJob.get_next_job(reserve=True) - - CaptureJob.TEST_PAUSE_TIME = .1 - fetched_jobs = ThreadPool(2).map(get_next_job, range(2)) - CaptureJob.TEST_PAUSE_TIME = 0 - - self.assertSetEqual(set(jobs), set(fetched_jobs)) - - def test_race_condition_not_prevented(self): - """ - Make sure that test_race_condition_prevented is passing for the right reason -- - should fail if race condition protection is disabled. - """ - CaptureJob.TEST_ALLOW_RACE = True - self.assertRaisesRegex(AssertionError, r'^Items in the', self.test_race_condition_prevented) - CaptureJob.TEST_ALLOW_RACE = False - From c4e8bd6ebe3e0230564d6d2458528d25c6d18d76 Mon Sep 17 00:00:00 2001 From: Rebecca Cremona Date: Thu, 26 Oct 2023 17:05:45 -0400 Subject: [PATCH 5/6] Migrate queue order test. --- perma_web/perma/tests/test_capture_job.py | 76 ++++++++++++----------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/perma_web/perma/tests/test_capture_job.py b/perma_web/perma/tests/test_capture_job.py index eb1233cd3..1a3b66359 100644 --- a/perma_web/perma/tests/test_capture_job.py +++ b/perma_web/perma/tests/test_capture_job.py @@ -24,6 +24,45 @@ def create_capture_job(user, human=True): return capture_job +def test_job_queue_order(link_user_factory, pending_capture_job_factory): + """ Jobs should be processed round-robin, one per user. """ + + user_one = link_user_factory() + user_two = link_user_factory() + + jobs = [ + pending_capture_job_factory(created_by=user_one, human=True), + pending_capture_job_factory(created_by=user_one, human=True), + pending_capture_job_factory(created_by=user_one, human=True), + pending_capture_job_factory(created_by=user_two, human=True), + + pending_capture_job_factory(created_by=user_two), + + pending_capture_job_factory(created_by=user_one, human=True), + pending_capture_job_factory(created_by=user_one, human=True), + pending_capture_job_factory(created_by=user_one, human=True), + pending_capture_job_factory(created_by=user_two, human=True), + ] + + expected_order = [ + 0, 3, # u1, u2 + 1, 8, # u1, u2 + 2, 5, 6, 7, # remaining u1 jobs + 4 # robots queue + ] + + # test CaptureJob.queue_position + for i, job in enumerate(jobs): + queue_position = job.queue_position() + expected_queue_position = expected_order.index(i)+1 + assert queue_position == expected_queue_position, f"Job {i} has queue position {queue_position}, should be {expected_queue_position}." + + # test CaptureJob.get_next_job + expected_next_jobs = [jobs[i] for i in expected_order] + next_jobs = [CaptureJob.get_next_job(reserve=True) for i in range(len(jobs))] + assert next_jobs == expected_next_jobs + + @pytest.mark.django_db(transaction=True) def test_race_condition_prevented(pending_capture_job_factory): """ Fetch two jobs at the same time in threads and make sure same job isn't returned to both. """ @@ -96,40 +135,3 @@ def setUp(self): self.user_two = LinkUser.objects.get(pk=2) self.maxDiff = None # let assertListEqual compare large lists - - ### TESTS ### - - def test_job_queue_order(self): - """ Jobs should be processed round-robin, one per user. """ - - jobs = [ - create_capture_job(self.user_one), - create_capture_job(self.user_one), - create_capture_job(self.user_one), - create_capture_job(self.user_two), - - create_capture_job(self.user_two, human=False), - - create_capture_job(self.user_one), - create_capture_job(self.user_one), - create_capture_job(self.user_one), - create_capture_job(self.user_two), - ] - - expected_order = [ - 0, 3, # u1, u2 - 1, 8, # u1, u2 - 2, 5, 6, 7, # remaining u1 jobs - 4 # robots queue - ] - - # test CaptureJob.queue_position - for i, job in enumerate(jobs): - queue_position = job.queue_position() - expected_queue_position = expected_order.index(i)+1 - self.assertEqual(queue_position, expected_queue_position, "Job %s has queue position %s, should be %s." % (i, queue_position, expected_queue_position)) - - # test CaptureJob.get_next_job - expected_next_jobs = [jobs[i] for i in expected_order] - next_jobs = [CaptureJob.get_next_job(reserve=True) for i in range(len(jobs))] - self.assertListEqual(next_jobs, expected_next_jobs) From ee508f6823237e055d241c5d42bf10159033997b Mon Sep 17 00:00:00 2001 From: Rebecca Cremona Date: Thu, 26 Oct 2023 17:09:30 -0400 Subject: [PATCH 6/6] Clean up unused scaffolding. --- .../tests/test_current_user_authorization.py | 11 +++++-- perma_web/perma/tests/test_capture_job.py | 30 +------------------ 2 files changed, 10 insertions(+), 31 deletions(-) diff --git a/perma_web/api/tests/test_current_user_authorization.py b/perma_web/api/tests/test_current_user_authorization.py index bc0789d24..c64d89f7f 100644 --- a/perma_web/api/tests/test_current_user_authorization.py +++ b/perma_web/api/tests/test_current_user_authorization.py @@ -1,6 +1,13 @@ from .utils import ApiResourceTestCase -from perma.tests.test_capture_job import create_capture_job -from perma.models import LinkUser, Link +from perma.models import LinkUser, Link, CaptureJob + + +def create_capture_job(user, human=True): + link = Link(created_by=user, submitted_url="http://example.com") + link.save() + capture_job = CaptureJob(created_by=user, link=link, human=human, status='pending') + capture_job.save() + return capture_job class CurrentUserAuthorizationTestCase(ApiResourceTestCase): diff --git a/perma_web/perma/tests/test_capture_job.py b/perma_web/perma/tests/test_capture_job.py index 1a3b66359..ffe758748 100644 --- a/perma_web/perma/tests/test_capture_job.py +++ b/perma_web/perma/tests/test_capture_job.py @@ -5,24 +5,12 @@ from django.conf import settings from django.db import connections -from django.test import TransactionTestCase from django.utils import timezone from rest_framework.settings import api_settings -from perma.models import CaptureJob, Link, LinkUser +from perma.models import CaptureJob from perma.celery_tasks import clean_up_failed_captures -# TODO: -# - check retry behavior - -# lives outside CaptureJobTestCase so it can be used by other tests -def create_capture_job(user, human=True): - link = Link(created_by=user, submitted_url="http://example.com") - link.save() - capture_job = CaptureJob(created_by=user, link=link, human=human, status='pending') - capture_job.save() - return capture_job - def test_job_queue_order(link_user_factory, pending_capture_job_factory): """ Jobs should be processed round-robin, one per user. """ @@ -119,19 +107,3 @@ def test_hard_timeout(pending_capture_job): # failed jobs will have a message indicating failure reason assert json.loads(job.message)[api_settings.NON_FIELD_ERRORS_KEY][0] == "Timed out." - - -class CaptureJobTestCase(TransactionTestCase): - - fixtures = [ - 'fixtures/users.json', - 'fixtures/folders.json', - ] - - def setUp(self): - super(CaptureJobTestCase, self).setUp() - - self.user_one = LinkUser.objects.get(pk=1) - self.user_two = LinkUser.objects.get(pk=2) - - self.maxDiff = None # let assertListEqual compare large lists