From fc765129e2ddc29dd996d41fc9faed7df9e0b602 Mon Sep 17 00:00:00 2001
From: Rebecca Cremona
Date: Tue, 31 Oct 2023 11:15:55 -0400
Subject: [PATCH 1/2] Save the Scoop job id in a dedicated field.

---
 perma_web/perma/admin.py                            |  7 +------
 perma_web/perma/celery_tasks.py                     |  6 +++++-
 .../migrations/0029_capturejob_scoop_job_id.py      | 18 ++++++++++++++++++
 perma_web/perma/models.py                           |  1 +
 4 files changed, 25 insertions(+), 7 deletions(-)
 create mode 100644 perma_web/perma/migrations/0029_capturejob_scoop_job_id.py

diff --git a/perma_web/perma/admin.py b/perma_web/perma/admin.py
index 2b43be222..f898a61b3 100644
--- a/perma_web/perma/admin.py
+++ b/perma_web/perma/admin.py
@@ -100,7 +100,7 @@ class ScoopJobIDFilter(InputFilter):
     def queryset(self, request, queryset):
         value = self.value()
         if value:
-            return queryset.filter(scoop_logs__id_capture=value)
+            return queryset.filter(scoop_job_id=value)
 
 
 class TagFilter(InputFilter):
@@ -668,11 +668,6 @@ def link_creation_timestamp(self, obj):
             return obj.link.creation_timestamp
         return None
 
-    def scoop_job_id(self, obj):
-        if obj.scoop_logs:
-            return obj.scoop_logs['id_capture']
-        return None
-
     # def link_taglist(self, obj):
     #     if obj.link:
     #         return ", ".join(o.name for o in obj.link.tags.all())
diff --git a/perma_web/perma/celery_tasks.py b/perma_web/perma/celery_tasks.py
index ceb6cd674..fc67000df 100644
--- a/perma_web/perma/celery_tasks.py
+++ b/perma_web/perma/celery_tasks.py
@@ -1031,6 +1031,10 @@ def capture_with_scoop(capture_job):
         valid_if=lambda code, data: code == 200 and all(key in data for key in {"status", "id_capture"}) and data["status"] in ["pending", "started"],
     )
 
+    # Save the Scoop job id for our records
+    capture_job.scoop_job_id = request_data['id_capture']
+    capture_job.save(update_fields=['scoop_job_id'])
+
     # Poll until done
     poll_network_errors = 0
     while True:
@@ -1041,7 +1045,7 @@ def capture_with_scoop(capture_job):
         try:
             _, poll_data = send_to_scoop(
                 method='get',
-                path=f"capture/{request_data['id_capture']}",
+                path=f"capture/{capture_job.scoop_job_id}",
                 json={
                     "url": target_url
                 },
diff --git a/perma_web/perma/migrations/0029_capturejob_scoop_job_id.py b/perma_web/perma/migrations/0029_capturejob_scoop_job_id.py
new file mode 100644
index 000000000..fa7097c2f
--- /dev/null
+++ b/perma_web/perma/migrations/0029_capturejob_scoop_job_id.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.2.22 on 2023-10-31 14:44
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('perma', '0028_auto_20230905_1813'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='capturejob',
+            name='scoop_job_id',
+            field=models.CharField(blank=True, db_index=True, max_length=255, null=True),
+        ),
+    ]
diff --git a/perma_web/perma/models.py b/perma_web/perma/models.py
index f44fea2ef..9b5e5307a 100755
--- a/perma_web/perma/models.py
+++ b/perma_web/perma/models.py
@@ -2041,6 +2041,7 @@ class CaptureJob(models.Model):
     scoop_start_time = models.DateTimeField(blank=True, null=True)
     scoop_end_time = models.DateTimeField(blank=True, null=True)
     scoop_logs = JSONField(blank=True, null=True)
+    scoop_job_id = models.CharField(max_length=255, blank=True, null=True, db_index=True)
     scoop_state = models.CharField(max_length=255, blank=True, null=True, db_index=True)
     superseded = models.BooleanField(default=False, help_text='A user upload has made this CaptureJob irrelevant to the playback of its related Link')
 

From 97269b67e2b430cd73fef6434e4ee126d02dfe89 Mon Sep 17 00:00:00 2001
From: Rebecca Cremona
Date: Tue, 31 Oct 2023 11:17:46 -0400
Subject: [PATCH 2/2] Log Scoop job id and status.

---
 perma_web/perma/celery_tasks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perma_web/perma/celery_tasks.py b/perma_web/perma/celery_tasks.py
index fc67000df..db620c6b2 100644
--- a/perma_web/perma/celery_tasks.py
+++ b/perma_web/perma/celery_tasks.py
@@ -1064,7 +1064,7 @@ def capture_with_scoop(capture_job):
 
         # Show progress to user. Assumes Scoop won't take much longer than ~60s, worst case scenario
         wait_time = time.time() - scoop_start_time
-        inc_progress(capture_job, min(wait_time/60, 0.99), "Waiting for Scoop to finish")
+        inc_progress(capture_job, min(wait_time/60, 0.99), f"Waiting for Scoop job {capture_job.scoop_job_id} to finish: {poll_data['status']}")
 
         capture_job.scoop_logs = poll_data
         if poll_data.get('scoop_capture_summary'):
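
Note (not part of the patches): a minimal sketch of the lookup the new dedicated field enables, assuming the migration above has been applied; the job id value is hypothetical.

    # Django shell usage sketch -- illustrative only.
    from perma.models import CaptureJob

    # Indexed equality lookup on the new dedicated column...
    job = CaptureJob.objects.filter(scoop_job_id="hypothetical-scoop-id").first()

    # ...instead of the JSONField path lookup it replaces in admin.py:
    # CaptureJob.objects.filter(scoop_logs__id_capture="hypothetical-scoop-id")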