harvard-lil · rebeccacremona · Oct 31, 2023 · Oct 31, 2023 · Oct 31, 2023
diff --git a/perma_web/perma/admin.py b/perma_web/perma/admin.py
@@ -100,7 +100,7 @@ class ScoopJobIDFilter(InputFilter):
     def queryset(self, request, queryset):
         value = self.value()
         if value:
-            return queryset.filter(scoop_logs__id_capture=value)
+            return queryset.filter(scoop_job_id=value)
 
 
 class TagFilter(InputFilter):
@@ -668,11 +668,6 @@ def link_creation_timestamp(self, obj):
             return obj.link.creation_timestamp
         return None
 
-    def scoop_job_id(self, obj):
-        if obj.scoop_logs:
-            return obj.scoop_logs['id_capture']
-        return None
-
     # def link_taglist(self, obj):
     #     if obj.link:
     #         return ", ".join(o.name for o in obj.link.tags.all())

diff --git a/perma_web/perma/celery_tasks.py b/perma_web/perma/celery_tasks.py
@@ -1031,6 +1031,10 @@ def capture_with_scoop(capture_job):
             valid_if=lambda code, data: code == 200 and all(key in data for key in {"status", "id_capture"}) and data["status"] in ["pending", "started"],
         )
 
+        # Persist the Scoop job id now, before polling begins, so it is recorded even if the capture never finishes
+        capture_job.scoop_job_id = request_data['id_capture']
+        capture_job.save(update_fields=['scoop_job_id'])
+
         # Poll until done
         poll_network_errors = 0
         while True:
@@ -1041,7 +1045,7 @@ def capture_with_scoop(capture_job):
             try:
                 _, poll_data = send_to_scoop(
                     method='get',
-                    path=f"capture/{request_data['id_capture']}",
+                    path=f"capture/{capture_job.scoop_job_id}",
                     json={
                         "url": target_url
                     },
@@ -1060,7 +1064,7 @@ def capture_with_scoop(capture_job):
 
             # Show progress to user. Assumes Scoop won't take much longer than ~60s, worst case scenario
             wait_time = time.time() - scoop_start_time
-            inc_progress(capture_job, min(wait_time/60, 0.99), "Waiting for Scoop to finish")
+            inc_progress(capture_job, min(wait_time/60, 0.99), f"Waiting for Scoop job {capture_job.scoop_job_id} to finish: {poll_data['status']}")
 
         capture_job.scoop_logs = poll_data
         if poll_data.get('scoop_capture_summary'):

diff --git a/perma_web/perma/migrations/0029_capturejob_scoop_job_id.py b/perma_web/perma/migrations/0029_capturejob_scoop_job_id.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.2.22 on 2023-10-31 14:44
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('perma', '0028_auto_20230905_1813'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='capturejob',
+            name='scoop_job_id',
+            field=models.CharField(blank=True, db_index=True, max_length=255, null=True),
+        ),
+    ]
diff --git a/perma_web/perma/models.py b/perma_web/perma/models.py
@@ -2041,6 +2041,7 @@ class CaptureJob(models.Model):
     scoop_start_time = models.DateTimeField(blank=True, null=True)
     scoop_end_time = models.DateTimeField(blank=True, null=True)
     scoop_logs = JSONField(blank=True, null=True)
+    scoop_job_id = models.CharField(max_length=255, blank=True, null=True, db_index=True)
     scoop_state = models.CharField(max_length=255, blank=True, null=True, db_index=True)
 
     superseded = models.BooleanField(default=False, help_text='A user upload has made this CaptureJob irrelevant to the playback of its related Link')