From 2291aa848187dbbe4e0175c2b65e4a1259f7136e Mon Sep 17 00:00:00 2001 From: Rebecca Cremona Date: Mon, 30 Oct 2023 14:47:41 -0400 Subject: [PATCH 1/4] Just check that expected key is present. --- perma_web/perma/celery_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perma_web/perma/celery_tasks.py b/perma_web/perma/celery_tasks.py index 36c0157e6..ceb6cd674 100644 --- a/perma_web/perma/celery_tasks.py +++ b/perma_web/perma/celery_tasks.py @@ -829,7 +829,7 @@ def save_scoop_capture(link, capture_job, data): link.primary_capture.content_type = data['scoop_capture_summary']['targetUrlContentType'] link.primary_capture.save(update_fields=['content_type']) - if data['scoop_capture_summary'].get('pageInfo'): + if 'pageInfo' in data['scoop_capture_summary']: title = data['scoop_capture_summary']['pageInfo'].get('title') if title and link.submitted_title == link.get_default_title(): link.submitted_title = title[:2100] From 7732dd4b147bc88572762219f9a5452bc7a89285 Mon Sep 17 00:00:00 2001 From: Rebecca Cremona Date: Mon, 30 Oct 2023 14:48:44 -0400 Subject: [PATCH 2/4] Hide math, for now, for performance. --- perma_web/perma/admin.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/perma_web/perma/admin.py b/perma_web/perma/admin.py index 7eae7c1ca..586a168e8 100644 --- a/perma_web/perma/admin.py +++ b/perma_web/perma/admin.py @@ -633,7 +633,7 @@ def __init__(self, *args, **kwargs): class CaptureJobAdmin(admin.ModelAdmin): - list_display = ['id', 'engine', 'status', 'superseded', 'message', 'created_by_id', 'link_id', 'human', 'submitted_url', 'capture_time', 'scoop_time', 'scoop_state'] + list_display = ['id', 'engine', 'status', 'superseded', 'message', 'created_by_id', 'link_id', 'human', 'submitted_url', 'scoop_state'] list_filter = ['engine', CreatedByFilter, LinkIDFilter, 'status', MessageFilter, 'superseded', JobWithDeletedLinkFilter, 'scoop_state'] raw_id_fields = ['link', 'created_by', 'link_batch'] @@ -654,15 +654,15 @@ def link_taglist(self, obj): return ", ".join(o.name for o in obj.link.tags.all()) return None - def capture_time(self, obj): - if obj.capture_start_time and obj.capture_end_time: - return obj.capture_end_time - obj.capture_start_time - return None + # def capture_time(self, obj): + # if obj.capture_start_time and obj.capture_end_time: + # return obj.capture_end_time - obj.capture_start_time + # return None - def scoop_time(self, obj): - if obj.scoop_start_time and obj.scoop_end_time: - return obj.scoop_end_time - obj.scoop_start_time - return None + # def scoop_time(self, obj): + # if obj.scoop_start_time and obj.scoop_end_time: + # return obj.scoop_end_time - obj.scoop_start_time + # return None class LinkBatchAdmin(admin.ModelAdmin): From 313a2acfe28295577f388e30f32c4f879efecf5f Mon Sep 17 00:00:00 2001 From: Rebecca Cremona Date: Mon, 30 Oct 2023 14:58:45 -0400 Subject: [PATCH 3/4] Spruce up CaptureJob list admin --- perma_web/perma/admin.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/perma_web/perma/admin.py b/perma_web/perma/admin.py index 586a168e8..884c60894 100644 --- a/perma_web/perma/admin.py +++ b/perma_web/perma/admin.py @@ -93,6 +93,16 @@ def queryset(self, request, queryset): return queryset.filter(link__guid__icontains=value) +class ScoopJobIDFilter(InputFilter): + parameter_name = 'scoop_job_id' + title = 'Scoop Job ID' + + def queryset(self, request, queryset): + value = self.value() + if value: + return queryset.filter(scoop_logs__id_capture=value) + + class TagFilter(InputFilter): parameter_name = 'tag' title = 'tag' @@ -102,6 +112,15 @@ def queryset(self, request, queryset): if value: return queryset.filter(tags__name__icontains=value) +class LinkTagFilter(InputFilter): + parameter_name = 'linktag' + title = 'link tag' + + def queryset(self, request, queryset): + value = self.value() + if value: + return queryset.filter(link__tags__name__icontains=value) + class MessageFilter(InputFilter): parameter_name = 'message' @@ -633,8 +652,8 @@ def __init__(self, *args, **kwargs): class CaptureJobAdmin(admin.ModelAdmin): - list_display = ['id', 'engine', 'status', 'superseded', 'message', 'created_by_id', 'link_id', 'human', 'submitted_url', 'scoop_state'] - list_filter = ['engine', CreatedByFilter, LinkIDFilter, 'status', MessageFilter, 'superseded', JobWithDeletedLinkFilter, 'scoop_state'] + list_display = ['id', 'engine', 'status', 'superseded', 'message', 'created_by_id', 'link_id', 'human', 'submitted_url', 'scoop_state', 'scoop_job_id'] + list_filter = ['engine', CreatedByFilter, LinkIDFilter, 'status', LinkTagFilter, MessageFilter, 'superseded', JobWithDeletedLinkFilter, 'scoop_state', ScoopJobIDFilter] raw_id_fields = ['link', 'created_by', 'link_batch'] paginator = FasterAdminPaginator @@ -649,11 +668,16 @@ def link_creation_timestamp(self, obj): return obj.link.creation_timestamp return None - def link_taglist(self, obj): - if obj.link: - return ", ".join(o.name for o in obj.link.tags.all()) + def scoop_job_id(self, obj): + if obj.scoop_logs: + return obj.scoop_logs['id_capture'] return None + # def link_taglist(self, obj): + # if obj.link: + # return ", ".join(o.name for o in obj.link.tags.all()) + # return None + # def capture_time(self, obj): # if obj.capture_start_time and obj.capture_end_time: # return obj.capture_end_time - obj.capture_start_time From 88663dbcf4313d75e9d15af986454293cf3649ee Mon Sep 17 00:00:00 2001 From: Rebecca Cremona Date: Mon, 30 Oct 2023 15:01:05 -0400 Subject: [PATCH 4/4] Add a redundant link to capture job from link admin (to supplement inline). --- perma_web/perma/admin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/perma_web/perma/admin.py b/perma_web/perma/admin.py index 884c60894..2b43be222 100644 --- a/perma_web/perma/admin.py +++ b/perma_web/perma/admin.py @@ -587,13 +587,13 @@ class LinkAdmin(SimpleHistoryAdmin): list_display = ['guid', 'submitted_url', 'created_by', 'creation_timestamp', 'tag_list', 'is_private', 'user_deleted', 'cached_can_play_back', 'captured_by_software', 'internet_archive_upload_status', 'file_size'] list_filter = [GUIDFilter, CreatedByFilter, SubmittedURLFilter, TagFilter, 'cached_can_play_back', 'captured_by_software', 'internet_archive_upload_status'] fieldsets = ( - (None, {'fields': ('guid', 'submitted_url', 'submitted_url_surt','submitted_title', 'submitted_description', 'created_by', 'creation_timestamp', 'captured_by_software', 'captured_by_browser', 'file_size', 'replacement_link', 'tags')}), + (None, {'fields': ('guid', 'capture_job', 'submitted_url', 'submitted_url_surt','submitted_title', 'submitted_description', 'created_by', 'creation_timestamp', 'captured_by_software', 'captured_by_browser', 'file_size', 'replacement_link', 'tags')}), ('Visibility', {'fields': ('is_private', 'private_reason', 'is_unlisted',)}), ('User Delete', {'fields': ('user_deleted', 'user_deleted_timestamp',)}), ('Organization', {'fields': ('folders', 'notes')}), ('Mirroring', {'fields': ('archive_timestamp', 'internet_archive_upload_status', 'cached_can_play_back')}), ) - readonly_fields = ['guid', 'folders', 'creation_timestamp', 'file_size', 'captured_by_software', 'captured_by_browser', 'archive_timestamp'] + readonly_fields = ['guid', 'capture_job', 'folders', 'creation_timestamp', 'file_size', 'captured_by_software', 'captured_by_browser', 'archive_timestamp'] inlines = [ new_class("CaptureInline", admin.TabularInline, model=Capture, fields=['role', 'status', 'url', 'content_type', 'record_type', 'user_upload'],