Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement bulk moderation actions #4654

Merged
merged 31 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
13d56ec
Fix a bug where reports for deindexed media would be hidden
dhruvkb Jul 22, 2024
17df1a9
Add support for bulk ES operations to subreport models
dhruvkb Jul 23, 2024
d80331a
Expand properties of `DecisionAction`
dhruvkb Jul 23, 2024
e6f3e9b
Create util to handle forward and reverse moderation
dhruvkb Jul 23, 2024
14c103f
Show pending reports as bullet points
dhruvkb Jul 24, 2024
fd6d160
Show media objects for decisions even if they are deleted
dhruvkb Jul 24, 2024
0ffd990
Add new filter view for media items with all reports resolved
dhruvkb Jul 24, 2024
4233432
Add a property `verb` to use the action in sentences
dhruvkb Jul 24, 2024
82fec56
Create a template for the bulk moderation confirmation page
dhruvkb Jul 24, 2024
fef4a67
Create a bulk moderation mixin and use in the media admin
dhruvkb Jul 24, 2024
edae8ad
Subclass `MediaSubreportAdmin` for sensitive and deleted media models
dhruvkb Jul 24, 2024
feb2ccd
Fix typo in permissions
dhruvkb Jul 24, 2024
2bba32f
Show timestamp and use it to order sensitive and deleted media
dhruvkb Jul 24, 2024
ad6618f
Show identifiers even for deleted media in decision through models
dhruvkb Jul 24, 2024
a9ec1af
Add support for notes during confirmation
dhruvkb Jul 24, 2024
54e7312
Only allow moderators with decision-making powers to perform bulk mod…
dhruvkb Jul 24, 2024
e10d661
Fix incorrect type of value for `search_fields`
dhruvkb Jul 24, 2024
fc32909
Fix broken logic in reverse mark sensitive
dhruvkb Jul 24, 2024
05bc7be
Handle a `QuerySet` lazy evaluation bug
dhruvkb Jul 24, 2024
fbfbb1d
Add a message explaining the effect of the `REVERSED_DEINDEX` operation
dhruvkb Jul 24, 2024
a34a7c5
Merge branch 'main' of https://github.com/WordPress/openverse into bu…
dhruvkb Jul 24, 2024
f8ba3e4
Sprinkle logging
dhruvkb Jul 24, 2024
45c294e
Make it possible to see sensitive media that has also been deindexed
dhruvkb Jul 24, 2024
f8c0023
Replace "count" with something more descriptive
dhruvkb Jul 26, 2024
ede4e38
Add tests for bulk ES actions
dhruvkb Jul 26, 2024
21396ad
Add factory for deleted-media models
dhruvkb Jul 27, 2024
83715d9
Update `UserFactory` to create moderators
dhruvkb Jul 27, 2024
1945a08
Add tests for the bulk moderation util
dhruvkb Jul 27, 2024
517cfe4
Fix incorrect patch
dhruvkb Jul 27, 2024
e592033
Mock ES to prevent deletions
dhruvkb Jul 27, 2024
45a4cbd
Implement review changes
dhruvkb Jul 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
274 changes: 243 additions & 31 deletions api/api/admin/media_report.py

Large diffs are not rendered by default.

39 changes: 38 additions & 1 deletion api/api/constants/moderation.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,42 @@ class DecisionAction(models.TextChoices):
REVERSED_DEINDEX = "reversed_deindex", "Reversed deindex"

@property
def is_reversal(self):
def is_forward(self):
return self in {
self.MARKED_SENSITIVE,
self.DEINDEXED_COPYRIGHT,
self.DEINDEXED_SENSITIVE,
}

@property
def is_reverse(self):
return self in {self.REVERSED_DEINDEX, self.REVERSED_MARK_SENSITIVE}

@property
def is_deindex(self):
return self in {self.DEINDEXED_COPYRIGHT, self.DEINDEXED_SENSITIVE}

@property
def verb(self) -> str:
"""
Return the verb form of the action for use in sentences.

:param object: the object of the sentence
:return: the grammatically coherent verb phrase of the action
"""

match self:
case self.MARKED_SENSITIVE:
return "marked as sensitive"
case self.DEINDEXED_COPYRIGHT:
return "deindexed (copyright)"
case self.DEINDEXED_SENSITIVE:
return "deindexed (sensitive)"
case self.REJECTED_REPORTS:
return "rejected"
case self.DEDUPLICATED_REPORTS:
return "de-duplicated"
case self.REVERSED_MARK_SENSITIVE:
return "unmarked as sensitive"
case self.REVERSED_DEINDEX:
return "reindexed"
99 changes: 85 additions & 14 deletions api/api/models/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from django.utils.html import format_html

import structlog
from elasticsearch import Elasticsearch, NotFoundError
from elasticsearch import Elasticsearch, NotFoundError, helpers
from openverse_attribution.license import License

from api.constants.moderation import DecisionAction
Expand Down Expand Up @@ -365,9 +365,9 @@ def save(self, *args, **kwargs):


class PerformIndexUpdateMixin:
@property
def indexes(self):
return [self.es_index, f"{self.es_index}-filtered"]
@classmethod
def indexes(cls):
return [cls.es_index, f"{cls.es_index}-filtered"]

def _perform_index_update(self, method: str, raise_errors: bool, **es_method_args):
"""
Expand All @@ -387,7 +387,7 @@ def _perform_index_update(self, method: str, raise_errors: bool, **es_method_arg
f"with identifier {self.media_obj.identifier}."
)

for index in self.indexes:
for index in self.indexes():
try:
getattr(es, method)(
index=index,
Expand All @@ -404,6 +404,42 @@ def _perform_index_update(self, method: str, raise_errors: bool, **es_method_arg
)
continue

@classmethod
def _bulk_perform_index_update(
cls,
method: str,
document_ids: list[str],
**es_method_args,
):
"""
Call ``method`` on the Elasticsearch client in a bulk operation.

Automatically handles 404 errors for documents, forces a refresh,
and calls the method for origin and filtered indexes.

Unlike the single-document behaviour, this function does not
provide validation to check if the media objects exist.
"""

es: Elasticsearch = settings.ES

actions = [
{
"_op_type": method,
"_index": index,
"_id": document_id,
**es_method_args,
}
for index in cls.indexes()
for document_id in document_ids
]

# Perform all actions in bulk, while allowing for missing
# documents, similar to the single-document behaviour. In all
# other cases, this raises ``BulkIndexError``.
helpers.bulk(es, actions, ignore_status=(404,))
es.indices.refresh(index=cls.indexes())


class AbstractDeletedMedia(PerformIndexUpdateMixin, OpenLedgerModel):
"""
Expand Down Expand Up @@ -432,22 +468,41 @@ class AbstractDeletedMedia(PerformIndexUpdateMixin, OpenLedgerModel):
"""
Sub-classes must override this field to point to a concrete sub-class of
``AbstractMedia``.

Note that unlike ``AbstractSensitiveMedia``, this does not provide
a ``delete()`` method to undo the effects of ``save()``. Deindexed
media can only be restored through a data refresh.
"""

class Meta:
abstract = True

def _update_es(self, raise_errors: bool) -> models.Model:
def save(self, *args, **kwargs):
super().save(*args, **kwargs)
self.perform_action()

def _update_es(self, raise_errors: bool):
self._perform_index_update(
"delete",
raise_errors,
)

def save(self, *args, **kwargs):
def perform_action(self):
self._update_es(True)
super().save(*args, **kwargs)
self.media_obj.delete() # remove the actual model instance

@classmethod
def _bulk_update_es(cls, media_item_ids: list[str]):
cls._bulk_perform_index_update(
"delete",
media_item_ids,
)

@classmethod
def bulk_perform_action(cls, media_items: list[type[AbstractMedia]]):
cls._bulk_update_es(media_items.values_list("id", flat=True))
media_items.delete() # remove the actual model instances


class AbstractSensitiveMedia(PerformIndexUpdateMixin, models.Model):
"""
Expand Down Expand Up @@ -481,6 +536,14 @@ class AbstractSensitiveMedia(PerformIndexUpdateMixin, models.Model):
class Meta:
abstract = True

def save(self, *args, **kwargs):
self._update_es(True, True)
super().save(*args, **kwargs)

def delete(self, *args, **kwargs):
self._update_es(False, False)
super().delete(*args, **kwargs)

def _update_es(self, is_mature: bool, raise_errors: bool):
"""
Update the Elasticsearch document associated with the given model.
Expand All @@ -494,13 +557,21 @@ def _update_es(self, is_mature: bool, raise_errors: bool):
doc={"mature": is_mature},
)

def save(self, *args, **kwargs):
self._update_es(True, True)
super().save(*args, **kwargs)
@classmethod
def _bulk_update_es(cls, is_mature: bool, media_item_ids: list[str]):
cls._bulk_perform_index_update(
"update",
media_item_ids,
doc={"mature": is_mature},
)

def delete(self, *args, **kwargs):
self._update_es(False, False)
super().delete(*args, **kwargs)
@classmethod
def bulk_perform_action(
cls,
is_mature: bool,
media_items: list[type[AbstractMedia]],
):
cls._bulk_update_es(is_mature, media_items.values_list("id", flat=True))


class AbstractMediaList(OpenLedgerModel):
Expand Down
68 changes: 68 additions & 0 deletions api/api/templates/admin/api/bulk_moderation_confirmation.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
{% extends "admin/base_site.html" %}
{% load admin_urls static %}

{% block extrahead %}
{{ block.super }}
{{ media }}
<script src="{% static 'admin/js/cancel.js' %}" async></script>
{% endblock %}

{% block bodyclass %}{{ block.super }} app-{{ opts.app_label }} model-{{ opts.model_name }} delete-confirmation delete-selected-confirmation{% endblock %}

{% block breadcrumbs %}
<div class="breadcrumbs">
<a href="{% url 'admin:index' %}">Home</a>
&rsaquo; <a href="{% url 'admin:app_list' app_label=opts.app_label %}">{{ opts.app_config.verbose_name }}</a>
&rsaquo; <a href="{% url opts|admin_urlname:'changelist' %}">{{ opts.verbose_name_plural|capfirst }}</a>
&rsaquo; Bulk moderate
</div>
{% endblock %}

{% block content %}
<p>Are you sure you want the selected {{ objects_name }} to be {{ decision_action.verb }}?</p>

{% if decision_action == "reversed_deindex" %}
<div class="bg-warning p-10px">
This action does not immediately result in deindexed records being
added back to the DB and Elasticsearch indices. When the records were
originally deindexed, they were deleted from both, and there is no
quick way to restore them without running a data refresh.
</div>
<style>
.p-10px {
padding: 10px;
}
.bg-warning {
background-color: var(--message-warning-bg);
}
</style>
{% endif %}

<h2>Summary</h2>
<ul>
{% for stat_key, stat_value in stats %}
<li>{{ stat_key|capfirst }}: {{ stat_value }}</li>
{% endfor %}
</ul>

<h2>Objects</h2>
{% for moderatable_object in moderatable_objects %}
<ul>{{ moderatable_object|unordered_list }}</ul>
{% endfor %}

<form method="post">
{% csrf_token %}
<div>
<div>
<textarea required name="notes" placeholder="Please provide an explanation." rows="4" cols="80"></textarea>
</div>
{% for obj in queryset %}
<input type="hidden" name="{{ action_checkbox_name }}" value="{{ obj.pk }}">
{% endfor %}
<input type="hidden" name="action" value="{{ decision_action }}">
<input type="hidden" name="post" value="yes">
<input type="submit" value="Yes, I’m sure">
<a href="#" class="button cancel-link">No, take me back</a>
</div>
</form>
{% endblock %}
Loading