Skip to content

Commit

Permalink
Implement bulk moderation actions (#4654)
Browse files Browse the repository at this point in the history
Co-authored-by: Krystle Salazar <[email protected]>
  • Loading branch information
dhruvkb and krysal authored Jul 30, 2024
1 parent bfa3896 commit dff3a85
Show file tree
Hide file tree
Showing 12 changed files with 877 additions and 48 deletions.
274 changes: 243 additions & 31 deletions api/api/admin/media_report.py

Large diffs are not rendered by default.

39 changes: 38 additions & 1 deletion api/api/constants/moderation.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,42 @@ class DecisionAction(models.TextChoices):
REVERSED_DEINDEX = "reversed_deindex", "Reversed deindex"

@property
def is_reversal(self):
def is_forward(self):
return self in {
self.MARKED_SENSITIVE,
self.DEINDEXED_COPYRIGHT,
self.DEINDEXED_SENSITIVE,
}

@property
def is_reverse(self):
return self in {self.REVERSED_DEINDEX, self.REVERSED_MARK_SENSITIVE}

@property
def is_deindex(self):
return self in {self.DEINDEXED_COPYRIGHT, self.DEINDEXED_SENSITIVE}

@property
def verb(self) -> str:
"""
Return the verb form of the action for use in sentences.
:param object: the object of the sentence
:return: the grammatically coherent verb phrase of the action
"""

match self:
case self.MARKED_SENSITIVE:
return "marked as sensitive"
case self.DEINDEXED_COPYRIGHT:
return "deindexed (copyright)"
case self.DEINDEXED_SENSITIVE:
return "deindexed (sensitive)"
case self.REJECTED_REPORTS:
return "rejected"
case self.DEDUPLICATED_REPORTS:
return "de-duplicated"
case self.REVERSED_MARK_SENSITIVE:
return "unmarked as sensitive"
case self.REVERSED_DEINDEX:
return "reindexed"
99 changes: 85 additions & 14 deletions api/api/models/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from django.utils.html import format_html

import structlog
from elasticsearch import Elasticsearch, NotFoundError
from elasticsearch import Elasticsearch, NotFoundError, helpers
from openverse_attribution.license import License

from api.constants.moderation import DecisionAction
Expand Down Expand Up @@ -365,9 +365,9 @@ def save(self, *args, **kwargs):


class PerformIndexUpdateMixin:
@property
def indexes(self):
return [self.es_index, f"{self.es_index}-filtered"]
@classmethod
def indexes(cls):
return [cls.es_index, f"{cls.es_index}-filtered"]

def _perform_index_update(self, method: str, raise_errors: bool, **es_method_args):
"""
Expand All @@ -387,7 +387,7 @@ def _perform_index_update(self, method: str, raise_errors: bool, **es_method_arg
f"with identifier {self.media_obj.identifier}."
)

for index in self.indexes:
for index in self.indexes():
try:
getattr(es, method)(
index=index,
Expand All @@ -404,6 +404,42 @@ def _perform_index_update(self, method: str, raise_errors: bool, **es_method_arg
)
continue

@classmethod
def _bulk_perform_index_update(
cls,
method: str,
document_ids: list[str],
**es_method_args,
):
"""
Call ``method`` on the Elasticsearch client in a bulk operation.
Automatically handles 404 errors for documents, forces a refresh,
and calls the method for origin and filtered indexes.
Unlike the single-document behaviour, this function does not
provide validation to check if the media objects exist.
"""

es: Elasticsearch = settings.ES

actions = [
{
"_op_type": method,
"_index": index,
"_id": document_id,
**es_method_args,
}
for index in cls.indexes()
for document_id in document_ids
]

# Perform all actions in bulk, while allowing for missing
# documents, similar to the single-document behaviour. In all
# other cases, this raises ``BulkIndexError``.
helpers.bulk(es, actions, ignore_status=(404,))
es.indices.refresh(index=cls.indexes())


class AbstractDeletedMedia(PerformIndexUpdateMixin, OpenLedgerModel):
"""
Expand Down Expand Up @@ -432,22 +468,41 @@ class AbstractDeletedMedia(PerformIndexUpdateMixin, OpenLedgerModel):
"""
Sub-classes must override this field to point to a concrete sub-class of
``AbstractMedia``.
Note that unlike ``AbstractSensitiveMedia``, this does not provide
a ``delete()`` method to undo the effects of ``save()``. Deindexed
media can only be restored through a data refresh.
"""

class Meta:
abstract = True

def _update_es(self, raise_errors: bool) -> models.Model:
def save(self, *args, **kwargs):
super().save(*args, **kwargs)
self.perform_action()

def _update_es(self, raise_errors: bool):
self._perform_index_update(
"delete",
raise_errors,
)

def save(self, *args, **kwargs):
def perform_action(self):
self._update_es(True)
super().save(*args, **kwargs)
self.media_obj.delete() # remove the actual model instance

@classmethod
def _bulk_update_es(cls, media_item_ids: list[str]):
cls._bulk_perform_index_update(
"delete",
media_item_ids,
)

@classmethod
def bulk_perform_action(cls, media_items: list[type[AbstractMedia]]):
cls._bulk_update_es(media_items.values_list("id", flat=True))
media_items.delete() # remove the actual model instances


class AbstractSensitiveMedia(PerformIndexUpdateMixin, models.Model):
"""
Expand Down Expand Up @@ -481,6 +536,14 @@ class AbstractSensitiveMedia(PerformIndexUpdateMixin, models.Model):
class Meta:
abstract = True

def save(self, *args, **kwargs):
self._update_es(True, True)
super().save(*args, **kwargs)

def delete(self, *args, **kwargs):
self._update_es(False, False)
super().delete(*args, **kwargs)

def _update_es(self, is_mature: bool, raise_errors: bool):
"""
Update the Elasticsearch document associated with the given model.
Expand All @@ -494,13 +557,21 @@ def _update_es(self, is_mature: bool, raise_errors: bool):
doc={"mature": is_mature},
)

def save(self, *args, **kwargs):
self._update_es(True, True)
super().save(*args, **kwargs)
@classmethod
def _bulk_update_es(cls, is_mature: bool, media_item_ids: list[str]):
cls._bulk_perform_index_update(
"update",
media_item_ids,
doc={"mature": is_mature},
)

def delete(self, *args, **kwargs):
self._update_es(False, False)
super().delete(*args, **kwargs)
@classmethod
def bulk_perform_action(
cls,
is_mature: bool,
media_items: list[type[AbstractMedia]],
):
cls._bulk_update_es(is_mature, media_items.values_list("id", flat=True))


class AbstractMediaList(OpenLedgerModel):
Expand Down
68 changes: 68 additions & 0 deletions api/api/templates/admin/api/bulk_moderation_confirmation.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
{% extends "admin/base_site.html" %}
{% load admin_urls static %}

{% block extrahead %}
{{ block.super }}
{{ media }}
<script src="{% static 'admin/js/cancel.js' %}" async></script>
{% endblock %}

{% block bodyclass %}{{ block.super }} app-{{ opts.app_label }} model-{{ opts.model_name }} delete-confirmation delete-selected-confirmation{% endblock %}

{% block breadcrumbs %}
<div class="breadcrumbs">
<a href="{% url 'admin:index' %}">Home</a>
&rsaquo; <a href="{% url 'admin:app_list' app_label=opts.app_label %}">{{ opts.app_config.verbose_name }}</a>
&rsaquo; <a href="{% url opts|admin_urlname:'changelist' %}">{{ opts.verbose_name_plural|capfirst }}</a>
&rsaquo; Bulk moderate
</div>
{% endblock %}

{% block content %}
<p>Are you sure you want the selected {{ objects_name }} to be {{ decision_action.verb }}?</p>

{% if decision_action == "reversed_deindex" %}
<div class="bg-warning p-10px">
This action does not immediately result in deindexed records being
added back to the DB and Elasticsearch indices. When the records were
originally deindexed, they were deleted from both, and there is no
quick way to restore them without running a data refresh.
</div>
<style>
.p-10px {
padding: 10px;
}
.bg-warning {
background-color: var(--message-warning-bg);
}
</style>
{% endif %}

<h2>Summary</h2>
<ul>
{% for stat_key, stat_value in stats %}
<li>{{ stat_key|capfirst }}: {{ stat_value }}</li>
{% endfor %}
</ul>

<h2>Objects</h2>
{% for moderatable_object in moderatable_objects %}
<ul>{{ moderatable_object|unordered_list }}</ul>
{% endfor %}

<form method="post">
{% csrf_token %}
<div>
<div>
<textarea required name="notes" placeholder="Please provide an explanation." rows="4" cols="80"></textarea>
</div>
{% for obj in queryset %}
<input type="hidden" name="{{ action_checkbox_name }}" value="{{ obj.pk }}">
{% endfor %}
<input type="hidden" name="action" value="{{ decision_action }}">
<input type="hidden" name="post" value="yes">
<input type="submit" value="Yes, I’m sure">
<a href="#" class="button cancel-link">No, take me back</a>
</div>
</form>
{% endblock %}
Loading

0 comments on commit dff3a85

Please sign in to comment.