Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: LEAP-648: Reset and recalculate project summary #5432

Merged
merged 22 commits into from
Feb 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions label_studio/core/all_urls.json
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,12 @@
"name": "projects:api:project-summary",
"decorators": ""
},
{
"url": "/api/projects/<int:pk>/summary/reset/",
"module": "projects.api.ProjectSummaryResetAPI",
"name": "projects:api:project-summary-reset",
"decorators": ""
},
{
"url": "/api/projects/<int:pk>/imports/<int:import_pk>/",
"module": "projects.api.ProjectImportAPI",
Expand Down
28 changes: 28 additions & 0 deletions label_studio/projects/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from core.label_config import config_essential_data_has_changed
from core.mixins import GetParentObjectMixin
from core.permissions import ViewClassPermission, all_permissions
from core.redis import start_job_async_or_sync
from core.utils.common import paginator, paginator_help, temporary_disconnect_all_signals
from core.utils.exceptions import LabelStudioDatabaseException, ProjectExistException
from core.utils.io import find_dir, find_file, read_yaml
Expand All @@ -23,6 +24,7 @@
from drf_yasg.utils import swagger_auto_schema
from projects.functions.next_task import get_next_task
from projects.functions.stream_history import get_label_stream_history
from projects.functions.utils import recalculate_created_annotations_and_labels_from_scratch
from projects.models import Project, ProjectImport, ProjectManager, ProjectReimport, ProjectSummary
from projects.serializers import (
GetFieldsSerializer,
Expand Down Expand Up @@ -397,6 +399,32 @@ def get(self, *args, **kwargs):
return super(ProjectSummaryAPI, self).get(*args, **kwargs)


class ProjectSummaryResetAPI(GetParentObjectMixin, generics.CreateAPIView):
    """Endpoint that triggers a full rebuild of the counters kept on a project's summary.

    The counters in project.summary (created_labels, created_labels_drafts) are maintained
    incrementally, and it is hard to track every change made to annotation region labels, so
    over time they can stop reflecting reality. Since label config changes are rejected when
    they touch labels present in these created_labels* dictionaries, stale counters matter.
    A POST here hands the project and its summary to
    recalculate_created_annotations_and_labels_from_scratch via start_job_async_or_sync.
    """

    parser_classes = (JSONParser,)
    parent_queryset = Project.objects.all()
    permission_required = ViewClassPermission(
        POST=all_permissions.projects_change,
    )

    @swagger_auto_schema(auto_schema=None)
    def post(self, *args, **kwargs):
        # Resolve the project first, then its summary, then the caller's organization,
        # so lookups happen in the same order as the job arguments are listed.
        parent = self.get_parent_object()
        job_args = (parent, parent.summary)
        org_id = self.request.user.active_organization.id

        start_job_async_or_sync(
            recalculate_created_annotations_and_labels_from_scratch,
            *job_args,
            organization_id=org_id,
        )
        # No payload is returned; the response only acknowledges the request.
        return Response(status=status.HTTP_200_OK)


@method_decorator(
name='get',
decorator=swagger_auto_schema(
Expand Down
37 changes: 36 additions & 1 deletion label_studio/projects/functions/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
from tasks.models import Task
from logging import getLogger
from typing import TYPE_CHECKING

from tasks.models import AnnotationDraft, Task

logger = getLogger(__name__)


if TYPE_CHECKING:
from projects.models import Project, ProjectSummary


def make_queryset_from_iterable(tasks_list):
Expand Down Expand Up @@ -26,3 +35,29 @@ def make_queryset_from_iterable(tasks_list):
raise ValueError(f'Unknown object type: {str(task)}')
queryset = Task.objects.filter(id__in=ids)
return queryset


def recalculate_created_annotations_and_labels_from_scratch(
    project: 'Project', summary: 'ProjectSummary', organization_id: int
) -> None:
    """Wipe and rebuild created_annotations, created_labels and created_labels_drafts.

    Each counter dictionary on the summary is first replaced with an empty dict and then
    repopulated by the summary's own update_* method from the project's current annotations
    and annotation drafts.

    :param project: Project whose annotations and drafts are the source of truth
    :param summary: ProjectSummary that receives the rebuilt counters
    :param organization_id: Organization.id, it is required for django-rq displaying on admin page
    """
    logger.info(f'Reset cache started for project {project.id} and organization {organization_id}')

    # Counters derived from submitted annotations.
    summary.created_labels = {}
    summary.created_annotations = {}
    project_annotations = project.annotations.all()
    summary.update_created_annotations_and_labels(project_annotations)

    # Counters derived from annotation drafts, looked up through their task's project.
    summary.created_labels_drafts = {}
    project_drafts = AnnotationDraft.objects.filter(task__project=project)
    summary.update_created_labels_drafts(project_drafts)

    report = (
        f'Reset cache finished for project {project.id} and organization {organization_id}:\n'
        f'created_annotations = {summary.created_annotations}\n'
        f'created_labels = {summary.created_labels}\n'
        f'created_labels_drafts = {summary.created_labels_drafts}'
    )
    logger.info(report)
100 changes: 55 additions & 45 deletions label_studio/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1200,37 +1200,43 @@ def update_created_annotations_and_labels(self, annotations):
self.save(update_fields=['created_annotations', 'created_labels'])

def remove_created_annotations_and_labels(self, annotations):
created_annotations = dict(self.created_annotations)
labels = dict(self.created_labels)
for annotation in annotations:
results = get_attr_or_item(annotation, 'result') or []
if not isinstance(results, list):
continue
# we are going to remove all annotations, so we'll reset the corresponding fields on the summary
remove_all_annotations = self.project.annotations.count() == len(annotations)
created_annotations, created_labels = (
({}, {}) if remove_all_annotations else (dict(self.created_annotations), dict(self.created_labels))
)

for result in results:
# reduce annotation counters
key = self._get_annotation_key(result)
if key in created_annotations:
created_annotations[key] -= 1
if created_annotations[key] == 0:
created_annotations.pop(key)

# reduce labels counters
from_name = result.get('from_name', None)
if from_name not in labels:
if not remove_all_annotations:
for annotation in annotations:
results = get_attr_or_item(annotation, 'result') or []
if not isinstance(results, list):
continue
for label in self._get_labels(result):
label = str(label)
if label in labels[from_name]:
labels[from_name][label] -= 1
if labels[from_name][label] == 0:
labels[from_name].pop(label)
if not labels[from_name]:
labels.pop(from_name)

for result in results:
# reduce annotation counters
key = self._get_annotation_key(result)
if key in created_annotations:
created_annotations[key] -= 1
if created_annotations[key] == 0:
created_annotations.pop(key)

# reduce labels counters
from_name = result.get('from_name', None)
if from_name not in created_labels:
continue
for label in self._get_labels(result):
label = str(label)
if label in created_labels[from_name]:
created_labels[from_name][label] -= 1
if created_labels[from_name][label] == 0:
created_labels[from_name].pop(label)
if not created_labels[from_name]:
created_labels.pop(from_name)

logger.debug(f'summary.created_annotations = {created_annotations}')
logger.debug(f'summary.created_labels = {labels}')
logger.debug(f'summary.created_labels = {created_labels}')
self.created_annotations = created_annotations
self.created_labels = labels
self.created_labels = created_labels
self.save(update_fields=['created_annotations', 'created_labels'])

def update_created_labels_drafts(self, drafts):
Expand All @@ -1257,25 +1263,29 @@ def update_created_labels_drafts(self, drafts):
self.save(update_fields=['created_labels_drafts'])

def remove_created_drafts_and_labels(self, drafts):
labels = dict(self.created_labels_drafts)
for draft in drafts:
results = get_attr_or_item(draft, 'result') or []
if not isinstance(results, list):
continue

for result in results:
# reduce labels counters
from_name = result.get('from_name', None)
if from_name not in labels:
# we are going to remove all drafts, so we'll reset the corresponding field on the summary
remove_all_drafts = AnnotationDraft.objects.filter(task__project=self.project).count() == len(drafts)
labels = {} if remove_all_drafts else dict(self.created_labels_drafts)

if not remove_all_drafts:
for draft in drafts:
results = get_attr_or_item(draft, 'result') or []
if not isinstance(results, list):
continue
for label in self._get_labels(result):
label = str(label)
if label in labels[from_name]:
labels[from_name][label] -= 1
if labels[from_name][label] == 0:
labels[from_name].pop(label)
if not labels[from_name]:
labels.pop(from_name)

for result in results:
# reduce labels counters
from_name = result.get('from_name', None)
if from_name not in labels:
continue
for label in self._get_labels(result):
label = str(label)
if label in labels[from_name]:
labels[from_name][label] -= 1
if labels[from_name][label] == 0:
labels[from_name].pop(label)
if not labels[from_name]:
labels.pop(from_name)
logger.debug(f'summary.created_labels_drafts = {labels}')
self.created_labels_drafts = labels
self.save(update_fields=['created_labels_drafts'])
Expand Down
6 changes: 6 additions & 0 deletions label_studio/projects/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@
path('<int:pk>/validate/', api.ProjectLabelConfigValidateAPI.as_view(), name='project-label-config-validate'),
# Project summary
path('<int:pk>/summary/', api.ProjectSummaryAPI.as_view(), name='project-summary'),
# Project summary reset
path(
'<int:pk>/summary/reset/',
api.ProjectSummaryResetAPI.as_view(),
name='project-summary-reset',
),
# Project import
path('<int:pk>/imports/<int:import_pk>/', api.ProjectImportAPI.as_view(), name='project-imports'),
# Project reimport
Expand Down
16 changes: 8 additions & 8 deletions label_studio/tests/create_project_and_import_data.tavern.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ stages:
content-type: application/json
json:
data:
image: "https://develop.dev.heartex.com/static/samples/sample.jpg"
image: "https://heartex.com/static/samples/sample.jpg"
options:
- value: "a"
- value: "b"
Expand Down Expand Up @@ -84,7 +84,7 @@ stages:
content-type: application/json
json:
data:
image: "https://develop.dev.heartex.com/static/samples/sample.jpg"
image: "https://heartex.com/static/samples/sample.jpg"
options:
- value: "a"
- value: "b"
Expand Down Expand Up @@ -136,7 +136,7 @@ stages:
content-type: application/json
json:
data:
image: "https://develop.dev.heartex.com/static/samples/sample.jpg"
image: "https://heartex.com/static/samples/sample.jpg"
options:
- value: "a"
- value: "b"
Expand Down Expand Up @@ -188,7 +188,7 @@ stages:
content-type: application/json
json:
data:
image: "https://develop.dev.heartex.com/static/samples/sample.jpg"
image: "https://heartex.com/static/samples/sample.jpg"
options:
- value: "a"
- value: "b"
Expand Down Expand Up @@ -240,7 +240,7 @@ stages:
content-type: application/json
json:
data:
image: "https://develop.dev.heartex.com/static/samples/sample.jpg"
image: "https://heartex.com/static/samples/sample.jpg"
options:
- value: "a"
- value: "b"
Expand Down Expand Up @@ -292,7 +292,7 @@ stages:
content-type: application/json
json:
data:
image: "https://develop.dev.heartex.com/static/samples/sample.jpg"
image: "https://heartex.com/static/samples/sample.jpg"
options:
- value: "a"
- value: "b"
Expand Down Expand Up @@ -344,7 +344,7 @@ stages:
content-type: application/json
json:
data:
image: "https://develop.dev.heartex.com/static/samples/sample.jpg"
image: "https://heartex.com/static/samples/sample.jpg"
options:
- value: "a"
- value: "b"
Expand Down Expand Up @@ -396,7 +396,7 @@ stages:
content-type: application/json
json:
data:
image: "https://develop.dev.heartex.com/static/samples/sample.jpg"
image: "https://heartex.com/static/samples/sample.jpg"
options:
- value: "a"
- value: "b"
Expand Down
Loading
Loading