diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index e38673f5cee1..05e0e5dc4f0f 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -11,6 +11,7 @@ import os import json import zipfile +from pydub import AudioSegment from scipy.io import wavfile import numpy as np from collections import OrderedDict @@ -19,9 +20,10 @@ from tempfile import TemporaryDirectory from datumaro.components.errors import DatasetError, DatasetImportError, DatasetNotFoundError -from django.conf import settings +# from django.conf import settings from django.db import transaction from django.db.models.query import Prefetch +from cvat.apps.engine.models import Job, AttributeSpec from django.utils import timezone from rest_framework.exceptions import ValidationError @@ -29,7 +31,6 @@ from cvat.apps.engine.plugins import plugin_decorator from cvat.apps.events.handlers import handle_annotations_change from cvat.apps.profiler import silk_profile -from cvat.apps.engine.cache import MediaCache from cvat.apps.engine.frame_provider import FrameProvider from cvat.apps.dataset_manager.annotation import AnnotationIR, AnnotationManager from cvat.apps.dataset_manager.bindings import TaskData, JobData, CvatImportError @@ -878,9 +879,6 @@ def jobChunkPathGetter(db_data, start, stop, task_dimension, data_quality, data_ # db_data = Task Data frame_provider = FrameProvider(db_data, task_dimension) - start_chunk = frame_provider.get_chunk_number(start) - stop_chunk = frame_provider.get_chunk_number(stop) - # self.type = data_type number = int(data_num) if data_num is not None else None @@ -894,15 +892,20 @@ def jobChunkPathGetter(db_data, start, stop, task_dimension, data_quality, data_ return path -def chunk_annotation_audio(audio_file, output_folder, annotations): - # Load audio - # y, sr = librosa.load(audio_file, sr=None) - sr, y = wavfile.read(audio_file) +def chunk_annotation_audio(concat_array, output_folder, annotations): + # Convert NumPy array to AudioSegment + sr = 44100 # sampling rate + audio_segment = AudioSegment(concat_array.tobytes(), frame_rate=sr, channels=1, sample_width=4) + + try: + y = audio_segment.get_array_of_samples() + except Exception as e: + return None data = [] - # Loop over shapes + for i, shape in enumerate(annotations, 1): - # Extract transcript and time points + start_time = min(shape['points'][:2]) end_time = max(shape['points'][2:]) @@ -914,14 +917,11 @@ def chunk_annotation_audio(audio_file, output_folder, annotations): chunk = y[start_sample:end_sample] clip_uuid = str(uuid.uuid4()) - # Save the chunk with transcript as filename - output_file = os.path.join(output_folder, f"{clip_uuid}.wav") + output_file = os.path.join(output_folder, f"{clip_uuid}.mp3") soundfile.write(output_file, chunk, sr) data.append(output_file) - # logger.info(f"Annotation {str(i)} Chunk saved: {output_file}") - return data def create_annotation_clips_zip(annotation_audio_chunk_file_paths, meta_data_file_path, output_folder, dst_file): @@ -959,7 +959,6 @@ def get_np_audio_array_from_job(job_id): job_data_chunk_size = job.db_job.segment.task.data.chunk_size task_dimension = job.db_job.segment.task.dimension - storage_method = job.db_job.segment.task.data.storage_method start = job.start_frame/job_data_chunk_size stop = job.stop_frame/job_data_chunk_size @@ -967,7 +966,7 @@ def get_np_audio_array_from_job(job_id): audio_array_buffer = [] for i in range(math.trunc(start), math.trunc(stop)+1): db_job = job.db_job - data_type = "chunk" + # data_type = 
"chunk" data_num = i data_quality = 'compressed' @@ -993,30 +992,68 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir): # All Annotations annotations = job.data["shapes"] - audio_file_path = os.path.join(temp_dir, str(job_id) + ".wav") - with wave.open(audio_file_path, 'wb') as wave_file: - wave_file.setnchannels(1) - wave_file.setsampwidth(4) - wave_file.setframerate(44100) - wave_file.writeframes(concat_array) + # Job detail - annotation_audio_chunk_file_paths = chunk_annotation_audio(audio_file_path, temp_dir, annotations) + # Find labels of a particular job + job_details = Job.objects.get(id=job_id) + labels_queryset = job_details.get_labels() + labels_list = list(labels_queryset.values()) - for i in range(0, len(annotation_audio_chunk_file_paths)): - final_data.append({"path" : os.path.basename(annotation_audio_chunk_file_paths[i]), "sentence" : annotations[i]["transcript"], "age" : annotations[i]["age"], "gender" : annotations[i]["gender"], "accents" : annotations[i]["accent"], "locale" : annotations[i]["locale"], "emotion" : annotations[i]["emotion"] }) + labels_mapping = {} + + for label in labels_list: + labels_mapping[label["id"]] = label + + label_attributes_queryset = AttributeSpec.objects.filter(label=label["id"]) + + attributes_list = list(label_attributes_queryset.values()) + labels_mapping[label["id"]]["attributes"] = {} + + for attribute in attributes_list: + labels_mapping[label["id"]]["attributes"][attribute["id"]] = attribute + + slogger.glob.debug("JOB LABELS ATTRIBUTES") + slogger.glob.debug(json.dumps(attributes_list)) + + + slogger.glob.debug("JOB LABELS") + slogger.glob.debug(json.dumps(labels_list)) + + # audio_file_path = os.path.join(temp_dir, str(job_id) + ".wav") + # with wave.open(audio_file_path, 'wb') as wave_file: + # wave_file.setnchannels(1) + # wave_file.setsampwidth(4) + # wave_file.setframerate(44100) + # wave_file.writeframes(concat_array) + + annotation_audio_chunk_file_paths = chunk_annotation_audio(concat_array, temp_dir, annotations) + + for i in range(0, len(annotation_audio_chunk_file_paths)): + annotation_attribute_id = annotations[i]["attributes"][0]["spec_id"] + label_attributes = labels_mapping[annotations[i]["label_id"]]["attributes"] + annotation_attribute = label_attributes[annotation_attribute_id] + attribute_name = annotation_attribute["name"] + attribute_val = annotations[i]["attributes"][0]["value"] + + final_data.append({"path" : os.path.basename(annotation_audio_chunk_file_paths[i]), "sentence" : annotations[i]["transcript"], "age" : annotations[i]["age"], "gender" : annotations[i]["gender"], "accents" : annotations[i]["accent"], "locale" : annotations[i]["locale"], "emotion" : annotations[i]["emotion"], "label" : labels_mapping[annotations[i]["label_id"]]["name"], "attribute_name" : attribute_name, "attribute_value" : attribute_val, "start" : annotations[i]["points"][0], "end" : annotations[i]["points"][3]}) + + slogger.glob.debug("JOB ANNOTATION DATA") + slogger.glob.debug(json.dumps(final_data)) + slogger.glob.debug("All ANNOTATIONs DATA") + slogger.glob.debug(json.dumps(annotations)) return final_data, annotation_audio_chunk_file_paths def convert_annotation_data_format(data, format_name): if format_name == "Common Voice": return data elif format_name == "Librispeech": - data = list(map(lambda x: {"chapter_id" : "", "file" : x["path"], "id" : str(uuid.uuid4()), "speaker_id" : "", "text" : x["sentence"]}, data)) + data = list(map(lambda x: {"chapter_id" : "", "file" : x["path"], "id" : 
str(uuid.uuid4()), "speaker_id" : "", "text" : x["sentence"], "label" : x["label"], "attribute_name" : x["attribute_name"], "attribute_value" : x["attribute_value"], "start" : x["start"], "end" : x["end"]}, data)) elif format_name == "VoxPopuli": language_id_mapping = {"en" : 0} - data = list(map(lambda x: {"audio_id" : str(uuid.uuid4()), "language" : language_id_mapping[x["locale"]] if language_id_mapping.get(x["locale"]) else None, "audio_path" : x["path"], "raw_text" : x["sentence"], "normalized_text" : x["sentence"], "gender" : x["gender"], "speaker_id" : "", "is_gold_transcript" : False, "accent" : x["accent"]}, data)) + data = list(map(lambda x: {"audio_id" : str(uuid.uuid4()), "language" : language_id_mapping[x["locale"]] if language_id_mapping.get(x["locale"]) else None, "audio_path" : x["path"], "raw_text" : x["sentence"], "normalized_text" : x["sentence"], "gender" : x["gender"], "speaker_id" : "", "is_gold_transcript" : False, "accent" : x["accents"], "label" : x["label"], "attribute_name" : x["attribute_name"], "attribute_value" : x["attribute_value"], "start" : x["start"], "end" : x["end"]}, data)) elif format_name == "Ted-Lium": - data = list(map(lambda x: {"file" : x["path"], "text" : x["sentence"], "gender" : x["gender"], "id" : str(uuid.uuid4()), "speaker_id" : ""}, data)) + data = list(map(lambda x: {"file" : x["path"], "text" : x["sentence"], "gender" : x["gender"], "id" : str(uuid.uuid4()), "speaker_id" : "", "label" : x["label"], "attribute_name" : x["attribute_name"], "attribute_value" : x["attribute_value"], "start" : x["start"], "end" : x["end"]}, data)) return data def export_audino_job(job_id, dst_file, format_name, server_url=None, save_images=False): @@ -1074,6 +1111,9 @@ def export_audino_task(task_id, dst_file, format_name, server_url=None, save_ima final_data, annotation_audio_chunk_file_paths = get_audio_job_export_data(job.db_job.id, dst_file, job, temp_dir_base, temp_dir) + # Convert the data into a format + final_data = convert_annotation_data_format(final_data, format_name) + final_task_data.append(final_data) final_annotation_chunk_paths.append(annotation_audio_chunk_file_paths) diff --git a/cvat/apps/engine/backup.py b/cvat/apps/engine/backup.py index c30365ef6b51..962176b66194 100644 --- a/cvat/apps/engine/backup.py +++ b/cvat/apps/engine/backup.py @@ -179,6 +179,7 @@ class _TaskBackupBase(_BackupBase): def _prepare_task_meta(self, task): allowed_fields = { 'name', + 'segment_duration', 'bug_tracker', 'status', 'subset', @@ -232,6 +233,12 @@ def _prepare_annotations(self, annotations, label_mapping): 'attributes', 'shapes', 'elements', + 'gender', + 'age', + 'accent', + 'transcript', + 'locale', + 'emotion' } def _update_attribute(attribute, label): @@ -328,6 +335,7 @@ def __init__(self, pk, version=Version.V1): self._db_task = models.Task.objects.prefetch_related('data__images', 'annotation_guide__assets').select_related('data__video', 'annotation_guide').get(pk=pk) self._db_data = self._db_task.data self._version = version + self.logger = slogger.task[pk] db_labels = (self._db_task.project if self._db_task.project_id else self._db_task).label_set.all().prefetch_related( 'attributespec_set') @@ -382,6 +390,8 @@ def _write_task(self, zip_object, target_dir=None): def _write_manifest(self, zip_object, target_dir=None): def serialize_task(): task_serializer = TaskReadSerializer(self._db_task) + # self.logger.info("WRITE MANIFEST") + # self.logger.info(task_serializer.data) for field in ('url', 'owner', 'assignee'): task_serializer.fields.pop(field) @@ 
-641,6 +651,8 @@ def _write_data(zip_object): jobs = self._manifest.pop('jobs') self._prepare_task_meta(self._manifest) + self._logger.info("DEBUG IMPORT") + self._logger.info(self._manifest) self._manifest['owner_id'] = self._user_id self._manifest['project_id'] = self._project_id diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index 66c1a396ce20..9114e1033fd6 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -10,11 +10,12 @@ import itertools import struct from enum import IntEnum +import chardet from abc import ABC, abstractmethod from contextlib import closing from typing import Iterable -from cvat.apps.engine.log import ServerLogManager -slogger = ServerLogManager(__name__) +# from cvat.apps.engine.log import ServerLogManager +# slogger = ServerLogManager(__name__) import av import numpy as np @@ -505,6 +506,26 @@ def _has_frame(self, i): return False + def get_total_frames(self): + total_frame = 0 + with self._get_av_container() as container: + stream = container.streams.audio[0] + stream.thread_type = 'AUTO' + for packet in container.demux(stream): + for image in packet.decode(): + total_frame += 1 + + return total_frame + + def get_file_encoding(self, file_path): + + with open(file_path, 'rb') as f: + rawdata = f.read(1024) + result = chardet.detect(rawdata) + encoding = result['encoding'] + + return encoding + def __iter__(self): with self._get_av_container() as container: stream = container.streams.audio[0] @@ -523,7 +544,12 @@ def get_progress(self, pos): def _get_av_container(self): if isinstance(self._source_path[0], io.BytesIO): self._source_path[0].seek(0) # required for re-reading - return av.open(self._source_path[0]) + + encoding = self.get_file_encoding(self._source_path[0]) + if encoding: + return av.open(self._source_path[0], metadata_encoding = encoding) + else: + return av.open(self._source_path[0]) def _get_duration(self): with self._get_av_container() as container: @@ -543,25 +569,24 @@ def _get_duration(self): def get_preview(self, frame): with self._get_av_container() as container: - stream = container.streams.video[0] + stream = container.streams.audio[0] tb_denominator = stream.time_base.denominator needed_time = int((frame / stream.guessed_rate) * tb_denominator) container.seek(offset=needed_time, stream=stream) for packet in container.demux(stream): for frame in packet.decode(): return self._get_preview(frame.to_image() if not stream.metadata.get('rotate') \ - else av.VideoFrame().from_ndarray( + else av.AudioFrame().from_ndarray( rotate_image( frame.to_ndarray(format='bgr24'), - 360 - int(container.streams.video[0].metadata.get('rotate')) + 360 - int(container.streams.audio[0].metadata.get('rotate')) ), format ='bgr24' ).to_image() ) def get_image_size(self, i): - image = (next(iter(self)))[0] - return image.width, image.height + return 1, 1 class FragmentMediaReader: def __init__(self, chunk_number, chunk_size, start, stop, step=1): @@ -953,44 +978,6 @@ def save_as_chunk(self, images, chunk_path): self._encode_images(images, output_container, output_v_stream) return [(input_w, input_h)] -class AudioCompressedChunkWriter(AudioChunkWriter): - def __init__(self, quality): - super().__init__(quality) - if self._codec_name == 'libx264': - self._codec_opts = { - 'profile': 'baseline', - 'coder': '0', - 'crf': str(self._image_quality), - 'wpredp': '0', - 'flags': '-loop', - } - - def save_as_chunk(self, images, chunk_path): - if not images: - raise Exception('no images to save') 
- - input_w = images[0][0].width - input_h = images[0][0].height - - downscale_factor = 1 - while input_h / downscale_factor >= 1080: - downscale_factor *= 2 - - output_h = input_h // downscale_factor - output_w = input_w // downscale_factor - - with av.open(chunk_path, 'w', format=self.FORMAT) as output_container: - output_v_stream = self._add_video_stream( - container=output_container, - w=output_w, - h=output_h, - rate=self._output_fps, - options=self._codec_opts, - ) - - self._encode_images(images, output_container, output_v_stream) - return [(input_w, input_h)] - def _is_archive(path): mime = mimetypes.guess_type(path) mime_type = mime[0] @@ -1043,18 +1030,18 @@ def _is_zip(path): 'mode': 'annotation', 'unique': False, }, - 'video': { - 'has_mime_type': _is_video, - 'extractor': VideoReader, - 'mode': 'interpolation', - 'unique': True, - }, 'audio': { 'has_mime_type': _is_audio, 'extractor': AudioReader, 'mode': 'interpolation', 'unique': False, }, + 'video': { + 'has_mime_type': _is_video, + 'extractor': VideoReader, + 'mode': 'interpolation', + 'unique': True, + }, 'archive': { 'has_mime_type': _is_archive, 'extractor': ArchiveReader, diff --git a/cvat/apps/engine/migrations/0084_job_ai_audio_annotation_error_msg_and_more.py b/cvat/apps/engine/migrations/0084_job_ai_audio_annotation_error_msg_and_more.py index d5683154d31d..a81f72ddd46a 100644 --- a/cvat/apps/engine/migrations/0084_job_ai_audio_annotation_error_msg_and_more.py +++ b/cvat/apps/engine/migrations/0084_job_ai_audio_annotation_error_msg_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.6 on 2024-04-15 05:10 +# Generated by Django 4.2.6 on 2024-04-19 09:27 import cvat.apps.engine.models from django.db import migrations, models diff --git a/cvat/apps/engine/migrations/0085_alter_task_total_audio_duration.py b/cvat/apps/engine/migrations/0085_alter_task_total_audio_duration.py new file mode 100644 index 000000000000..c99d69f9ea54 --- /dev/null +++ b/cvat/apps/engine/migrations/0085_alter_task_total_audio_duration.py @@ -0,0 +1,12 @@ +# Generated by Django 4.2.6 on 2024-04-19 09:30 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("engine", "0084_job_ai_audio_annotation_error_msg_and_more"), + ] + + operations = [ + ] diff --git a/cvat/apps/engine/migrations/0086_task_segment_duration.py b/cvat/apps/engine/migrations/0086_task_segment_duration.py new file mode 100644 index 000000000000..5568c3cad7bb --- /dev/null +++ b/cvat/apps/engine/migrations/0086_task_segment_duration.py @@ -0,0 +1,17 @@ +# Generated by Django 4.2.6 on 2024-04-19 13:32 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("engine", "0085_alter_task_total_audio_duration"), + ] + + operations = [ + migrations.AddField( + model_name="task", + name="segment_duration", + field=models.PositiveIntegerField(default=None, null=True), + ), + ] diff --git a/cvat/apps/engine/migrations/0087_remove_task_total_audio_duration_and_more.py b/cvat/apps/engine/migrations/0087_remove_task_total_audio_duration_and_more.py new file mode 100644 index 000000000000..cf68450032fe --- /dev/null +++ b/cvat/apps/engine/migrations/0087_remove_task_total_audio_duration_and_more.py @@ -0,0 +1,17 @@ +# Generated by Django 4.2.6 on 2024-05-06 13:22 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("engine", "0086_task_segment_duration"), + ] + + operations = [ + migrations.AddField( + model_name="task", + 
name="audio_total_duration", + field=models.PositiveIntegerField(default=None, null=True), + ), + ] diff --git a/cvat/apps/engine/migrations/__init__.py b/cvat/apps/engine/migrations/__init__.py index 27d2190951b4..1a8ef647b935 100644 --- a/cvat/apps/engine/migrations/__init__.py +++ b/cvat/apps/engine/migrations/__init__.py @@ -1,5 +1,3 @@ - -# Copyright (C) 2018-2022 Intel Corporation +# Copyright (C) 2024 CVAT.ai Corporation # -# SPDX-License-Identifier: MIT - +# SPDX-License-Identifier: MIT \ No newline at end of file diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index 81ee63ca5425..2c500d7c55b9 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -188,7 +188,7 @@ class UploadMixin: 'Tus-Max-Size': _tus_max_file_size, 'Access-Control-Allow-Origin': "*", 'Access-Control-Allow-Methods': "PATCH,HEAD,GET,POST,OPTIONS", - 'Access-Control-Expose-Headers': "Tus-Resumable,upload-length,upload-metadata,Location,Upload-Offset", + 'Access-Control-Expose-Headers': "Tus-Resumable,upload-length,upload-metadata,Location,Upload-Offset,Upload-Filename", 'Access-Control-Allow-Headers': "Tus-Resumable,upload-length,upload-metadata,Location,Upload-Offset,content-type", 'Cache-Control': 'no-store' } @@ -284,8 +284,8 @@ def init_tus_upload(self, request): tus_file = TusFile.create_file(metadata, file_size, self.get_upload_dir()) location = request.build_absolute_uri() - if 'HTTP_X_FORWARDED_HOST' not in request.META: - location = request.META.get('HTTP_ORIGIN') + request.META.get('PATH_INFO') + # if 'HTTP_X_FORWARDED_HOST' not in request.META: + # location = request.META.get('HTTP_ORIGIN') + request.META.get('PATH_INFO') if import_type in ('backup', 'annotations', 'datasets'): scheduler = django_rq.get_scheduler(settings.CVAT_QUEUES.CLEANING.value) diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index 9efbea83daec..edd497b77b68 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -405,6 +405,7 @@ class Task(models.Model): project = models.ForeignKey(Project, on_delete=models.CASCADE, null=True, blank=True, related_name="tasks", related_query_name="task") + audio_total_duration = models.PositiveIntegerField(null=True, default=None) name = SafeCharField(max_length=256) mode = models.CharField(max_length=32) owner = models.ForeignKey(User, null=True, blank=True, @@ -429,6 +430,7 @@ class Task(models.Model): blank=True, on_delete=models.SET_NULL, related_name='+') target_storage = models.ForeignKey('Storage', null=True, default=None, blank=True, on_delete=models.SET_NULL, related_name='+') + segment_duration = models.PositiveIntegerField(null=True, default=None) # Extend default permission model class Meta: diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 2964f1e7fae1..cbae86108983 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -635,6 +635,7 @@ def create(self, validated_data): size = task.data.size valid_frame_ids = task.data.get_valid_frame_indices() + segment_size = task.segment_size frame_selection_method = validated_data.pop("frame_selection_method", None) if frame_selection_method == models.JobFrameSelectionMethod.RANDOM_UNIFORM: @@ -645,15 +646,36 @@ def create(self, validated_data): f"the number of the task frames ({size})" ) - seed = validated_data.pop("seed", None) + num_segments = size // segment_size + jobs_frame_list = [] + for i in range(num_segments): + start = i * segment_size + end = (i+1) * segment_size - 1 + array = [j for j in 
range(start,end+1)] + jobs_frame_list.append(array) + + # if there's a remainder, create the last array + if size % segment_size != 0: + start = num_segments * segment_size + end = size - 1 + array = [j for j in range(start,end+1)] + jobs_frame_list.append(array) + + #Random select from the list + import math, random + random_jobs_no = math.ceil(frame_count / segment_size) + selected_jobs_frames = random.sample(jobs_frame_list, random_jobs_no) + frames = sorted([item for sublist in selected_jobs_frames for item in sublist]) + + # seed = validated_data.pop("seed", None) # The RNG backend must not change to yield reproducible results, # so here we specify it explicitly - from numpy import random - rng = random.Generator(random.MT19937(seed=seed)) - frames = rng.choice( - list(valid_frame_ids), size=frame_count, shuffle=False, replace=False - ).tolist() + # from numpy import random + # rng = random.Generator(random.MT19937(seed=seed)) + # frames = rng.choice( + # list(valid_frame_ids), size=frame_count, shuffle=False, replace=False + # ).tolist() elif frame_selection_method == models.JobFrameSelectionMethod.MANUAL: frames = validated_data.pop("frames") @@ -1049,6 +1071,7 @@ class TaskReadSerializer(serializers.ModelSerializer): dimension = serializers.CharField(allow_blank=True, required=False) target_storage = StorageSerializer(required=False, allow_null=True) source_storage = StorageSerializer(required=False, allow_null=True) + segment_duration = serializers.IntegerField(allow_null=True) jobs = JobsSummarySerializer(url_filter_key='task_id', source='segment_set') labels = LabelsSummarySerializer(source='*') @@ -1058,7 +1081,7 @@ class Meta: 'bug_tracker', 'created_date', 'updated_date', 'overlap', 'segment_size', 'status', 'data_chunk_size', 'data_compressed_chunk_type', 'guide_id', 'data_original_chunk_type', 'size', 'image_quality', 'data', 'dimension', - 'subset', 'organization', 'target_storage', 'source_storage', 'jobs', 'labels', + 'subset', 'organization', 'target_storage', 'source_storage', 'segment_duration', 'jobs', 'labels', ) read_only_fields = fields extra_kwargs = { @@ -1074,12 +1097,13 @@ class TaskWriteSerializer(WriteOnceMixin, serializers.ModelSerializer): project_id = serializers.IntegerField(required=False, allow_null=True) target_storage = StorageSerializer(required=False, allow_null=True) source_storage = StorageSerializer(required=False, allow_null=True) + segment_duration = serializers.IntegerField(required=False, allow_null=True) class Meta: model = models.Task fields = ('url', 'id', 'name', 'project_id', 'owner_id', 'assignee_id', 'bug_tracker', 'overlap', 'segment_size', 'labels', 'subset', - 'target_storage', 'source_storage', + 'target_storage', 'source_storage', 'segment_duration' ) write_once_fields = ('overlap', 'segment_size') diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 68dd8834cead..5d7704b873fc 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -2,7 +2,8 @@ # Copyright (C) 2022-2023 CVAT.ai Corporation # # SPDX-License-Identifier: MIT - +import av +import math import itertools import fnmatch import os @@ -27,7 +28,7 @@ from cvat.apps.engine import models from cvat.apps.engine.log import ServerLogManager -from cvat.apps.engine.media_extractors import (MEDIA_TYPES, ImageListReader, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter, AudioChunkWriter, AudioCompressedChunkWriter, +from cvat.apps.engine.media_extractors import (MEDIA_TYPES, ImageListReader, Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter, AudioChunkWriter, 
ValidateDimension, ZipChunkWriter, ZipCompressedChunkWriter, get_mime, sort)
 from cvat.apps.engine.utils import av_scan_paths,get_rq_job_meta, define_dependent_job, get_rq_lock_by_user, preload_images
 from cvat.utils.http import make_requests_session, PROXIES_FOR_UNTRUSTED_URLS
@@ -111,8 +112,43 @@ def _get_task_segment_data(
     *,
     data_size: Optional[int] = None,
     job_file_mapping: Optional[JobFileMapping] = None,
+    segment_duration: Optional[int] = None
 ) -> SegmentsParams:
-    if job_file_mapping is not None:
+
+    # segment_duration = 3000
+
+    slogger.glob.debug("Segment Duration")
+    slogger.glob.debug(segment_duration)
+    if segment_duration is not None:
+        # Total audio duration in milliseconds
+        audio_total_duration = db_task.audio_total_duration
+
+        if audio_total_duration == 0:
+            return SegmentsParams(iter([]), 0, 0)
+
+        num_segments = max(1, math.ceil(audio_total_duration / segment_duration))
+
+        slogger.glob.debug("Num segments")
+        slogger.glob.debug(num_segments)
+
+        slogger.glob.debug("Num frames")
+        slogger.glob.debug(db_task.data.size)
+
+        def _segments():
+            start_time = 0
+            for _ in range(num_segments):
+                stop_time = start_time + segment_duration - 1
+                yield SegmentParams(start_time, stop_time)
+                start_time = stop_time + 1
+
+        segments = _segments()
+        slogger.glob.debug("Segment count")  # _segments() is a generator, so len(segments) would raise TypeError
+        slogger.glob.debug(num_segments)
+        segment_size = 0
+        overlap = 0
+
+    elif job_file_mapping is not None:
         def _segments():
             # It is assumed here that files are already saved ordered in the task
             # Here we just need to create segments by the job sizes
@@ -134,6 +170,13 @@ def _segments():
         segment_size = db_task.segment_size
         segment_step = segment_size
+
+        if segment_size == 0:
+            raise ValueError("Segment size cannot be zero.")
+
+        slogger.glob.debug(data_size)
+        slogger.glob.debug(segment_size)
+        slogger.glob.debug(segment_step)
         if segment_size == 0 or segment_size > data_size:
             segment_size = data_size
@@ -141,7 +184,8 @@
             # Otherwise a task contains an extra segment
             segment_step = sys.maxsize

-        overlap = 5 if db_task.mode == 'interpolation' else 0
+        # overlap = 5 if db_task.mode == 'interpolation' else 0
+        overlap = 0
         if db_task.overlap is not None:
             overlap = min(db_task.overlap, segment_size // 2)
@@ -210,6 +254,9 @@ def _count_files(data):
     def count_files(file_mapping, counter):
         for rel_path, full_path in file_mapping.items():
             mime = get_mime(full_path)
+
+            slogger.glob.debug("Mimetype")
+            slogger.glob.debug(mime)
             if mime in counter:
                 counter[mime].append(rel_path)
             elif rel_path.endswith('.jsonl'):
@@ -651,6 +698,7 @@ def _create_thread(
     # count and validate uploaded files
     media = _count_files(data)
+
     media, task_mode = _validate_data(media, manifest_files)

     if job_file_mapping is not None and task_mode != 'annotation':
@@ -880,7 +928,7 @@ def update_progress(progress):
         if not hasattr(update_progress, 'call_counter'):
             update_progress.call_counter = 0

-        status_message = 'CVAT is preparing data chunks'
+        status_message = 'Audino is preparing data chunks'
         if not progress:
             status_message = '{} {}'.format(status_message, progress_animation[update_progress.call_counter])
         job.meta['status'] = status_message
@@ -909,19 +957,113 @@ def update_progress(progress):
     compressed_chunk_writer = compressed_chunk_writer_class(db_data.image_quality, **kwargs)
     original_chunk_writer = original_chunk_writer_class(original_quality, **kwargs)

-    # calculate chunk size if it isn't specified
-    if db_data.chunk_size is None:
-        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
-            if not
is_data_in_cloud: - w, h = extractor.get_image_size(0) - else: - img_properties = manifest[0] - w, h = img_properties['width'], img_properties['height'] - area = h * w - db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area)) + def get_file_encoding(file_path): + import chardet + + with open(file_path, 'rb') as f: + rawdata = f.read(1024) + result = chardet.detect(rawdata) + encoding = result['encoding'] + + return encoding + def get_audio_duration(file_path): + encoding=get_file_encoding(file_path) + slogger.glob.debug("ENCODING") + slogger.glob.debug(encoding) + # Open the audio file + if encoding: + container = av.open(file_path, metadata_encoding=encoding) else: - db_data.chunk_size = 36 + container = av.open(file_path) + + # Get the first audio stream + audio_stream = next((stream for stream in container.streams if stream.codec.type == 'audio'), None) + + if not audio_stream: + print("Error: No audio stream found in the file.") + return None + + # Get the duration in seconds based on stream information + duration_milliseconds = int(audio_stream.duration * audio_stream.time_base * 1000) + + slogger.glob.debug("FFF AUDIO DURATION") + slogger.glob.debug(audio_stream.duration) + + slogger.glob.debug("PPP AUDIO DURATION") + slogger.glob.debug(audio_stream.time_base) + + # Close the container + container.close() + + return duration_milliseconds + + + db_task.audio_total_duration = None + + if MEDIA_TYPE == "audio": + + slogger.glob.debug("Before segment_duration") + slogger.glob.debug(db_task.segment_duration) + + segment_duration = db_task.segment_duration if db_task.segment_duration is not None else 600000 + db_task.audio_total_duration = get_audio_duration(details['source_path'][0]) + # db_task.data.audio_total_duration = 720000 #get_audio_duration(details['source_path'][0]) + total_audio_frames = extractor.get_total_frames() + + slogger.glob.debug("TOTAL AUDIO DURATION") + slogger.glob.debug(db_task.audio_total_duration) + + num_frames_per_millisecond = total_audio_frames / db_task.audio_total_duration + + if segment_duration == 0: + segment_duration = db_task.audio_total_duration + # db_task.segment_size = 0 + # db_data.chunk_size = db_task.audio_total_duration*num_frames_per_millisecond + # else: + + slogger.glob.debug("num_frames_per_millisecond") + slogger.glob.debug(num_frames_per_millisecond) + + slogger.glob.debug("segment_duration") + slogger.glob.debug(segment_duration) + + num_frames_per_segment_duration = num_frames_per_millisecond*segment_duration + db_task.segment_size = int(round(num_frames_per_segment_duration)) + + num_segments = max(1, int(math.ceil(db_task.audio_total_duration / segment_duration))) + + slogger.glob.debug("Segment Size Before") + slogger.glob.debug(db_task.segment_size) + + slogger.glob.debug("Segment Size After") + slogger.glob.debug(db_task.segment_size) + + slogger.glob.debug("Num segments") + slogger.glob.debug(num_segments) + + slogger.glob.debug("Num frames") + slogger.glob.debug(total_audio_frames) + + slogger.glob.debug("Audio Duration") + slogger.glob.debug(db_task.audio_total_duration) + + # Default chunk size = entire frames + db_data.chunk_size = db_task.segment_size #db_task.data.size + else: + if db_data.chunk_size is None: + if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter): + if not is_data_in_cloud: + w, h = extractor.get_image_size(0) + else: + img_properties = manifest[0] + w, h = img_properties['width'], img_properties['height'] + area = h * w + db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area)) + 
else: + db_data.chunk_size = 36 + slogger.glob.debug("OPPPPP CHUNK SIZE") + slogger.glob.debug(db_data.chunk_size) video_path = "" video_size = (0, 0) diff --git a/cvat/apps/engine/templates/audio_annotation/annotation_message.html b/cvat/apps/engine/templates/audio_annotation/annotation_message.html new file mode 100644 index 000000000000..172a7de8f14c --- /dev/null +++ b/cvat/apps/engine/templates/audio_annotation/annotation_message.html @@ -0,0 +1,176 @@ + +{% load account %}{% user_display user as user_display %}{% load i18n %}{% autoescape off %} +{% load static %} + + + + + + Annotation Job Completed + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + Logo + +
+
+ + + + +
+

Your annotation job is complete

+
+
+ + + + + + + + + + + + +
+ {% blocktrans %} +

Dear {{ username }},

+

Your audio annotation job (ID: {{ job_id }}) has been completed.

+

You can check the results at the following link:

+ {% endblocktrans %} +
+ + + + +
+ + + + +
+ {% blocktrans %} + + View Job Details + + {% endblocktrans %} +
+
+
+
+ {% blocktrans %} +

Thank you for using our service!

+ {% endblocktrans %} {% endautoescape %} +
+
+ + + + +
+

If you weren't expecting this notification, you can safely ignore this email.

+
+
+ + diff --git a/cvat/apps/engine/templates/audio_annotation/annotation_subject.txt b/cvat/apps/engine/templates/audio_annotation/annotation_subject.txt new file mode 100644 index 000000000000..749732ec110d --- /dev/null +++ b/cvat/apps/engine/templates/audio_annotation/annotation_subject.txt @@ -0,0 +1,4 @@ +{% load i18n %} +{% autoescape off %} +{% blocktrans %} Your Audio Annotation Job Has Been Successfully Completed!{% endblocktrans %} +{% endautoescape %} diff --git a/cvat/apps/engine/templates/audio_annotation/error_message.html b/cvat/apps/engine/templates/audio_annotation/error_message.html new file mode 100644 index 000000000000..386aae756754 --- /dev/null +++ b/cvat/apps/engine/templates/audio_annotation/error_message.html @@ -0,0 +1,177 @@ + +{% load account %}{% user_display user as user_display %}{% load i18n %}{% autoescape off %} +{% load static %} + + + + + + Annotation Job Error + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + Logo + +
+
+ + + + +
+

+ Error with your annotation job +

+
+
+ + + + + + + + + + + + +
+ {% blocktrans %} +

Dear {{ username }},

+

Unfortunately, there was an error while processing your audio annotation job (ID: {{ job_id }}).

+

Error details: {{ error }}

+

You can check the status at the following link:

+ {% endblocktrans %} +
+ + + + +
+ + + + +
+ {% blocktrans %} + + View Job Details + + {% endblocktrans %} +
+
+
+
+ {% blocktrans %} +

We apologize for the inconvenience and are working to resolve the issue.

+ {% endblocktrans %} {% endautoescape %} +
+
+ + + + +
+

If you weren't expecting this notification, you can safely ignore this email.

+
+
+ + diff --git a/cvat/apps/engine/templates/audio_annotation/error_subject.txt b/cvat/apps/engine/templates/audio_annotation/error_subject.txt new file mode 100644 index 000000000000..8681f4ddd72c --- /dev/null +++ b/cvat/apps/engine/templates/audio_annotation/error_subject.txt @@ -0,0 +1,4 @@ +{% load i18n %} +{% autoescape off %} +{% blocktrans %} Error with Your Audio Annotation Job!{% endblocktrans %} +{% endautoescape %} diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index e6d7cce1a5ed..bbf076734d1e 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -27,6 +27,10 @@ from django.db.models.query import Prefetch from django.http import HttpResponse, HttpResponseNotFound, HttpResponseBadRequest from django.utils import timezone +from django.core.mail import send_mail +from django.contrib.sites.shortcuts import get_current_site +from django.core.exceptions import ImproperlyConfigured +from allauth.account.adapter import get_adapter from drf_spectacular.types import OpenApiTypes from drf_spectacular.utils import ( @@ -65,7 +69,7 @@ LabeledDataSerializer, ProjectReadSerializer, ProjectWriteSerializer, RqStatusSerializer, TaskReadSerializer, TaskWriteSerializer, - UserSerializer, PluginsSerializer, IssueReadSerializer, AIAudioAnnotationSerializer, ExportAudioAnnotationSerializer, + UserSerializer, PluginsSerializer, IssueReadSerializer, AIAudioAnnotationSerializer, AnnotationGuideReadSerializer, AnnotationGuideWriteSerializer, AssetReadSerializer, AssetWriteSerializer, IssueWriteSerializer, CommentReadSerializer, CommentWriteSerializer, CloudStorageWriteSerializer, @@ -708,7 +712,7 @@ def _check_frame_range(self, frame: int): raise ValidationError("The frame number doesn't belong to the job") def __call__(self, request, start, stop, db_data): - if self.type == 'chunk' and self.job.segment.type == SegmentType.SPECIFIC_FRAMES: + if self.type == 'chunk' and self.job.segment.type == SegmentType.SPECIFIC_FRAMES and self.job.segment.task.data.compressed_chunk_type != 'audio': frame_provider = FrameProvider(db_data, self.dimension) start_chunk = frame_provider.get_chunk_number(start) @@ -1941,6 +1945,7 @@ def metadata(self, request, pk): data_start_frame = db_data.start_frame + start_frame * frame_step data_stop_frame = min(db_data.stop_frame, db_data.start_frame + stop_frame * frame_step) frame_set = db_job.segment.frame_set + segment_size = db_job.segment.task.segment_size if request.method == 'PATCH': serializer = DataMetaWriteSerializer(instance=db_data, data=request.data) @@ -1982,6 +1987,7 @@ def metadata(self, request, pk): db_data.stop_frame = data_stop_frame db_data.size = len(frame_set) db_data.included_frames = db_job.segment.frames or None + db_data.segment_size = segment_size frame_meta = [{ 'width': item.width, @@ -2021,6 +2027,30 @@ class AIAudioAnnotationViewSet(viewsets.ModelViewSet): filter_fields = [] filter_backends = [] + def send_annotation_email(self, request, template_name, err=None): + job_id = request.data.get('jobId') + if settings.EMAIL_BACKEND is None: + raise ImproperlyConfigured("Email backend is not configured") + + # Find the user associated with current request + user = self.request.user + + target_email = user.email + current_site = get_current_site(request) + site_name = current_site.name + domain = current_site.domain + context = { + 'username': user.username, + 'domain': domain, + 'site_name': site_name, + 'job_id': job_id, + 'protocol': 'https' if request.is_secure() else 'http' + } + if err: + context['error'] = err + 
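# Editorial note, not part of the patch: allauth's adapter send_mail()
# (called just below) renders "<template_prefix>_subject.txt" for the subject
# and "<template_prefix>_message.html" (or "_message.txt") for the body, which
# is why this diff adds audio_annotation/annotation_subject.txt,
# annotation_message.html, error_subject.txt and error_message.html.
# A rough plain-Django equivalent, assuming those same template paths:
#
#     from django.core.mail import send_mail
#     from django.template.loader import render_to_string
#
#     subject = render_to_string(
#         f"audio_annotation/{template_name}_subject.txt", context).strip()
#     html_body = render_to_string(
#         f"audio_annotation/{template_name}_message.html", context)
#     send_mail(subject, html_body, None, [target_email], html_message=html_body)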
+ get_adapter(request).send_mail(f'audio_annotation/{template_name}', target_email, context) + @action(detail=False, methods=['post'], url_path='save') def save_segments(self, request): try: @@ -2028,8 +2058,8 @@ def save_segments(self, request): # Find labels of a particular job job = Job.objects.get(id=job_id) - labels_queryset = job.get_labels() - labels_list = list(labels_queryset.values()) + # labels_queryset = job.get_labels() + # labels_list = list(labels_queryset.values()) segments = request.data.get('segments') @@ -2060,15 +2090,18 @@ def save_segments(self, request): job.save() + self.send_annotation_email(request, 'annotation') return Response({'success': True, 'segments': saved_segments}, status=status.HTTP_201_CREATED) except Exception as e: + self.send_annotation_email(request, 'error', err=str(e)) return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) @action(detail=False, methods=['post'], url_path='ai-annotate') def request_ai_annotation(self, request): try: job_id = request.data.get('jobId') + lang = request.data.get('lang') authHeader = request.headers.get('Authorization') # Find labels of a particular job @@ -2086,7 +2119,10 @@ def request_ai_annotation(self, request): job.save() # Iterate over segments and save to the model - r = requests.post("http://35.208.178.37:8000/transcript", json={ "jobId" : job_id, "authToken" : authHeader, "background_task_id" : background_task_id}) + ai_annotation_host = os.getenv('AI_ANNOTATION_HOST', '35.208.178.37') + ai_annotation_port = int(os.getenv('AI_ANNOTATION_PORT', "8000")) + url = f"http://{ai_annotation_host}:{ai_annotation_port}/transcript" + r = requests.post(url, json={ "jobId" : job_id, "lang" : lang, "authToken" : authHeader, "background_task_id" : background_task_id}) return Response({'success': True}, status=status.HTTP_200_OK) diff --git a/cvat/apps/iam/templates/account/email/email_confirmation_message.html b/cvat/apps/iam/templates/account/email/email_confirmation_message.html index 4ef34dd474f8..3227fc1054b2 100644 --- a/cvat/apps/iam/templates/account/email/email_confirmation_message.html +++ b/cvat/apps/iam/templates/account/email/email_confirmation_message.html @@ -121,9 +121,9 @@
- + Logo {% blocktrans with site_name=current_site.name site_domain=current_site.domain %}

- Thank you for signing up for CVAT!
+ Thank you for signing up for Audino!

To complete registration and start annotating, simply tap the button below and confirm your email address. diff --git a/cvat/apps/organizations/templates/invitation/invitation_message.html b/cvat/apps/organizations/templates/invitation/invitation_message.html index b511462958d4..4b1f05f7653f 100644 --- a/cvat/apps/organizations/templates/invitation/invitation_message.html +++ b/cvat/apps/organizations/templates/invitation/invitation_message.html @@ -122,9 +122,9 @@
- + Logo {% blocktrans %}

- You're receiving this email because you've been invited to join {{ organization_name }} organization in CVAT by {{ invitation_owner }} at {{ site_name }}.
+ You're receiving this email because you've been invited to join the {{ organization_name }} organization in Audino by {{ invitation_owner }} at {{ site_name }}.

To join organization and start annotating, simply tap the button below and complete registration. diff --git a/cvat/apps/organizations/templates/invitation/invitation_subject.txt b/cvat/apps/organizations/templates/invitation/invitation_subject.txt index 4fedaaf7bed2..53ad2eddd1e2 100644 --- a/cvat/apps/organizations/templates/invitation/invitation_subject.txt +++ b/cvat/apps/organizations/templates/invitation/invitation_subject.txt @@ -1,4 +1,4 @@ {% load i18n %} {% autoescape off %} -{% blocktrans %}You're invited to join {{ organization_name }} organization in CVAT!{% endblocktrans %} +{% blocktrans %}You're invited to join {{ organization_name }} organization in Audino!{% endblocktrans %} {% endautoescape %} diff --git a/cvat/apps/quality_control/migrations/0002_annotationconflict_character_error_rate_and_more.py b/cvat/apps/quality_control/migrations/0002_annotationconflict_character_error_rate_and_more.py new file mode 100644 index 000000000000..2347c7e75475 --- /dev/null +++ b/cvat/apps/quality_control/migrations/0002_annotationconflict_character_error_rate_and_more.py @@ -0,0 +1,41 @@ +# Generated by Django 4.2.6 on 2024-06-15 11:52 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("quality_control", "0001_initial"), + ] + + operations = [ + migrations.AddField( + model_name="annotationconflict", + name="character_error_rate", + field=models.IntegerField(default=0, null=True), + ), + migrations.AddField( + model_name="annotationconflict", + name="word_error_rate", + field=models.IntegerField(default=0, null=True), + ), + migrations.AlterField( + model_name="annotationconflict", + name="type", + field=models.CharField( + choices=[ + ("missing_annotation", "MISSING_ANNOTATION"), + ("extra_annotation", "EXTRA_ANNOTATION"), + ("mismatching_label", "MISMATCHING_LABEL"), + ("low_overlap", "LOW_OVERLAP"), + ("mismatching_direction", "MISMATCHING_DIRECTION"), + ("mismatching_attributes", "MISMATCHING_ATTRIBUTES"), + ("mismatching_groups", "MISMATCHING_GROUPS"), + ("covered_annotation", "COVERED_ANNOTATION"), + ("mismatching_extra_parameters", "MISMATCHING_EXTRA_PARAMETERS"), + ("mismatching_transcript", "MISMATCHING_TRANSCRIPT"), + ], + max_length=32, + ), + ), + ] diff --git a/cvat/apps/quality_control/models.py b/cvat/apps/quality_control/models.py index e4e39f5fb922..16fe703ceaf4 100644 --- a/cvat/apps/quality_control/models.py +++ b/cvat/apps/quality_control/models.py @@ -24,6 +24,8 @@ class AnnotationConflictType(str, Enum): MISMATCHING_ATTRIBUTES = "mismatching_attributes" MISMATCHING_GROUPS = "mismatching_groups" COVERED_ANNOTATION = "covered_annotation" + MISMATCHING_EXTRA_PARAMETERS = "mismatching_extra_parameters" + MISMATCHING_TRANSCRIPT = "mismatching_transcript" def __str__(self) -> str: return self.value @@ -134,6 +136,8 @@ class AnnotationConflict(models.Model): frame = models.PositiveIntegerField() type = models.CharField(max_length=32, choices=AnnotationConflictType.choices()) severity = models.CharField(max_length=32, choices=AnnotationConflictSeverity.choices()) + word_error_rate = models.IntegerField(default=0, null=True) + character_error_rate = models.IntegerField(default=0, null=True) annotation_ids: Sequence[AnnotationId] diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index 1f3ff5682569..11e73165ba4e 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -107,6 +107,8 @@ class 
AnnotationConflict(_Serializable): frame_id: int type: AnnotationConflictType annotation_ids: List[AnnotationId] + word_error_rate: Optional[float] = None + character_error_rate: Optional[float] = None @property def severity(self) -> AnnotationConflictSeverity: @@ -114,6 +116,7 @@ def severity(self) -> AnnotationConflictSeverity: AnnotationConflictType.MISSING_ANNOTATION, AnnotationConflictType.EXTRA_ANNOTATION, AnnotationConflictType.MISMATCHING_LABEL, + AnnotationConflictType.MISMATCHING_TRANSCRIPT, ]: severity = AnnotationConflictSeverity.ERROR elif self.type in [ @@ -122,6 +125,7 @@ def severity(self) -> AnnotationConflictSeverity: AnnotationConflictType.MISMATCHING_DIRECTION, AnnotationConflictType.MISMATCHING_GROUPS, AnnotationConflictType.COVERED_ANNOTATION, + AnnotationConflictType.MISMATCHING_EXTRA_PARAMETERS, ]: severity = AnnotationConflictSeverity.WARNING else: @@ -144,6 +148,8 @@ def from_dict(cls, d: dict): frame_id=d["frame_id"], type=AnnotationConflictType(d["type"]), annotation_ids=list(AnnotationId.from_dict(v) for v in d["annotation_ids"]), + word_error_rate=d["word_error_rate"], + character_error_rate =d["character_error_rate"], ) @@ -161,6 +167,15 @@ class ComparisonParameters(_Serializable): compare_attributes: bool = True "Enables or disables attribute checks" + compare_extra_parameters: bool = True + "Enables or disables extra parameters checks for audio data" + + wer_threshold: float = 0.2 + "Used for distinction between matched and unmatched transcript at word level" + + cer_threshold: float = 0.2 + "Used for distinction between matched and unmatched transcript at character level" + ignored_attributes: List[str] = [] iou_threshold: float = 0.4 @@ -2077,6 +2092,572 @@ def generate_report(self) -> ComparisonReport: ) +class AudioDatasetComparator: + DEFAULT_SETTINGS = ComparisonParameters() + + def __init__( + self, + ds_data_provider: JobDataProvider, + gt_data_provider: JobDataProvider, + offset, + job_duration, + *, + settings: Optional[ComparisonParameters] = None, + ) -> None: + if settings is None: + settings = self.DEFAULT_SETTINGS + self.settings = settings + + self._ds_data_provider = ds_data_provider + self._gt_data_provider = gt_data_provider + self._offset = offset + self._job_duration = job_duration + self._job_id = self._ds_data_provider.job_id + + self._frame_results: Dict[int, ComparisonReportFrameSummary] = {} + self.included_frames = gt_data_provider.job_data._db_job.segment.frame_set + + self.iou_threshold = settings.iou_threshold + self.wer_threshold = settings.wer_threshold + self.cer_threshold = settings.cer_threshold + + self.ignored_attrs = set(settings.ignored_attributes) | { + "track_id", # changes from task to task, can't be defined manually with the same name + "keyframe", # indicates the way annotation obtained, meaningless to compare + "z_order", # changes from frame to frame, compared by other means + "group", # changes from job to job, compared by other means + "rotation", # handled by other means + "outside", # handled by other means + } + + def _dm_ann_to_ann_id(self, ann): + if ann in self._ds_data_provider.job_annotation.data['shapes']: + source_data_provider = self._ds_data_provider + elif ann in self._gt_data_provider.job_annotation.data['shapes']: + source_data_provider = self._gt_data_provider + else: + assert False + + source_ann_id = ann['id'] + ann_type = AnnotationType.SHAPE + shape_type = ann['type'] + + return AnnotationId( + obj_id=source_ann_id, type=ann_type, shape_type=shape_type, job_id=source_data_provider.job_id + 
) + + def _find_audio_gt_conflicts(self): + start = self._ds_data_provider.job_data.start + end = self._ds_data_provider.job_data.stop - 1 + gt_frame_list = self._gt_data_provider.job_data._db_job.segment.frames + + # Check if any frame in gt_data_frame_array is in ds_data_frame_array + if not (start in gt_frame_list or end in gt_frame_list): + return # we need to compare only intersecting jobs + + ds_annotations = self._ds_data_provider.job_annotation.data['shapes'] + gt_annotations = self._gt_data_provider.job_annotation.data['shapes'] + + self._process_job(ds_annotations, gt_annotations) + + def _process_job(self, ds_annotations, gt_annotations): + job_id = self._job_id + job_results = self.match_annotations(ds_annotations, gt_annotations) + self._frame_results.setdefault(job_id, {}) + + self._generate_job_annotation_conflicts( + job_results, gt_annotations, ds_annotations + ) + + def match_annotations(self, ds_annotations, gt_annotations): + """ + Match annotations between two datasets. + This method should compare annotations based on their start and end times. + """ + def _interval_iou(interval1, interval2): + start1, end1 = interval1 + start2, end2 = interval2 + + start2 += self._offset + end2 += self._offset + + intersection = max(0, min(end1, end2) - max(start1, start2)) + union = max(end1, end2) - min(start1, start2) + return intersection / union if union > 0 else 0 + + job_start_time = self._offset - 0.1 + job_end_time = job_start_time + self._job_duration + 0.1 + + # Filter gt_annotations to include only those within the job's time bounds + gt_annotations = [ + gt_ann for gt_ann in gt_annotations + if job_start_time <= gt_ann['points'][0] and gt_ann['points'][3] <= job_end_time + ] + + + matches = [] + mismatches = [] + gt_unmatched = gt_annotations.copy() + ds_unmatched = ds_annotations.copy() + pairwise_distances = {} + + for gt_ann in gt_annotations: + matched = False + best_mismatch_pair = None + best_mismatch_iou = 0 # Initial best IoU for mismatches + + for ds_ann in ds_annotations: + gt_interval = (gt_ann['points'][0], gt_ann['points'][3]) + ds_interval = (ds_ann['points'][0], ds_ann['points'][3]) + iou = _interval_iou(gt_interval, ds_interval) + + if gt_ann['label_id'] == ds_ann['label_id']: + if iou >= self.iou_threshold: + matches.append((gt_ann, ds_ann)) + pairwise_distances[(id(gt_ann), id(ds_ann))] = iou + if gt_ann in gt_unmatched: + gt_unmatched.remove(gt_ann) + if ds_ann in ds_unmatched: + ds_unmatched.remove(ds_ann) + matched = True + else: + # Update best mismatch if this is the highest IoU seen so far + if iou > best_mismatch_iou: + best_mismatch_iou = iou + best_mismatch_pair = (gt_ann, ds_ann) + + # If no match was found and there is a best mismatch pair + if not matched and best_mismatch_pair is not None and best_mismatch_iou >= self.iou_threshold: + mismatches.append(best_mismatch_pair) + pairwise_distances[(id(best_mismatch_pair[0]), id(best_mismatch_pair[1]))] = best_mismatch_iou + + return [matches, mismatches, gt_unmatched, ds_unmatched, pairwise_distances] + + def match_attrs(self, ann_a, ann_b): #ann_a -> gt, ann_b -> ds + a_attrs = ann_a['attributes'] + b_attrs = ann_b['attributes'] + + matches = [] + a_unmatched = a_attrs.copy() + b_unmatched = b_attrs.copy() + + for a_attr in a_attrs: + for b_attr in b_attrs: + if a_attr['spec_id'] == b_attr['spec_id'] and a_attr['value'] == b_attr['value']: + matches.append((a_attr, b_attr)) + if a_attr in a_unmatched: + a_unmatched.remove(a_attr) + if b_attr in b_unmatched: + b_unmatched.remove(b_attr) + break 
# Once matched, move to the next a_attr + + return matches, a_unmatched, b_unmatched + + def match_extra_parameters(self, gt_ann, ds_ann): + parameters = ['Gender', 'Locale', 'Accent', 'Emotion', 'Age'] + matches = [] + mismatches = [] + for param in parameters: + if gt_ann.get(param) == ds_ann.get(param): + matches.append(param) + else: + mismatches.append(param) + + return matches, mismatches + + + def calculate_wer(self, gt_transcript, ds_transcript): + """ + Calculate the Word Error Rate (WER) between a ground truth transcript and an annotated transcript. + """ + + gt_transcript = gt_transcript.lower() + ds_transcript = ds_transcript.lower() + + gt_words = gt_transcript.split() + ds_words = ds_transcript.split() + + if len(gt_words) == 0: + if len(ds_words) == 0: + return 0.0 # Both transcripts are empty + else: + return 1.0 # Ground truth transcript is empty but annotation transcript is not + + d = np.zeros((len(gt_words) + 1, len(ds_words) + 1), dtype=int) + + for i in range(len(gt_words) + 1): + d[i][0] = i + for j in range(len(ds_words) + 1): + d[0][j] = j + + for i in range(1, len(gt_words) + 1): + for j in range(1, len(ds_words) + 1): + if gt_words[i - 1] == ds_words[j - 1]: + d[i][j] = d[i - 1][j - 1] + else: + d[i][j] = min(d[i - 1][j] + 1, # deletion + d[i][j - 1] + 1, # insertion + d[i - 1][j - 1] + 1) # substitution + + wer = d[len(gt_words)][len(ds_words)] / float(len(gt_words)) + return wer + + def calculate_cer(self, gt_transcript, ds_transcript): + """ + Calculate the Character Error Rate (CER) between a ground truth transcript and an annotated transcript. + """ + + gt_transcript = gt_transcript.lower() + ds_transcript = ds_transcript.lower() + + gt_chars = list(gt_transcript) + ds_chars = list(ds_transcript) + + if len(gt_chars) == 0: + if len(ds_chars) == 0: + return 0.0 # Both transcripts are empty + else: + return 1.0 # Ground truth transcript is empty but annotation transcript is not + + d = np.zeros((len(gt_chars) + 1, len(ds_chars) + 1), dtype=int) + + for i in range(len(gt_chars) + 1): + d[i][0] = i + for j in range(len(ds_chars) + 1): + d[0][j] = j + + for i in range(1, len(gt_chars) + 1): + for j in range(1, len(ds_chars) + 1): + if gt_chars[i - 1] == ds_chars[j - 1]: + d[i][j] = d[i - 1][j - 1] + else: + d[i][j] = min(d[i - 1][j] + 1, # deletion + d[i][j - 1] + 1, # insertion + d[i - 1][j - 1] + 1) # substitution + + cer = d[len(gt_chars)][len(ds_chars)] / float(len(gt_chars)) + return cer + + + def _generate_job_annotation_conflicts( + self, job_results, gt_annotations, ds_annotations + ) -> List[AnnotationConflict]: + conflicts = [] + job_id = self._job_id + + matches, mismatches, gt_unmatched, ds_unmatched, pairwise_distances = job_results + + for unmatched_ann in gt_unmatched: + conflicts.append( + AnnotationConflict( + frame_id=job_id, + type=AnnotationConflictType.MISSING_ANNOTATION, + annotation_ids=[self._dm_ann_to_ann_id(unmatched_ann)], + ) + ) + + for unmatched_ann in ds_unmatched: + conflicts.append( + AnnotationConflict( + frame_id=job_id, + type=AnnotationConflictType.EXTRA_ANNOTATION, + annotation_ids=[self._dm_ann_to_ann_id(unmatched_ann)], + ) + ) + + for gt_ann, ds_ann in mismatches: + conflicts.append( + AnnotationConflict( + frame_id=job_id, + type=AnnotationConflictType.MISMATCHING_LABEL, + annotation_ids=[ + self._dm_ann_to_ann_id(gt_ann), + self._dm_ann_to_ann_id(ds_ann) + ], + ) + ) + + for gt_ann, ds_ann in matches: + gt_transcript = gt_ann['transcript'] + ds_transcript = ds_ann['transcript'] + wer = self.calculate_wer(gt_transcript, 
ds_transcript) + cer = self.calculate_cer(gt_transcript, ds_transcript) + if wer > self.wer_threshold or cer > self.cer_threshold: + conflicts.append( + AnnotationConflict( + frame_id=job_id, + type=AnnotationConflictType.MISMATCHING_TRANSCRIPT, + annotation_ids=[ + self._dm_ann_to_ann_id(gt_ann), + self._dm_ann_to_ann_id(ds_ann), + ], + word_error_rate=wer, + character_error_rate=cer, + ) + ) + + if self.settings.compare_attributes: + for gt_ann, ds_ann in matches: + attribute_results = self.match_attrs(gt_ann, ds_ann) + if any(attribute_results[1:]): + conflicts.append( + AnnotationConflict( + frame_id=job_id, + type=AnnotationConflictType.MISMATCHING_ATTRIBUTES, + annotation_ids=[ + self._dm_ann_to_ann_id(gt_ann), + self._dm_ann_to_ann_id(ds_ann), + ], + ) + ) + + if self.settings.compare_extra_parameters: + for gt_ann, ds_ann in matches: + extra_parameter_results = self.match_extra_parameters(gt_ann, ds_ann) + if any(extra_parameter_results[1:]): + conflicts.append( + AnnotationConflict( + frame_id=job_id, + type=AnnotationConflictType.MISMATCHING_EXTRA_PARAMETERS, + annotation_ids=[ + self._dm_ann_to_ann_id(gt_ann), + self._dm_ann_to_ann_id(ds_ann), + ], + ) + ) + + valid_shapes_count = len(matches) + len(mismatches) + missing_shapes_count = len(gt_unmatched) + extra_shapes_count = len(ds_unmatched) + total_shapes_count = len(matches) + len(mismatches) + len(gt_unmatched) + len(ds_unmatched) + ds_shapes_count = len(matches) + len(mismatches) + len(ds_unmatched) + gt_shapes_count = len(matches) + len(mismatches) + len(gt_unmatched) + + valid_labels_count = len(matches) + invalid_labels_count = len(mismatches) + total_labels_count = valid_labels_count + invalid_labels_count + + # Get labels from project returns a queryset) + labels_queryset = self._ds_data_provider.job_data._db_task.project.get_labels() + + # Convert queryset to a dictionary of labels + confusion_matrix_labels = { + label.id: label.name + for i, label in enumerate(labels_queryset) + if not label.parent + } + confusion_matrix_labels[None] = "unmatched" + confusion_matrix_labels_rmap = {k: i for i, k in enumerate(confusion_matrix_labels.keys())} + confusion_matrix_label_count = len(confusion_matrix_labels) + confusion_matrix = np.zeros( + (confusion_matrix_label_count, confusion_matrix_label_count), dtype=int + ) + for gt_ann, ds_ann in itertools.chain( + # fully matched annotations - shape, label, attributes + matches, + mismatches, + zip(itertools.repeat(None), ds_unmatched), + zip(gt_unmatched, itertools.repeat(None)), + ): + ds_label_idx = confusion_matrix_labels_rmap[ds_ann["label_id"] if ds_ann else None] + gt_label_idx = confusion_matrix_labels_rmap[gt_ann["label_id"] if gt_ann else None] + confusion_matrix[ds_label_idx, gt_label_idx] += 1 + + matched_ann_counts = np.diag(confusion_matrix) + ds_ann_counts = np.sum(confusion_matrix, axis=1) + gt_ann_counts = np.sum(confusion_matrix, axis=0) + label_accuracies = _arr_div( + matched_ann_counts, ds_ann_counts + gt_ann_counts - matched_ann_counts + ) + label_precisions = _arr_div(matched_ann_counts, ds_ann_counts) + label_recalls = _arr_div(matched_ann_counts, gt_ann_counts) + + valid_annotations_count = np.sum(matched_ann_counts) + missing_annotations_count = np.sum(confusion_matrix[confusion_matrix_labels_rmap[None], :]) + extra_annotations_count = np.sum(confusion_matrix[:, confusion_matrix_labels_rmap[None]]) + total_annotations_count = np.sum(confusion_matrix) + ds_annotations_count = ( + np.sum(ds_ann_counts) - ds_ann_counts[confusion_matrix_labels_rmap[None]] + ) 
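# Editorial worked example, not part of the patch, for the calculate_wer /
# calculate_cer helpers defined above: with gt = "the cat sat" and
# ds = "the cat sat down", the word-level edit distance is one insertion over
# three reference words (WER = 1/3), and " down" adds five characters over
# eleven reference characters (CER = 5/11). Both exceed the default
# wer_threshold / cer_threshold of 0.2, so such a pair would be reported as a
# MISMATCHING_TRANSCRIPT conflict with both rates attached.
assert abs(self.calculate_wer("the cat sat", "the cat sat down") - 1 / 3) < 1e-9
assert abs(self.calculate_cer("the cat sat", "the cat sat down") - 5 / 11) < 1e-9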
+
+    def generate_audio_report(self) -> ComparisonReport:
+        self._find_audio_gt_conflicts()
+
+        # accumulate stats
+        intersection_frames = []
+        conflicts = []
+        annotations = ComparisonReportAnnotationsSummary(
+            valid_count=0,
+            missing_count=0,
+            extra_count=0,
+            total_count=0,
+            ds_count=0,
+            gt_count=0,
+            confusion_matrix=None,
+        )
+        annotation_components = ComparisonReportAnnotationComponentsSummary(
+            shape=ComparisonReportAnnotationShapeSummary(
+                valid_count=0,
+                missing_count=0,
+                extra_count=0,
+                total_count=0,
+                ds_count=0,
+                gt_count=0,
+                mean_iou=0,
+            ),
+            label=ComparisonReportAnnotationLabelSummary(
+                valid_count=0,
+                invalid_count=0,
+                total_count=0,
+            ),
+        )
+        mean_ious = []
+        confusion_matrices = []
+
+        for job_id, job_result in self._frame_results.items():
+            intersection_frames.append(job_id)
+            conflicts += job_result.conflicts
+
+            if annotations is None:
+                annotations = deepcopy(job_result.annotations)
+            else:
+                annotations.accumulate(job_result.annotations)
+            confusion_matrices.append(job_result.annotations.confusion_matrix.rows)
+
+            if annotation_components is None:
+                annotation_components = deepcopy(job_result.annotation_components)
+            else:
+                annotation_components.accumulate(job_result.annotation_components)
+            mean_ious.append(job_result.annotation_components.shape.mean_iou)
+
+        # Get labels from the project (returns a queryset)
+        labels_queryset = self._ds_data_provider.job_data._db_task.project.get_labels()
+
+        # Convert the queryset to a dictionary of labels
+        confusion_matrix_labels = {
+            label.id: label.name
+            for label in labels_queryset
+            if not label.parent
+        }
+        confusion_matrix_labels[None] = "unmatched"
+        confusion_matrix_labels_rmap = {k: i for i, k in enumerate(confusion_matrix_labels.keys())}
+        if confusion_matrices:
+            confusion_matrix = np.sum(confusion_matrices, axis=0)
+        else:
+            confusion_matrix = np.zeros(
+                (len(confusion_matrix_labels), len(confusion_matrix_labels)), dtype=int
+            )
+        matched_ann_counts = np.diag(confusion_matrix)
+        ds_ann_counts = np.sum(confusion_matrix, axis=1)
+        gt_ann_counts = np.sum(confusion_matrix, axis=0)
+        label_accuracies = _arr_div(
+            matched_ann_counts, ds_ann_counts + gt_ann_counts - matched_ann_counts
+        )
+        label_precisions = _arr_div(matched_ann_counts, ds_ann_counts)
+        label_recalls = _arr_div(matched_ann_counts, gt_ann_counts)
+
+        valid_annotations_count = np.sum(matched_ann_counts)
+        missing_annotations_count = np.sum(confusion_matrix[confusion_matrix_labels_rmap[None], :])
+        extra_annotations_count = np.sum(confusion_matrix[:, confusion_matrix_labels_rmap[None]])
+        total_annotations_count = np.sum(confusion_matrix)
+        ds_annotations_count = (
+            np.sum(ds_ann_counts) - ds_ann_counts[confusion_matrix_labels_rmap[None]]
+        )
+        gt_annotations_count = (
+            np.sum(gt_ann_counts) - gt_ann_counts[confusion_matrix_labels_rmap[None]]
+        )
+
+        return ComparisonReport(
+            parameters=self.settings,
+            comparison_summary=ComparisonReportComparisonSummary(
+                frame_share=(
+                    len(intersection_frames) / (len(self._ds_data_provider.job_data.rel_range) or 1)
+                ),
+                frames=intersection_frames,
+                conflict_count=len(conflicts),
+                warning_count=len(
+                    [c for c in conflicts if c.severity == AnnotationConflictSeverity.WARNING]
+                ),
+                error_count=len(
+                    [c for c in conflicts if c.severity == AnnotationConflictSeverity.ERROR]
+                ),
+                conflicts_by_type=Counter(c.type for c in conflicts),
+                annotations=ComparisonReportAnnotationsSummary(
+                    valid_count=valid_annotations_count,
+                    missing_count=missing_annotations_count,
+                    extra_count=extra_annotations_count,
+                    total_count=total_annotations_count,
+                    ds_count=ds_annotations_count,
+                    gt_count=gt_annotations_count,
+                    confusion_matrix=ConfusionMatrix(
+                        labels=list(confusion_matrix_labels.values()),
+                        rows=confusion_matrix,
+                        precision=label_precisions,
+                        recall=label_recalls,
+                        accuracy=label_accuracies,
+                    ),
+                ),
+                annotation_components=ComparisonReportAnnotationComponentsSummary(
+                    shape=ComparisonReportAnnotationShapeSummary(
+                        valid_count=annotation_components.shape.valid_count,
+                        missing_count=annotation_components.shape.missing_count,
+                        extra_count=annotation_components.shape.extra_count,
+                        total_count=annotation_components.shape.total_count,
+                        ds_count=annotation_components.shape.ds_count,
+                        gt_count=annotation_components.shape.gt_count,
+                        mean_iou=np.mean(mean_ious),
+                    ),
+                    label=ComparisonReportAnnotationLabelSummary(
+                        valid_count=annotation_components.label.valid_count,
+                        invalid_count=annotation_components.label.invalid_count,
+                        total_count=annotation_components.label.total_count,
+                    ),
+                ),
+            ),
+            frame_results=self._frame_results,
+        )
+
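Likewise, _arr_div is used above for the per-label metrics but defined elsewhere in the module. The precision/recall/accuracy computations rely on it behaving as a zero-safe element-wise division, roughly as in this sketch (assumed behavior, not the verified implementation):

import numpy as np

def _arr_div(a_arr, b_arr):
    # Element-wise a / b, yielding 0 where the denominator is 0 instead of
    # NaN/inf, so labels with no annotations get 0 precision/recall/accuracy.
    return np.divide(
        a_arr, b_arr,
        out=np.zeros_like(a_arr, dtype=float),
        where=b_arr != 0,
    )

# e.g. matched counts [3, 0] over ds counts [4, 0] -> precisions [0.75, 0.0]
print(_arr_div(np.array([3, 0]), np.array([4, 0])))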
 class QualityReportUpdateManager:
     _QUEUE_JOB_PREFIX = "update-quality-metrics-task-"
     _RQ_CUSTOM_QUALITY_CHECK_JOB_TYPE = "custom_quality_check"
@@ -2277,6 +2858,7 @@ def _compute_reports(self, task_id: int) -> int:
         gt_job_frames = gt_job_data_provider.job_data.get_included_frames()

         jobs: List[Job] = [j for j in job_queryset if j.type == JobType.ANNOTATION]
+        jobs = sorted(jobs, key=lambda job: job.id)
         job_data_providers = {
             job.id: JobDataProvider(
                 job.id, queryset=job_queryset, included_frames=gt_job_frames
@@ -2286,14 +2868,31 @@ def _compute_reports(self, task_id: int) -> int:

         quality_params = self._get_task_quality_params(task)

+        job_duration = (task.data.chunk_size * task.audio_total_duration / (task.data.stop_frame + 1)) / 1000  # in seconds
+
         job_comparison_reports: Dict[int, ComparisonReport] = {}
+        ind = 0  # job index, used to compute the offset for intersecting jobs
         for job in jobs:
-            job_data_provider = job_data_providers[job.id]
-            comparator = DatasetComparator(
-                job_data_provider, gt_job_data_provider, settings=quality_params
+            job_id = job.id
+            job_data_provider = job_data_providers[job_id]
+            # comparator = DatasetComparator(
+            #     job_data_provider, gt_job_data_provider, settings=quality_params
+            # )
+            # job_comparison_reports[job.id] = comparator.generate_report()
+            offset = ind * job_duration  # required only when jobs are intersecting
+
+            start = job_data_provider.job_data.start
+            end = job_data_provider.job_data.stop - 1
+            gt_frame_list = list(gt_job_frames)
+            if not (start in gt_frame_list or end in gt_frame_list):
+                offset = 0
+                ind -= 1
+
+            comparator = AudioDatasetComparator(
+                job_data_provider, gt_job_data_provider, offset, job_duration, settings=quality_params
             )
-            job_comparison_reports[job.id] = comparator.generate_report()
-
+            job_comparison_reports[job_id] = comparator.generate_audio_report()
+            ind += 1
             # Release resources
             del job_data_provider.dm_dataset
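To make the offset arithmetic concrete: job_duration converts the task's chunk size into seconds of audio per job, assuming audio_total_duration is stored in milliseconds (hence the final division by 1000). A worked example with hypothetical numbers:

# Hypothetical task: 10 minutes of audio, 600 frames, 60 frames per chunk.
chunk_size = 60                  # frames per job chunk
audio_total_duration = 600_000   # total audio length, in milliseconds
stop_frame = 599                 # last frame index, so 600 frames in total

# Same formula as in _compute_reports above.
job_duration = (chunk_size * audio_total_duration / (stop_frame + 1)) / 1000
print(job_duration)  # 60.0 -> the k-th intersecting job is offset by k * 60 s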
@@ -2458,6 +3057,8 @@ def _save_reports(self, *, task_report: Dict, job_reports: List[Dict]) -> models
                 type=conflict["type"],
                 frame=conflict["frame_id"],
                 severity=conflict["severity"],
+                word_error_rate=conflict["word_error_rate"],
+                character_error_rate=conflict["character_error_rate"],
             )
             db_conflicts.append(db_conflict)
diff --git a/cvat/apps/quality_control/serializers.py b/cvat/apps/quality_control/serializers.py
index 711799dcef61..dbac6ee6ced3 100644
--- a/cvat/apps/quality_control/serializers.py
+++ b/cvat/apps/quality_control/serializers.py
@@ -21,7 +21,7 @@ class AnnotationConflictSerializer(serializers.ModelSerializer):

     class Meta:
         model = models.AnnotationConflict
-        fields = ("id", "frame", "type", "annotation_ids", "report_id", "severity")
+        fields = ("id", "frame", "type", "annotation_ids", "report_id", "severity", "word_error_rate", "character_error_rate")
         read_only_fields = fields
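With the two new model fields exposed by the serializer, a serialized conflict now carries the transcript metrics. A hypothetical payload (all values illustrative, including the annotation_ids shape):

# Hypothetical serialized AnnotationConflict after the field additions above.
example_conflict = {
    "id": 17,
    "frame": 3,
    "type": "mismatching_transcript",
    "annotation_ids": [{"obj_id": 101, "job_id": 3, "type": "shape"}],
    "report_id": 5,
    "severity": "warning",
    "word_error_rate": 0.25,       # new field
    "character_error_rate": 0.08,  # new field
}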
diff --git a/cvat/requirements/base.in b/cvat/requirements/base.in
index c4a1380d961c..6a0e272abd11 100644
--- a/cvat/requirements/base.in
+++ b/cvat/requirements/base.in
@@ -53,4 +53,5 @@ rq==1.15.1
 rules>=3.3
 Shapely==1.7.1
 tensorflow==2.11.1 # Optional requirement of Datumaro. Use tensorflow-macos==2.8.0 for Mac M1
-soundfile==0.12.1
\ No newline at end of file
+soundfile==0.12.1
+chardet==5.2.0
\ No newline at end of file
diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt
index bcccfb1ee658..ca16427706b4 100644
--- a/cvat/requirements/base.txt
+++ b/cvat/requirements/base.txt
@@ -409,3 +409,4 @@ setuptools==68.2.2
     # tensorflow
 soundfile==0.12.1
+chardet==5.2.0
\ No newline at end of file
diff --git a/cvat/requirements/development.in b/cvat/requirements/development.in
index 4d824be221cc..de43d0a947cd 100644
--- a/cvat/requirements/development.in
+++ b/cvat/requirements/development.in
@@ -8,4 +8,5 @@ pylint-plugin-utils==0.7
 pylint==2.14.5
 rope==0.17.0
 snakeviz==2.1.0
-soundfile==0.12.1
\ No newline at end of file
+soundfile==0.12.1
+chardet==5.2.0
\ No newline at end of file
diff --git a/cvat/requirements/development.txt b/cvat/requirements/development.txt
index 2d36b030a02b..a1cd030bdad6 100644
--- a/cvat/requirements/development.txt
+++ b/cvat/requirements/development.txt
@@ -62,5 +62,5 @@ tornado==6.3.3
     # via snakeviz
 soundfile==0.12.1
-
+chardet==5.2.0
 # The following packages are considered to be unsafe in a requirements file:
diff --git a/cvat/requirements/production.txt b/cvat/requirements/production.txt
index 16360b4e3553..16db54eaefda 100644
--- a/cvat/requirements/production.txt
+++ b/cvat/requirements/production.txt
@@ -29,4 +29,5 @@ watchfiles==0.20.0
 websockets==11.0.3
     # via uvicorn
 soundfile==0.12.1
+chardet==5.2.0
 # The following packages are considered to be unsafe in a requirements file:
diff --git a/cvat/settings/base.py b/cvat/settings/base.py
index e3d2e6cebfd1..910a6b9838fc 100644
--- a/cvat/settings/base.py
+++ b/cvat/settings/base.py
@@ -204,7 +204,7 @@ def generate_secret_key():
     'cvat.apps.iam.views.ContextMiddleware',
 ]

-UI_URL = ''
+UI_URL = 'https://app.audino.in'

 STATICFILES_FINDERS = [
     'django.contrib.staticfiles.finders.FileSystemFinder',
@@ -270,9 +270,9 @@ def GET_IAM_DEFAULT_ROLES(user) -> list:

 # set UI url to redirect after a successful e-mail confirmation
 #changed from '/auth/login' to '/auth/email-confirmation' for email confirmation message
-ACCOUNT_EMAIL_CONFIRMATION_ANONYMOUS_REDIRECT_URL = '/auth/email-confirmation'
-ACCOUNT_EMAIL_VERIFICATION_SENT_REDIRECT_URL = '/auth/email-verification-sent'
-INCORRECT_EMAIL_CONFIRMATION_URL = '/auth/incorrect-email-confirmation'
+ACCOUNT_EMAIL_CONFIRMATION_ANONYMOUS_REDIRECT_URL = f'{UI_URL}/auth/email-confirmation'
+ACCOUNT_EMAIL_VERIFICATION_SENT_REDIRECT_URL = f'{UI_URL}/auth/email-verification-sent'
+INCORRECT_EMAIL_CONFIRMATION_URL = f'{UI_URL}/auth/incorrect-email-confirmation'

 OLD_PASSWORD_FIELD_ENABLED = True
@@ -568,6 +568,7 @@ class CVAT_QUEUES(Enum):
     'upload-finish',
     'upload-multiple',
     'x-organization',
+    'upload-metadata',
 ]

 TUS_MAX_FILE_SIZE = 26843545600 # 25gb
diff --git a/cvat/settings/email_settings.py b/cvat/settings/email_settings.py
index d3f9621e09d4..c40866a12d48 100644
--- a/cvat/settings/email_settings.py
+++ b/cvat/settings/email_settings.py
@@ -10,7 +10,7 @@
 ACCOUNT_AUTHENTICATION_METHOD = 'username_email'
 ACCOUNT_CONFIRM_EMAIL_ON_GET = True
 ACCOUNT_EMAIL_REQUIRED = True
-ACCOUNT_EMAIL_VERIFICATION = 'mandatory'
+ACCOUNT_EMAIL_VERIFICATION = 'none'

 # Email backend settings for Django
 EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
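Since the confirmation redirects are now derived from UI_URL, they resolve to absolute URLs on the frontend origin instead of backend-relative paths. A quick check of what the f-strings above evaluate to:

UI_URL = 'https://app.audino.in'

ACCOUNT_EMAIL_CONFIRMATION_ANONYMOUS_REDIRECT_URL = f'{UI_URL}/auth/email-confirmation'
assert (ACCOUNT_EMAIL_CONFIRMATION_ANONYMOUS_REDIRECT_URL
        == 'https://app.audino.in/auth/email-confirmation')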