Skip to content
This repository has been archived by the owner on Aug 21, 2023. It is now read-only.

Commit

Permalink
Images should be searched relative to the manifest file on cloud storage (#8)
Browse files Browse the repository at this point in the history
  • Loading branch information
Marishka17 authored Aug 1, 2022
1 parent 4cd6b05 commit 164c2c3
Show file tree
Hide file tree
Showing 13 changed files with 96 additions and 37 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Request Status Code 500 "StopIteration" when exporting dataset
- Generated OpenAPI schema for several endpoints
- Annotation window might have a top offset when trying to move a locked object
- Image search in cloud storage (<https://github.com/cvat-ai/cvat/pull/8>)

### Security
- TBD
Expand Down
2 changes: 1 addition & 1 deletion cvat-sdk/.openapi-generator/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
6.0.1-SNAPSHOT
6.1.0-SNAPSHOT
10 changes: 3 additions & 7 deletions cvat-sdk/cvat_sdk/api/cloud_storages_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,9 +385,7 @@ def __init__(self, api_client=None):
)
self.cloudstorages_retrieve_content_endpoint = _Endpoint(
settings={
"response_schema": (
{str: (bool, date, datetime, dict, float, int, list, str, none_type)},
),
"response_schema": ([str],),
"auth": ["SignatureAuthentication", "basicAuth", "cookieAuth", "tokenAuth"],
"endpoint_path": "/api/cloudstorages/{id}/content",
"operation_id": "cloudstorages_retrieve_content",
Expand Down Expand Up @@ -1050,9 +1048,7 @@ def cloudstorages_retrieve_content(
_request_auths: typing.Optional[typing.List] = None,
_async_call: bool = False,
**kwargs,
) -> typing.Tuple[
typing.Optional[typing.Dict[str, typing.Union[typing.Any, none_type]]], urllib3.HTTPResponse
]:
) -> typing.Tuple[typing.Optional[typing.List[str]], urllib3.HTTPResponse]:
"""Method returns a manifest content # noqa: E501
This method makes a synchronous HTTP request by default. To make an
Expand Down Expand Up @@ -1102,7 +1098,7 @@ def cloudstorages_retrieve_content(
_async_call (bool): execute request asynchronously
Returns:
({str: (bool, date, datetime, dict, float, int, list, str, none_type)}, HTTPResponse)
([str], HTTPResponse)
If the method is called asynchronously, returns the request
thread.
"""
Expand Down
4 changes: 2 additions & 2 deletions cvat-sdk/docs/CloudStoragesApi.md
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,7 @@ Name | Type | Description | Notes
[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md)

# **cloudstorages_retrieve_content**
> {str: (bool, date, datetime, dict, float, int, list, str, none_type)} cloudstorages_retrieve_content(id)
> [str] cloudstorages_retrieve_content(id)
Method returns a manifest content

Expand Down Expand Up @@ -826,7 +826,7 @@ Name | Type | Description | Notes

### Return type

**{str: (bool, date, datetime, dict, float, int, list, str, none_type)}**
**[str]**

### Authorization

Expand Down
14 changes: 13 additions & 1 deletion cvat/apps/engine/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,8 +348,20 @@ def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file),
db_data.cloud_storage.get_storage_dirname()
)
cloud_storage_manifest_prefix = os.path.dirname(manifest_file)
cloud_storage_manifest.set_index()
sequence, content = cloud_storage_manifest.get_subset(sorted_media)
if cloud_storage_manifest_prefix:
sorted_media_without_manifest_prefix = [
os.path.relpath(i, cloud_storage_manifest_prefix) for i in sorted_media
]
sequence, raw_content = cloud_storage_manifest.get_subset(sorted_media_without_manifest_prefix)
def _add_prefix(properties):
    """Prepend the manifest directory to an entry's ``name`` and return the entry.

    The ``properties`` mapping is updated in place; the same object is
    returned so the helper can be used directly with ``map``.
    """
    # Subset lookup was done with prefix-less names, so the directory of the
    # manifest file is joined back onto each resulting entry name.
    properties['name'] = os.path.join(cloud_storage_manifest_prefix, properties['name'])
    return properties
content = list(map(_add_prefix, raw_content))
else:
sequence, content = cloud_storage_manifest.get_subset(sorted_media)
sorted_content = (i[1] for i in sorted(zip(sequence, content)))
manifest.create(sorted_content)

Expand Down
10 changes: 7 additions & 3 deletions cvat/apps/engine/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
OpenApiParameter, OpenApiResponse, PolymorphicProxySerializer,
extend_schema_view, extend_schema
)
from drf_spectacular.plumbing import build_array_type, build_basic_type

from rest_framework import mixins, serializers, status, viewsets
from rest_framework.decorators import action
Expand Down Expand Up @@ -1895,7 +1896,7 @@ def create(self, request, *args, **kwargs):
location=OpenApiParameter.QUERY, type=OpenApiTypes.STR),
],
responses={
'200': OpenApiResponse(response=OpenApiTypes.OBJECT, description='A manifest content'),
'200': OpenApiResponse(response=build_array_type(build_basic_type(OpenApiTypes.STR)), description='A manifest content'),
})
@action(detail=True, methods=['GET'], url_path='content')
def content(self, request, pk):
Expand All @@ -1906,6 +1907,7 @@ def content(self, request, pk):
if not db_storage.manifests.count():
raise Exception('There is no manifest file')
manifest_path = request.query_params.get('manifest_path', db_storage.manifests.first().filename)
manifest_prefix = os.path.dirname(manifest_path)
file_status = storage.get_file_status(manifest_path)
if file_status == CloudStorageStatus.NOT_FOUND:
raise FileNotFoundError(errno.ENOENT,
Expand All @@ -1921,7 +1923,7 @@ def content(self, request, pk):
manifest = ImageManifestManager(full_manifest_path, db_storage.get_storage_dirname())
# need to update index
manifest.set_index()
manifest_files = manifest.data
manifest_files = [os.path.join(manifest_prefix, f) for f in manifest.data]
return Response(data=manifest_files, content_type="text/plain")

except CloudStorageModel.DoesNotExist:
Expand Down Expand Up @@ -1958,6 +1960,7 @@ def preview(self, request, pk):
raise Exception('Cannot get the cloud storage preview. There is no manifest file')
preview_path = None
for manifest_model in db_storage.manifests.all():
manifest_prefix = os.path.dirname(manifest_model.filename)
full_manifest_path = os.path.join(db_storage.get_storage_dirname(), manifest_model.filename)
if not os.path.exists(full_manifest_path) or \
datetime.utcfromtimestamp(os.path.getmtime(full_manifest_path)).replace(tzinfo=pytz.UTC) < storage.get_file_last_modified(manifest_model.filename):
Expand All @@ -1971,7 +1974,8 @@ def preview(self, request, pk):
if not len(manifest):
continue
preview_info = manifest[0]
preview_path = ''.join([preview_info['name'], preview_info['extension']])
preview_filename = ''.join([preview_info['name'], preview_info['extension']])
preview_path = os.path.join(manifest_prefix, preview_filename)
break
if not preview_path:
msg = 'Cloud storage {} does not contain any images'.format(pk)
Expand Down
4 changes: 2 additions & 2 deletions tests/rest_api/assets/cloudstorages.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
"display_name": "Bucket 2",
"id": 2,
"manifests": [
"manifest.jsonl"
"sub/manifest.jsonl"
],
"organization": 2,
"owner": {
Expand All @@ -45,7 +45,7 @@
"provider_type": "AWS_S3_BUCKET",
"resource": "private",
"specific_attributes": "endpoint_url=http%3A%2F%2Fminio%3A9000",
"updated_date": "2022-03-17T07:23:59.309000Z"
"updated_date": "2022-07-13T12:46:45.587000Z"
},
{
"created_date": "2022-03-17T07:22:49.519000Z",
Expand Down
Binary file modified tests/rest_api/assets/cvat_db/cvat_data.tar.bz2
Binary file not shown.
4 changes: 2 additions & 2 deletions tests/rest_api/assets/cvat_db/data.json
Original file line number Diff line number Diff line change
Expand Up @@ -4657,7 +4657,7 @@
"model": "engine.manifest",
"pk": 2,
"fields": {
"filename": "manifest.jsonl",
"filename": "sub/manifest.jsonl",
"cloud_storage": 2
}
},
Expand Down Expand Up @@ -4699,7 +4699,7 @@
"business2"
],
"created_date": "2022-03-17T07:23:59.305Z",
"updated_date": "2022-03-17T07:23:59.309Z",
"updated_date": "2022-07-13T12:46:45.587Z",
"credentials": "minio_access_key minio_secret_key",
"credentials_type": "KEY_SECRET_KEY_PAIR",
"specific_attributes": "endpoint_url=http%3A%2F%2Fminio%3A9000",
Expand Down
2 changes: 1 addition & 1 deletion tests/rest_api/assets/users.json
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@
"is_active": true,
"is_staff": true,
"is_superuser": true,
"last_login": "2022-06-29T12:55:15.511000Z",
"last_login": "2022-07-13T12:46:07.059000Z",
"last_name": "First",
"url": "http://localhost:8080/api/users/1",
"username": "admin1"
Expand Down
10 changes: 8 additions & 2 deletions tests/rest_api/docker-compose.minio.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,16 @@ services:
$${MC_PATH} mb $${MINIO_ALIAS}/$${PRIVATE_BUCKET} $${MINIO_ALIAS}/$${PUBLIC_BUCKET} $${MINIO_ALIAS}/$${TEST_BUCKET} $${MINIO_ALIAS}/$${IMPORT_EXPORT_BUCKET};
for BUCKET in $${MINIO_ALIAS}/$${PRIVATE_BUCKET} $${MINIO_ALIAS}/$${PUBLIC_BUCKET} $${MINIO_ALIAS}/$${TEST_BUCKET} $${MINIO_ALIAS}/$${IMPORT_EXPORT_BUCKET};
do
$${MC_PATH} cp --recursive /storage/ $${BUCKET};
if [ $${BUCKET} == $${MINIO_ALIAS}/$${PRIVATE_BUCKET} ]
then
FULL_PATH=$${BUCKET}/'sub'
else
FULL_PATH=$${BUCKET}
fi
$${MC_PATH} cp --recursive /storage/ $${FULL_PATH};
for i in 1 2;
do
$${MC_PATH} cp /storage/manifest.jsonl $${BUCKET}/manifest_$${i}.jsonl;
$${MC_PATH} cp /storage/manifest.jsonl $${FULL_PATH}/manifest_$${i}.jsonl;
done;
done;
$${MC_PATH} policy set public $${MINIO_ALIAS}/$${PUBLIC_BUCKET};
Expand Down
17 changes: 13 additions & 4 deletions tests/rest_api/test_cloud_storages.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ def _test_can_see(self, user, storage_id, data, **kwargs):
response_data = response_data.get('results', response_data)

assert response.status_code == HTTPStatus.OK
assert DeepDiff(data, response_data, ignore_order=True) == {}
assert DeepDiff(data, response_data, ignore_order=True,
exclude_paths="root['updated_date']") == {}

def _test_cannot_see(self, user, storage_id, **kwargs):
response = get_method(user, f'cloudstorages/{storage_id}', **kwargs)
Expand Down Expand Up @@ -132,6 +133,14 @@ class TestPatchCloudStorage:
'manifest_2.jsonl',
],
}
_PRIVATE_BUCKET_SPEC = {
'display_name': 'New display name',
'description': 'New description',
'manifests': [
'sub/manifest_1.jsonl',
'sub/manifest_2.jsonl',
],
}
_EXCLUDE_PATHS = [
f"root['{extra_field}']" for extra_field in {
# unchanged fields
Expand All @@ -145,7 +154,7 @@ def _test_can_update(self, user, storage_id, spec, **kwargs):
response_data = response_data.get('results', response_data)

assert response.status_code == HTTPStatus.OK
assert DeepDiff(self._SPEC, response_data, ignore_order=True,
assert DeepDiff(spec, response_data, ignore_order=True,
exclude_paths=self._EXCLUDE_PATHS) == {}

assert response.status_code == HTTPStatus.OK
Expand Down Expand Up @@ -186,6 +195,6 @@ def test_org_user_update_cloud_storage(self, org_id, storage_id, role, is_owner,
next((u for u in find_users(role=role, org=org_id) if u['id'] != cloud_storage['owner']['id']))['username']

if is_allow:
self._test_can_update(username, storage_id, self._SPEC, org_id=org_id)
self._test_can_update(username, storage_id, self._PRIVATE_BUCKET_SPEC, org_id=org_id)
else:
self._test_cannot_update(username, storage_id, self._SPEC, org_id=org_id)
self._test_cannot_update(username, storage_id, self._PRIVATE_BUCKET_SPEC, org_id=org_id)
55 changes: 43 additions & 12 deletions tests/rest_api/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ def generate_image_files(count):

return images

def get_cloud_storage_content(username, cloud_storage_id, manifest):
    """Return the decoded JSON body of a cloud storage content request.

    The request is issued as ``username`` through the
    ``cloudstorages_retrieve_content`` endpoint for ``cloud_storage_id``,
    passing ``manifest`` as the ``manifest_path`` argument.
    """
    with make_api_client(username) as client:
        # The first element of the returned pair is ignored; the raw
        # response body is parsed instead.
        _, raw_response = client.cloud_storages_api.cloudstorages_retrieve_content(
            cloud_storage_id, manifest_path=manifest
        )
        return json.loads(raw_response.data)


@pytest.mark.usefixtures('dontchangedb')
class TestGetTasks:
Expand Down Expand Up @@ -272,7 +278,10 @@ def test_admin_can_export_task_dataset(self, tasks_with_shapes):
self._test_export_project('admin1', task['id'], format='CVAT for images 1.1')

@pytest.mark.usefixtures("changedb")
@pytest.mark.usefixtures("restore_cvat_data")
class TestPostTaskData:
_USERNAME = 'admin1'

@staticmethod
def _wait_until_task_is_created(api: TasksApi, task_id: int) -> RqStatus:
for _ in range(100):
Expand All @@ -282,14 +291,14 @@ def _wait_until_task_is_created(api: TasksApi, task_id: int) -> RqStatus:
sleep(1)
raise Exception('Cannot create task')

def _test_create_task(self, username, spec, data, files):
def _test_create_task(self, username, spec, data, content_type, **kwargs):
with make_api_client(username) as api_client:
(task, response) = api_client.tasks_api.create(TaskWriteRequest(**spec))
(task, response) = api_client.tasks_api.create(TaskWriteRequest(**spec), **kwargs)
assert response.status == HTTPStatus.CREATED

task_data = DataRequest(**data, client_files=list(files.values()))
task_data = DataRequest(**data)
(_, response) = api_client.tasks_api.create_data(task.id, task_data,
_content_type="multipart/form-data")
_content_type=content_type, **kwargs)
assert response.status == HTTPStatus.ACCEPTED

status = self._wait_until_task_is_created(api_client.tasks_api, task.id)
Expand All @@ -298,9 +307,8 @@ def _test_create_task(self, username, spec, data, files):
return task.id

def test_can_create_task_with_defined_start_and_stop_frames(self):
username = 'admin1'
task_spec = {
'name': f'test {username} to create a task with defined start and stop frames',
'name': f'test {self._USERNAME} to create a task with defined start and stop frames',
"labels": [{
"name": "car",
"color": "#ff00ff",
Expand All @@ -319,15 +327,38 @@ def test_can_create_task_with_defined_start_and_stop_frames(self):
task_data = {
'image_quality': 75,
'start_frame': 2,
'stop_frame': 5
}
task_files = {
f'client_files[{i}]': image for i, image in enumerate(generate_image_files(7))
'stop_frame': 5,
'client_files': generate_image_files(7),
}

task_id = self._test_create_task(username, task_spec, task_data, task_files)
task_id = self._test_create_task(self._USERNAME, task_spec, task_data, content_type="multipart/form-data")

# check task size
with make_api_client(username) as api_client:
with make_api_client(self._USERNAME) as api_client:
(task, _) = api_client.tasks_api.retrieve(task_id)
assert task.size == 4

@pytest.mark.parametrize('cloud_storage_id, manifest, org', [
    (1, 'manifest.jsonl', ''), # public bucket
    (2, 'sub/manifest.jsonl', 'org2'), # private bucket
])
def test_create_task_with_cloud_storage_files(self, cloud_storage_id, manifest, org):
    """Create a task whose data is the file list reported by a cloud storage.

    The storage content is requested for the given manifest, the manifest
    path itself is added to that list, and the result is submitted as
    ``server_files`` through ``_test_create_task`` using a JSON request
    within the given organization.
    """
    server_files = get_cloud_storage_content(self._USERNAME, cloud_storage_id, manifest)
    # The manifest file is sent together with the files it describes.
    server_files.append(manifest)

    data_spec = {
        'image_quality': 75,
        'use_cache': True,
        'storage': 'cloud_storage',
        'cloud_storage_id': cloud_storage_id,
        'server_files': server_files,
    }

    task_spec = {
        "name": f"Task with files from cloud storage {cloud_storage_id}",
        "labels": [{
            "name": "car",
        }],
    }

    # The created task id is not needed here; the helper performs the checks.
    self._test_create_task(
        self._USERNAME,
        task_spec,
        data_spec,
        content_type="application/json",
        org=org,
    )

0 comments on commit 164c2c3

Please sign in to comment.