Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extension of cloud storage server part #3386

Merged
merged 27 commits into from
Aug 31, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
8fa9caf
Add preview && some fixes
Marishka17 Jul 3, 2021
46587a9
Fix case with sub dirs on cloud storage
Marishka17 Jul 5, 2021
93eea37
Move server part from ui_support_cloud_storage && fix missing id field
Marishka17 Jul 6, 2021
0aebb6e
Add support_key_secret_key_pair
Marishka17 Jul 9, 2021
41aa91d
Fix several moments
Marishka17 Jul 18, 2021
7e56fa9
Add index resetting
Marishka17 Jul 18, 2021
f94ef7c
Fix pylint errors
Marishka17 Jul 18, 2021
1f2915b
Remove excess migration
Marishka17 Aug 2, 2021
2f5a6ef
Merge branch 'develop' into mk/expansion_server_cloud_storage
Marishka17 Aug 2, 2021
9a8faf4
tmp
Marishka17 Aug 10, 2021
8fb8207
Some fixes
Marishka17 Aug 12, 2021
070dbcf
Fixes
Marishka17 Aug 13, 2021
b3252b1
fix
Marishka17 Aug 13, 2021
deab61b
[server] Add cloud storage status && fixes
Marishka17 Aug 26, 2021
eac737a
Merge develop && resolve conflict
Marishka17 Aug 26, 2021
ea6a0d9
Remove unused import
Marishka17 Aug 26, 2021
3d01a28
Add manifest set_index method
Marishka17 Aug 26, 2021
32761b6
Implement status support for Azure blob container
Marishka17 Aug 26, 2021
f574fee
Move specific attributes parsing into utils
Marishka17 Aug 26, 2021
3ce5bcd
Fix missing in migration
Marishka17 Aug 26, 2021
d354b18
Fix error display
Marishka17 Aug 26, 2021
c1f68a7
some fix
Marishka17 Aug 27, 2021
174e0b4
Merge branch 'develop' into mk/expansion_server_cloud_storage
Marishka17 Aug 27, 2021
b0f42af
Update migration dependency
Marishka17 Aug 30, 2021
5f94b32
Update google cloud storage status
Marishka17 Aug 30, 2021
1cff091
Update migrations
Marishka17 Aug 30, 2021
39881bb
Update CHANGELOG
Marishka17 Aug 30, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 28 additions & 20 deletions cvat/apps/engine/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number):
step=db_data.get_frame_step())
if db_data.storage == StorageChoice.CLOUD_STORAGE:
db_cloud_storage = db_data.cloud_storage
assert db_cloud_storage, 'Cloud storage instance was deleted'
credentials = Credentials()
credentials.convert_from_db({
'type': db_cloud_storage.credentials_type,
Expand All @@ -81,26 +82,33 @@ def prepare_chunk_buff(self, db_data, quality, chunk_number):
'credentials': credentials,
'specific_attributes': db_cloud_storage.get_specific_attributes()
}
cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
cloud_storage_instance.initialize_content()
for item in reader:
# full_name may be 'sub_dir/image.jpeg'
full_name = f"{item['name']}{item['extension']}"
if full_name not in cloud_storage_instance:
raise Exception('{} file was not found on a {} storage'.format(full_name, cloud_storage_instance.name))
head, file_name = os.path.split(full_name)
abs_head = os.path.join(gettempdir(), head)
os.makedirs(abs_head, exist_ok=True)
with NamedTemporaryFile(mode='w+b', prefix='cvat', suffix=file_name, delete=False, dir=abs_head) as temp_file:
source_path = temp_file.name
buf = cloud_storage_instance.download_fileobj(full_name)
temp_file.write(buf.getvalue())
checksum = item.get('checksum', None)
if not checksum:
slogger.glob.warning('A manifest file does not contain checksum for image {}'.format(item.get('name')))
if checksum and not md5_hash(source_path) == checksum:
slogger.glob.warning('Hash sums of files {} do not match'.format(full_name))
images.append((source_path, source_path, None))
try:
cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
cloud_storage_instance.initialize_content()
for item in reader:
# full_name may be 'sub_dir/image.jpeg'
full_name = f"{item['name']}{item['extension']}"
if full_name not in cloud_storage_instance:
raise Exception('{} file was not found on a {} storage'.format(full_name, cloud_storage_instance.name))
head, file_name = os.path.split(full_name)
abs_head = os.path.join(gettempdir(), head)
os.makedirs(abs_head, exist_ok=True)
with NamedTemporaryFile(mode='w+b', prefix='cvat', suffix=file_name, delete=False, dir=abs_head) as temp_file:
source_path = temp_file.name
buf = cloud_storage_instance.download_fileobj(full_name)
temp_file.write(buf.getvalue())
checksum = item.get('checksum', None)
if not checksum:
slogger.cloud_storage[db_cloud_storage.id].warning('A manifest file does not contain checksum for image {}'.format(item.get('name')))
if checksum and not md5_hash(source_path) == checksum:
slogger.cloud_storage[db_cloud_storage.id].warning('Hash sums of files {} do not match'.format(full_name))
images.append((source_path, source_path, None))
except Exception as ex:
Marishka17 marked this conversation as resolved.
Show resolved Hide resolved
if not cloud_storage_instance.exists():
msg = 'The resource {} is no longer available. It may have been deleted'.format(cloud_storage_instance.name)
else:
msg = str(ex)
raise Exception(msg)
else:
for item in reader:
source_path = os.path.join(upload_dir, f"{item['name']}{item['extension']}")
Expand Down
9 changes: 8 additions & 1 deletion cvat/apps/engine/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,6 @@ def _create_thread(tid, data, isImport=False):
'specific_attributes': db_cloud_storage.get_specific_attributes()
}
cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
cloud_storage_instance.download_file(manifest_file[0], db_data.get_manifest_path())
first_sorted_media_image = sorted(media['image'])[0]
cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))

Expand Down Expand Up @@ -368,6 +367,14 @@ def update_progress(progress):
w, h = extractor.get_image_size(0)
else:
manifest = ImageManifestManager(db_data.get_manifest_path())
# prepare task manifest file from cloud storage manifest file
cloud_storage_manifest = ImageManifestManager(
os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0])
)
cloud_storage_manifest.init_index()
media_files = sorted(media['image'])
content = cloud_storage_manifest.get_subset(media_files)
manifest.create(content)
manifest.init_index()
img_properties = manifest[0]
w, h = img_properties['width'], img_properties['height']
Expand Down
14 changes: 12 additions & 2 deletions cvat/apps/engine/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1306,7 +1306,12 @@ def content(self, request, pk):
slogger.cloud_storage[pk].info(msg)
return Response(data=msg, status=status.HTTP_404_NOT_FOUND)
except Exception as ex:
return HttpResponseBadRequest(str(ex))
# check that cloud storage was not deleted
if not storage.exists():
msg = 'The resource {} is no longer available. It may have been deleted'.format(storage.name)
else:
msg = str(ex)
return HttpResponseBadRequest(msg)

@swagger_auto_schema(
method='get',
Expand Down Expand Up @@ -1353,7 +1358,12 @@ def preview(self, request, pk):
slogger.glob.error(message)
return HttpResponseNotFound(message)
except Exception as ex:
return HttpResponseBadRequest(str(ex))
# check that cloud storage was not deleted
if not storage.exists():
msg = 'The resource {} is no longer available. It may have been deleted'.format(storage.name)
else:
msg = str(ex)
return HttpResponseBadRequest(msg)

def rq_handler(job, exc_type, exc_value, tb):
job.exc_info = "".join(
Expand Down
21 changes: 19 additions & 2 deletions utils/dataset_manifest/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,10 @@ def index(self):
def data(self):
pass

# Abstract hook that every concrete manifest manager must override.
# `subset_names` is the collection of media names to select; what is
# returned (or raised) is decided by the overriding implementation.
@abstractmethod
def get_subset(self, subset_names):
pass

class VideoManifestManager(_ManifestManager):
def __init__(self, manifest_path):
super().__init__(manifest_path)
Expand Down Expand Up @@ -402,7 +406,10 @@ def video_length(self):

@property
def data(self):
    """Return an iterable holding the single video name of this manifest.

    Returns:
        A one-element tuple containing ``self.video_name``.
    """
    # The trailing comma is required: ``(self.video_name)`` is just the
    # string itself, so iterating the result would yield single characters
    # instead of one file name.
    return (self.video_name,)

# Subsetting is not supported by this implementation: calling it always
# raises NotImplementedError, regardless of `subset_names`.
def get_subset(self, subset_names):
raise NotImplementedError()

#TODO: add generic manifest structure file validation
class ManifestValidator:
Expand Down Expand Up @@ -484,4 +491,14 @@ def prepare_meta(sources, **kwargs):

# Full names of the images in the manifest, each built as
# image['name'] + image['extension'] while iterating over `self`.
# NOTE(review): the two return statements below look like the before/after
# lines of a diff hunk (list comprehension replaced by a generator
# expression). As written only the first can execute; confirm which one
# the real file keeps. The generator form can be consumed only once.
@property
def data(self):
return [f"{image['name']}{image['extension']}" for _, image in self]
return (f"{image['name']}{image['extension']}" for _, image in self)

def get_subset(self, subset_names):
    """Lazily yield manifest entries for the images named in ``subset_names``.

    Args:
        subset_names: iterable of full image names (``name`` followed by
            ``extension``, e.g. ``'sub_dir/image.jpeg'``) to keep.

    Returns:
        A generator of dicts with ``name``, ``extension``, ``width`` and
        ``height``. ``meta`` and ``checksum`` are included only when the
        manifest entry carries them.
    """
    # Build the lookup once: membership is then O(1) per image instead of a
    # scan of ``subset_names`` for every entry, and a one-shot iterator
    # argument is not silently consumed by repeated ``in`` tests.
    wanted = frozenset(subset_names)

    def _entries():
        for _, image in self:
            if f"{image['name']}{image['extension']}" not in wanted:
                continue
            entry = {
                'name': f"{image['name']}",
                'extension': f"{image['extension']}",
                'width': image['width'],
                'height': image['height'],
            }
            # ``meta`` and ``checksum`` are optional in a manifest entry;
            # copy them only when present instead of raising KeyError.
            # NOTE(review): confirm the consumer of this subset accepts
            # entries without these keys.
            if 'meta' in image:
                entry['meta'] = image['meta']
            if 'checksum' in image:
                entry['checksum'] = f"{image['checksum']}"
            yield entry

    return _entries()