From c32b0e5af69f5aeec8f5f4d6fc2aa0a70e502601 Mon Sep 17 00:00:00 2001 From: Andrei Tcibin Date: Tue, 2 Jul 2019 18:15:57 +0300 Subject: [PATCH] Change download chunk size and increase download resume attempts. Use buffering size as download chunk size in order to improve overall google storage blobs download performance. Also increase default download resume attempts to bypass the connection reset issue described in #435 for most of the possible cases. --- pipe-cli/src/utilities/storage/gs.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pipe-cli/src/utilities/storage/gs.py b/pipe-cli/src/utilities/storage/gs.py index e2e844d52f..e8bd2de0f9 100644 --- a/pipe-cli/src/utilities/storage/gs.py +++ b/pipe-cli/src/utilities/storage/gs.py @@ -17,6 +17,7 @@ from google.auth.transport.requests import AuthorizedSession from google.cloud.storage import Client, Blob from google.oauth2.credentials import Credentials +from google import resumable_media from google.resumable_media import DataCorruption from google.resumable_media.requests import Download @@ -169,7 +170,7 @@ def _do_resumable_upload(self, client, stream, content_type, size, num_retries, class _ProgressBlob(_ResumableDownloadProgressMixin, _UploadProgressMixin, Blob): PROGRESS_CHUNK_SIZE = 5 * 1024 * 1024 # 5 MB - DEFAULT_RESUME_ATTEMPTS = 5 + DEFAULT_RESUME_ATTEMPTS = 100 def __init__(self, size, progress_callback, attempts=DEFAULT_RESUME_ATTEMPTS, *args, **kwargs): """ @@ -490,12 +491,16 @@ def transfer(self, source_wrapper, destination_wrapper, path=None, relative_path def _download_to_file(self, blob, destination_key): try: + self._replace_default_download_chunk_size(self._buffering) with open(destination_key, "wb", buffering=self._buffering) as file_obj: blob.download_to_file(file_obj) except DataCorruption: os.remove(destination_key) raise + def _replace_default_download_chunk_size(self, chunk_size): + resumable_media.requests.download._SINGLE_GET_CHUNK_SIZE = chunk_size + class GsUploadManager(GsManager, AbstractTransferManager):