diff --git a/google/cloud/storage/transfer_manager.py b/google/cloud/storage/transfer_manager.py index 8190f844d..1b48cd9cf 100644 --- a/google/cloud/storage/transfer_manager.py +++ b/google/cloud/storage/transfer_manager.py @@ -30,6 +30,7 @@ from google.cloud.storage import Client from google.cloud.storage import Blob from google.cloud.storage.blob import _get_host_name +from google.cloud.storage.blob import _quote from google.cloud.storage.constants import _DEFAULT_TIMEOUT from google.cloud.storage._helpers import _api_core_retry_to_resumable_media_retry from google.cloud.storage.retry import DEFAULT_RETRY @@ -1083,7 +1084,7 @@ def upload_chunks_concurrently( hostname = _get_host_name(client._connection) url = "{hostname}/{bucket}/{blob}".format( - hostname=hostname, bucket=bucket.name, blob=blob.name + hostname=hostname, bucket=bucket.name, blob=_quote(blob.name) ) base_headers, object_metadata, content_type = blob._get_upload_arguments( diff --git a/tests/system/test_transfer_manager.py b/tests/system/test_transfer_manager.py index 0deab356b..7a257e960 100644 --- a/tests/system/test_transfer_manager.py +++ b/tests/system/test_transfer_manager.py @@ -245,6 +245,8 @@ def test_upload_chunks_concurrently(shared_bucket, file_data, blobs_to_delete): chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise + blobs_to_delete.append(upload_blob) + transfer_manager.upload_chunks_concurrently( filename, upload_blob, @@ -418,3 +420,58 @@ def test_upload_chunks_concurrently_with_kms( source_contents = sf.read() temp_contents = tmp.read() assert source_contents == temp_contents + + +def test_upload_chunks_concurrently_with_quoted_blob_names( + shared_bucket, file_data, blobs_to_delete +): + source_file = file_data["big"] + filename = source_file["path"] + blob_name = "../example_bucket/mpu_file" + upload_blob = shared_bucket.blob(blob_name) + chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API + assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise + + blobs_to_delete.append(upload_blob) + + # If the blob name is not quoted/encoded at all, this will result in a 403. + transfer_manager.upload_chunks_concurrently( + filename, upload_blob, chunk_size=chunk_size, deadline=DEADLINE + ) + + with tempfile.NamedTemporaryFile() as tmp: + # If the blob name is not quoted correctly, this will result in a 404. + download_blob = shared_bucket.blob(blob_name) + download_blob.download_to_file(tmp) + tmp.seek(0) + + with open(source_file["path"], "rb") as sf: + source_contents = sf.read() + temp_contents = tmp.read() + assert source_contents == temp_contents + + # Test emoji names are not mangled. + blob_name = "\U0001f681" # Helicopter emoji + upload_blob = shared_bucket.blob(blob_name) + chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API + assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise + + blobs_to_delete.append(upload_blob) + + transfer_manager.upload_chunks_concurrently( + filename, + upload_blob, + chunk_size=chunk_size, + deadline=DEADLINE, + worker_type=transfer_manager.THREAD, + ) + + with tempfile.NamedTemporaryFile() as tmp: + download_blob = shared_bucket.blob(blob_name) + download_blob.download_to_file(tmp) + tmp.seek(0) + + with open(source_file["path"], "rb") as sf: + source_contents = sf.read() + temp_contents = tmp.read() + assert source_contents == temp_contents diff --git a/tests/unit/test_transfer_manager.py b/tests/unit/test_transfer_manager.py index aa42dd9ff..cee83ba54 100644 --- a/tests/unit/test_transfer_manager.py +++ b/tests/unit/test_transfer_manager.py @@ -794,6 +794,57 @@ def test_upload_chunks_concurrently(): part_mock.upload.assert_called_with(transport) +def test_upload_chunks_concurrently_quotes_urls(): + bucket = mock.Mock() + bucket.name = "bucket" + bucket.client = _PickleableMockClient(identify_as_client=True) + transport = bucket.client._http + bucket.user_project = None + + blob = Blob(b"../wrongbucket/blob", bucket) + blob.content_type = FAKE_CONTENT_TYPE + quoted_url = "https://example.com/bucket/..%2Fwrongbucket%2Fblob" + + FILENAME = "file_a.txt" + SIZE = 2048 + + container_mock = mock.Mock() + container_mock.upload_id = "abcd" + part_mock = mock.Mock() + ETAG = "efgh" + part_mock.etag = ETAG + container_cls_mock = mock.Mock(return_value=container_mock) + + with mock.patch("os.path.getsize", return_value=SIZE), mock.patch( + "google.cloud.storage.transfer_manager.XMLMPUContainer", new=container_cls_mock + ), mock.patch( + "google.cloud.storage.transfer_manager.XMLMPUPart", return_value=part_mock + ): + transfer_manager.upload_chunks_concurrently( + FILENAME, + blob, + chunk_size=SIZE // 2, + worker_type=transfer_manager.THREAD, + ) + + container_mock.initiate.assert_called_once_with( + transport=transport, content_type=blob.content_type + ) + container_mock.register_part.assert_any_call(1, ETAG) + container_mock.register_part.assert_any_call(2, ETAG) + container_mock.finalize.assert_called_once_with(bucket.client._http) + + assert container_mock._retry_strategy.max_sleep == 60.0 + assert container_mock._retry_strategy.max_cumulative_retry == 120.0 + assert container_mock._retry_strategy.max_retries is None + + container_cls_mock.assert_called_once_with( + quoted_url, FILENAME, headers=mock.ANY + ) + + part_mock.upload.assert_called_with(transport) + + def test_upload_chunks_concurrently_passes_concurrency_options(): bucket = mock.Mock() bucket.name = "bucket"