From 5b44648438d0d47d49d4815280ab27bca8307249 Mon Sep 17 00:00:00 2001
From: Joey Parrish
Date: Thu, 24 Oct 2024 11:18:12 -0700
Subject: [PATCH] fix(cloud): Retry on all failures

Uses basic retry settings for GCS and S3. Without these, GCS would
normally only retry if certain arguments were passed to convince it
that the operation was idempotent. We know our use case, so we know we
can safely retry: there will never be two versions of the same segment
file, or two concurrent writes to the MPD, for example.
---
 streamer/proxy_node.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/streamer/proxy_node.py b/streamer/proxy_node.py
index dbaeec5..cfb7aed 100644
--- a/streamer/proxy_node.py
+++ b/streamer/proxy_node.py
@@ -60,6 +60,7 @@
 # Optional: To support S3, import AWS's boto3 library.
 try:
   import boto3  # type: ignore
+  import botocore.config  # type: ignore
   SUPPORTED_PROTOCOLS.append('s3')
 except:
   pass
@@ -231,7 +232,8 @@ def handle_non_chunked(self, path: str, length: int,
     blob.cache_control = 'no-cache'
 
     # If you don't pass size=length, it tries to seek in the file, which fails.
-    blob.upload_from_file(file, size=length, retries=3)
+    blob.upload_from_file(file, size=length,
+                          retry=google.cloud.storage.retry.DEFAULT_RETRY)
 
   def start_chunked(self, path: str) -> None:
     # No leading slashes, or we get a blank folder name.
@@ -239,7 +241,8 @@ def start_chunked(self, path: str) -> None:
     blob = self._bucket.blob(full_path)
     blob.cache_control = 'no-cache'
 
-    self._chunked_output = blob.open('wb')
+    self._chunked_output = blob.open(
+        'wb', retry=google.cloud.storage.retry.DEFAULT_RETRY)
 
   def handle_chunk(self, data: bytes) -> None:
     assert self._chunked_output is not None
@@ -255,7 +258,7 @@ def handle_delete(self, path: str) -> None:
     full_path = (self._base_path + path).strip('/')
     blob = self._bucket.blob(full_path)
     try:
-      blob.delete()
+      blob.delete(retry=google.cloud.storage.retry.DEFAULT_RETRY)
     except google.api_core.exceptions.NotFound:
       # Some delete calls seem to throw "not found", but the files still get
       # deleted. So ignore these and don't fail the request.
@@ -266,7 +269,7 @@ class S3Handler(RequestHandlerBase):
   # Can't annotate the client here as a parameter if we don't have the library.
   def __init__(self, client: Any, bucket_name: str, base_path: str,
                rate_limiter: RateLimiter, *args, **kwargs) -> None:
-    self._client: boto3.Client = client
+    self._client: boto3.client = client
     self._bucket_name: str = bucket_name
     self._base_path: str = base_path
 
@@ -432,7 +435,8 @@ def __init__(self, upload_location: str) -> None:
     super().__init__()
 
     url = urllib.parse.urlparse(upload_location)
-    self._client = boto3.client('s3')
+    config = botocore.config.Config(retries={'mode': 'standard'})
+    self._client = boto3.client('s3', config=config)
     self._bucket_name = url.netloc
     # Strip both left and right slashes. Otherwise, we get a blank folder name.
     self._base_path = url.path.strip('/')
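
Reviewer note: a minimal, hypothetical sketch (not part of the patch) of
the GCS retry behavior this change opts into. The bucket and object names
below are invented for illustration; DEFAULT_RETRY itself is the real
google-cloud-storage policy used in the diff above.

    import google.cloud.storage
    import google.cloud.storage.retry

    client = google.cloud.storage.Client()
    bucket = client.bucket('example-bucket')      # hypothetical bucket
    blob = bucket.blob('video/segment_0001.m4s')  # hypothetical object

    # By default, uploads are retried only when arguments such as
    # if_generation_match prove the operation is idempotent. Passing
    # DEFAULT_RETRY explicitly retries transient failures (connection
    # errors, 429, 5xx) unconditionally, which is safe here because each
    # segment file is written exactly once.
    blob.upload_from_filename(
        'segment_0001.m4s',
        retry=google.cloud.storage.retry.DEFAULT_RETRY)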
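
Similarly for S3, a hypothetical sketch of the client-wide botocore retry
configuration; bucket and key names are invented:

    import boto3
    import botocore.config

    # 'standard' mode retries a broader set of transient errors than the
    # default 'legacy' mode, up to 3 attempts per call. Because the
    # config is attached to the client, every request made through it
    # (uploads, deletes, etc.) is covered without per-call arguments.
    config = botocore.config.Config(retries={'mode': 'standard'})
    s3 = boto3.client('s3', config=config)
    s3.upload_file('segment_0001.m4s', 'example-bucket',
                   'video/segment_0001.m4s')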