From 4b305f7dae5a20364a16a69f5c37f94650b6459f Mon Sep 17 00:00:00 2001 From: Cameron Arshadi Date: Thu, 7 Mar 2024 08:36:31 -0800 Subject: [PATCH 1/2] Feat: retries additional ClientError types - We first noticed semi-frequent (1 every ~100GB), seemingly random XAmzContentSHA256Mismatch errors when uploading data to S3 on 2/27/2024. They appear to be transient errors, only needing one retry before succeeding. - There is also a less frequent (once every ~50TB or so) "Bad Request" error, which is also transient - Retrying both errors allows the uploads to succeed --- s3fs/core.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/s3fs/core.py b/s3fs/core.py index 7240d1c9..0fda3a40 100644 --- a/s3fs/core.py +++ b/s3fs/core.py @@ -120,6 +120,10 @@ async def _error_wrapper(func, *, args=(), kwargs=None, retries): err = e if "SlowDown" in str(e): await asyncio.sleep(min(1.7**i * 0.1, 15)) + elif "XAmzContentSHA256Mismatch" in str(e): + await asyncio.sleep(min(1.7**i * 0.1, 15)) + elif "Bad Request" in str(e): + await asyncio.sleep(min(1.7**i * 0.1, 15)) else: break except Exception as e: From 69a0d0ace268f7de548bb9e5db313cd0929e353e Mon Sep 17 00:00:00 2001 From: Cameron Arshadi Date: Tue, 12 Mar 2024 10:25:13 -0700 Subject: [PATCH 2/2] Feat: removes "400 Bad Request" from retryable errors --- s3fs/core.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/s3fs/core.py b/s3fs/core.py index 0fda3a40..cb2c3d42 100644 --- a/s3fs/core.py +++ b/s3fs/core.py @@ -122,8 +122,6 @@ async def _error_wrapper(func, *, args=(), kwargs=None, retries): await asyncio.sleep(min(1.7**i * 0.1, 15)) elif "XAmzContentSHA256Mismatch" in str(e): await asyncio.sleep(min(1.7**i * 0.1, 15)) - elif "Bad Request" in str(e): - await asyncio.sleep(min(1.7**i * 0.1, 15)) else: break except Exception as e: