Skip to content

Commit

Permalink
Reduce the amount of ListRequests for a recursive put for targets not…
Browse files Browse the repository at this point in the history
… ending in "/"
  • Loading branch information
MatthijsPiek committed Nov 9, 2023
1 parent 2c07450 commit db733af
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 28 deletions.
44 changes: 22 additions & 22 deletions s3fs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,7 @@ async def _lsdir(
except ClientError as e:
raise translate_boto_error(e)

if delimiter and files and not versions:
if delimiter and not versions:
self.dircache[path] = files
return files
return self.dircache[path]
Expand Down Expand Up @@ -1001,13 +1001,18 @@ async def _ls(self, path, detail=False, refresh=False, versions=False):
def _exists_in_cache(self, path, bucket, key, version_id):
fullpath = "/".join((bucket, key))

entries = None
try:
entries = self._ls_from_cache(fullpath)
except FileNotFoundError:
return False
pass

if entries is None:
return None
children = [key for key in self.dircache.keys() if key.startswith(fullpath)]
if len(children) == 0:
return None
else:
return True # if children exist, the parent also exists

if not self.version_aware or version_id is None:
return True
Expand Down Expand Up @@ -1036,27 +1041,22 @@ async def _exists(self, path):
return True
except FileNotFoundError:
return False
elif self.dircache.get(bucket, False):
return True
else:
try:
if self._ls_from_cache(bucket):
return True
except FileNotFoundError:
# might still be a bucket we can access but don't own
pass
try:
await self._call_s3(
"list_objects_v2", MaxKeys=1, Bucket=bucket, **self.req_kw
)
return True
except Exception:
pass
try:
await self._call_s3("get_bucket_location", Bucket=bucket, **self.req_kw)
if self._exists_in_cache(path, bucket, key, version_id):
return True
except Exception:
return False
else:
try:
await self._call_s3(
"list_objects_v2", MaxKeys=1, Bucket=bucket, **self.req_kw
)
return True
except Exception:
pass
try:
await self._call_s3("get_bucket_location", Bucket=bucket, **self.req_kw)
return True
except Exception:
return False

exists = sync_wrapper(_exists)

Expand Down
37 changes: 31 additions & 6 deletions s3fs/tests/test_s3fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2613,11 +2613,12 @@ def test_get_directory_recursive(s3, tmpdir):
assert target_fs.find(target) == [os.path.join(target, "file")]


def test_put_directory_recursive(s3, tmpdir):
def test_put_directory_recursive(s3, tmpdir, request_count):
src = os.path.join(tmpdir, "src")
src_file = os.path.join(src, "file")
deep_src_path = os.path.join(src, "very", "deep")
src_file = os.path.join(deep_src_path, "file")
source_fs = fsspec.filesystem("file")
source_fs.mkdir(src)
source_fs.mkdir(deep_src_path)
source_fs.touch(src_file)

target = test_bucket_name + "/target"
Expand All @@ -2629,18 +2630,42 @@ def test_put_directory_recursive(s3, tmpdir):
assert s3.isdir(target)

if loop == 0:
assert s3.find(target) == [target + "/file"]
assert s3.find(target) == [target + "/very/deep/file"]
else:
assert sorted(s3.find(target)) == [target + "/file", target + "/src/file"]
assert sorted(s3.find(target)) == [target + "/src/very/deep/file", target + "/very/deep/file"]

s3.rm(target, recursive=True)

# put with slash
assert not s3.exists(target)


for loop in range(2):
s3.dircache.clear()
request_count.clear()

s3.put(src + "/", target, recursive=True)

assert request_count.get("PutObject", 0) == 1
assert request_count.get("ListObjectsV2", 0) <= 1

assert s3.isdir(target)
assert s3.find(target) == [target + "/file"]
assert s3.find(target) == [target + "/very/deep/file"]


@pytest.fixture()
def request_count(s3):
count = {}

def count_requests(request, operation_name, **kwargs):
nonlocal count
count[operation_name] = count.get(operation_name, 0) + 1

s3.s3.meta.events.register("request-created.s3", count_requests)

yield count

s3.s3.meta.events.unregister("request-created.s3", count_requests)


def test_cp_two_files(s3):
Expand Down

0 comments on commit db733af

Please sign in to comment.