Skip to content

Commit

Permalink
spec: get_file: don't pollute with files and fds (fsspec#1191)
Browse files Browse the repository at this point in the history
* spec: get_file: don't pollute with files and fds

Currently if anything happens in fs.open we will leave an empty file in user's
workspace and also won't close the file descriptor. Both things are pretty bad.

* http: _get_file: don't leak file descriptor
  • Loading branch information
efiop authored and agoodm committed Mar 14, 2023
1 parent 69f7dd4 commit 67756e1
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 22 deletions.
21 changes: 14 additions & 7 deletions fsspec/implementations/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,13 +246,20 @@ async def _get_file(

callback.set_size(size)
self._raise_not_found_for_status(r, rpath)
if not isfilelike(lpath):
lpath = open(lpath, "wb")
chunk = True
while chunk:
chunk = await r.content.read(chunk_size)
lpath.write(chunk)
callback.relative_update(len(chunk))
if isfilelike(lpath):
outfile = lpath
else:
outfile = open(lpath, "wb")

try:
chunk = True
while chunk:
chunk = await r.content.read(chunk_size)
outfile.write(chunk)
callback.relative_update(len(chunk))
finally:
if not isfilelike(lpath):
outfile.close()

async def _put_file(
self,
Expand Down
31 changes: 16 additions & 15 deletions fsspec/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,25 +848,26 @@ def get_file(
"""Copy single remote file to local"""
if isfilelike(lpath):
outfile = lpath
else:
if self.isdir(rpath):
os.makedirs(lpath, exist_ok=True)
return None
elif self.isdir(rpath):
os.makedirs(lpath, exist_ok=True)
return None

with self.open(rpath, "rb", **kwargs) as f1:
if outfile is None:
outfile = open(lpath, "wb")

with self.open(rpath, "rb", **kwargs) as f1:
callback.set_size(getattr(f1, "size", None))
data = True
while data:
data = f1.read(self.blocksize)
segment_len = outfile.write(data)
if segment_len is None:
segment_len = len(data)
callback.relative_update(segment_len)
if not isfilelike(lpath):
outfile.close()
try:
callback.set_size(getattr(f1, "size", None))
data = True
while data:
data = f1.read(self.blocksize)
segment_len = outfile.write(data)
if segment_len is None:
segment_len = len(data)
callback.relative_update(segment_len)
finally:
if not isfilelike(lpath):
outfile.close()

def get(self, rpath, lpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwargs):
"""Copy file(s) to local.
Expand Down

0 comments on commit 67756e1

Please sign in to comment.