From c75410b880e1c2e1c962507ea913da5f2dd087fd Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Sun, 26 Feb 2023 18:23:48 +0200 Subject: [PATCH] spec: get_file: don't pollute with files and fds (#1191) * spec: get_file: don't pollute with files and fds Currently if anything happens in fs.open we will leave an empty file in user's workspace and also won't close the file descriptor. Both things are pretty bad. * http: _get_file: don't leak file descriptor --- fsspec/implementations/http.py | 21 ++++++++++++++------- fsspec/spec.py | 31 ++++++++++++++++--------------- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/fsspec/implementations/http.py b/fsspec/implementations/http.py index f795dab7d..afd0c2664 100644 --- a/fsspec/implementations/http.py +++ b/fsspec/implementations/http.py @@ -246,13 +246,20 @@ async def _get_file( callback.set_size(size) self._raise_not_found_for_status(r, rpath) - if not isfilelike(lpath): - lpath = open(lpath, "wb") - chunk = True - while chunk: - chunk = await r.content.read(chunk_size) - lpath.write(chunk) - callback.relative_update(len(chunk)) + if isfilelike(lpath): + outfile = lpath + else: + outfile = open(lpath, "wb") + + try: + chunk = True + while chunk: + chunk = await r.content.read(chunk_size) + outfile.write(chunk) + callback.relative_update(len(chunk)) + finally: + if not isfilelike(lpath): + outfile.close() async def _put_file( self, diff --git a/fsspec/spec.py b/fsspec/spec.py index a58331f8c..b529aee48 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -848,25 +848,26 @@ def get_file( """Copy single remote file to local""" if isfilelike(lpath): outfile = lpath - else: - if self.isdir(rpath): - os.makedirs(lpath, exist_ok=True) - return None + elif self.isdir(rpath): + os.makedirs(lpath, exist_ok=True) + return None + with self.open(rpath, "rb", **kwargs) as f1: if outfile is None: outfile = open(lpath, "wb") - with self.open(rpath, "rb", **kwargs) as f1: - callback.set_size(getattr(f1, "size", None)) - data = True - while data: - data = f1.read(self.blocksize) - segment_len = outfile.write(data) - if segment_len is None: - segment_len = len(data) - callback.relative_update(segment_len) - if not isfilelike(lpath): - outfile.close() + try: + callback.set_size(getattr(f1, "size", None)) + data = True + while data: + data = f1.read(self.blocksize) + segment_len = outfile.write(data) + if segment_len is None: + segment_len = len(data) + callback.relative_update(segment_len) + finally: + if not isfilelike(lpath): + outfile.close() def get(self, rpath, lpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwargs): """Copy file(s) to local.