From 689af42a34021e9fd987adcb9bbcffc6b02daade Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Tue, 18 May 2021 11:09:59 -0700 Subject: [PATCH 1/2] Replace use of stringify_path with check for __fspath__ attribute --- python/cudf/cudf/io/parquet.py | 7 +++++-- python/cudf/cudf/utils/ioutils.py | 18 +++++++++++++++--- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index c17630d1227..4b950585756 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -5,7 +5,6 @@ from uuid import uuid4 from fsspec.core import get_fs_token_paths -from fsspec.utils import stringify_path from pyarrow import dataset as ds, parquet as pq import cudf @@ -203,7 +202,11 @@ def read_parquet( for source in filepath_or_buffer: if ioutils.is_directory(source, **kwargs): fs = _ensure_filesystem(passed_filesystem=None, path=source) - source = stringify_path(source) + source = ( + source.__fspath__() + if hasattr(source, "__fspath__") + else source + ) source = fs.sep.join([source, "*.parquet"]) tmp_source, compression = ioutils.get_filepath_or_buffer( diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 16511627aa2..134e7c8fe40 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -1051,7 +1051,11 @@ def _is_local_filesystem(fs): def ensure_single_filepath_or_buffer(path_or_data, **kwargs): """Return False if `path_or_data` resolves to multiple filepaths or buffers """ - path_or_data = fsspec.utils.stringify_path(path_or_data) + path_or_data = ( + path_or_data.__fspath__() + if hasattr(path_or_data, "__fspath__") + else path_or_data + ) if isinstance(path_or_data, str): storage_options = kwargs.get("storage_options") path_or_data = os.path.expanduser(path_or_data) @@ -1076,7 +1080,11 @@ def ensure_single_filepath_or_buffer(path_or_data, **kwargs): def is_directory(path_or_data, **kwargs): """Returns True if the
provided filepath is a directory """ - path_or_data = fsspec.utils.stringify_path(path_or_data) + path_or_data = ( + path_or_data.__fspath__() + if hasattr(path_or_data, "__fspath__") + else path_or_data + ) if isinstance(path_or_data, str): storage_options = kwargs.get("storage_options") path_or_data = os.path.expanduser(path_or_data) @@ -1121,7 +1129,11 @@ def get_filepath_or_buffer( compression : str Type of compression algorithm for the content """ - path_or_data = fsspec.utils.stringify_path(path_or_data) + path_or_data = ( + path_or_data.__fspath__() + if hasattr(path_or_data, "__fspath__") + else path_or_data + ) if isinstance(path_or_data, str): storage_options = kwargs.get("storage_options") From 4a7c65205052eff516c529798f80cc55baa7e041 Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Tue, 18 May 2021 14:31:12 -0700 Subject: [PATCH 2/2] Add stringify_pathlike as a helper in ioutils --- python/cudf/cudf/io/parquet.py | 6 +---- python/cudf/cudf/utils/ioutils.py | 39 +++++++++++++++++++------------ 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 4b950585756..5ace108a72d 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -202,11 +202,7 @@ def read_parquet( for source in filepath_or_buffer: if ioutils.is_directory(source, **kwargs): fs = _ensure_filesystem(passed_filesystem=None, path=source) - source = ( - source.__fspath__() - if hasattr(source, "__fspath__") - else source - ) + source = ioutils.stringify_pathlike(source) source = fs.sep.join([source, "*.parquet"]) tmp_source, compression = ioutils.get_filepath_or_buffer( diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 134e7c8fe40..15120fd8fab 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -1051,11 +1051,7 @@ def _is_local_filesystem(fs): def ensure_single_filepath_or_buffer(path_or_data, **kwargs): 
"""Return False if `path_or_data` resolves to multiple filepaths or buffers """ - path_or_data = ( - path_or_data.__fspath__() - if hasattr(path_or_data, "__fspath__") - else path_or_data - ) + path_or_data = stringify_pathlike(path_or_data) if isinstance(path_or_data, str): storage_options = kwargs.get("storage_options") path_or_data = os.path.expanduser(path_or_data) @@ -1080,11 +1076,7 @@ def ensure_single_filepath_or_buffer(path_or_data, **kwargs): def is_directory(path_or_data, **kwargs): """Returns True if the provided filepath is a directory """ - path_or_data = ( - path_or_data.__fspath__() - if hasattr(path_or_data, "__fspath__") - else path_or_data - ) + path_or_data = stringify_pathlike(path_or_data) if isinstance(path_or_data, str): storage_options = kwargs.get("storage_options") path_or_data = os.path.expanduser(path_or_data) @@ -1129,11 +1121,7 @@ def get_filepath_or_buffer( compression : str Type of compression algorithm for the content """ - path_or_data = ( - path_or_data.__fspath__() - if hasattr(path_or_data, "__fspath__") - else path_or_data - ) + path_or_data = stringify_pathlike(path_or_data) if isinstance(path_or_data, str): storage_options = kwargs.get("storage_options") @@ -1235,6 +1223,27 @@ def is_fsspec_open_file(file_obj): return False +def stringify_pathlike(pathlike): + """ + Convert any object that implements the fspath protocol + to a string. Leaves other objects unchanged + Parameters + ---------- + pathlike + Pathlike object that implements the fspath protocol + + Returns + ------- + maybe_pathlike_str + String version of the object if possible + """ + maybe_pathlike_str = ( + pathlike.__fspath__() if hasattr(pathlike, "__fspath__") else pathlike + ) + + return maybe_pathlike_str + + def buffer_write_lines(buf, lines): """ Appends lines to a buffer.