Skip to content

Commit

Permalink
Update io util to convert path like object to string (#8275)
Browse files Browse the repository at this point in the history
Fixes failing dask-cudf s3 test introduced from changes to `fsspec.stringify_path` in their 2021.5.0 release.

Problem description:
While reading remote files with dask-cudf, the `path_or_object` param is populated with remote file objects such as `s3fs.S3File` or `gcsfs.GCSFile`. Updates to the `fsspec.stringify_path` util cause it to return the string path for such objects, which results in cudf looking for those file paths locally.

Fix:
The fix uses one of the branches from the original `stringify_path` util, which checks whether a path-like object implements the `fspath` protocol and gets the string representation from that.

Authors:
  - Ayush Dattagupta (https://github.com/ayushdg)

Approvers:
  - Keith Kraus (https://github.com/kkraus14)

URL: #8275
  • Loading branch information
ayushdg authored May 19, 2021
1 parent 8834ed6 commit 072cbee
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 5 deletions.
3 changes: 1 addition & 2 deletions python/cudf/cudf/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from uuid import uuid4

from fsspec.core import get_fs_token_paths
from fsspec.utils import stringify_path
from pyarrow import dataset as ds, parquet as pq

import cudf
Expand Down Expand Up @@ -203,7 +202,7 @@ def read_parquet(
for source in filepath_or_buffer:
if ioutils.is_directory(source, **kwargs):
fs = _ensure_filesystem(passed_filesystem=None, path=source)
source = stringify_path(source)
source = ioutils.stringify_pathlike(source)
source = fs.sep.join([source, "*.parquet"])

tmp_source, compression = ioutils.get_filepath_or_buffer(
Expand Down
27 changes: 24 additions & 3 deletions python/cudf/cudf/utils/ioutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1051,7 +1051,7 @@ def _is_local_filesystem(fs):
def ensure_single_filepath_or_buffer(path_or_data, **kwargs):
"""Return False if `path_or_data` resolves to multiple filepaths or buffers
"""
path_or_data = fsspec.utils.stringify_path(path_or_data)
path_or_data = stringify_pathlike(path_or_data)
if isinstance(path_or_data, str):
storage_options = kwargs.get("storage_options")
path_or_data = os.path.expanduser(path_or_data)
Expand All @@ -1076,7 +1076,7 @@ def ensure_single_filepath_or_buffer(path_or_data, **kwargs):
def is_directory(path_or_data, **kwargs):
"""Returns True if the provided filepath is a directory
"""
path_or_data = fsspec.utils.stringify_path(path_or_data)
path_or_data = stringify_pathlike(path_or_data)
if isinstance(path_or_data, str):
storage_options = kwargs.get("storage_options")
path_or_data = os.path.expanduser(path_or_data)
Expand Down Expand Up @@ -1121,7 +1121,7 @@ def get_filepath_or_buffer(
compression : str
Type of compression algorithm for the content
"""
path_or_data = fsspec.utils.stringify_path(path_or_data)
path_or_data = stringify_pathlike(path_or_data)

if isinstance(path_or_data, str):
storage_options = kwargs.get("storage_options")
Expand Down Expand Up @@ -1223,6 +1223,27 @@ def is_fsspec_open_file(file_obj):
return False


def stringify_pathlike(pathlike):
    """
    Resolve an fspath-protocol object to its path representation.

    Objects exposing a ``__fspath__`` attribute are replaced by the
    result of calling it; every other object is handed back untouched.

    Parameters
    ----------
    pathlike
        Candidate object; may or may not implement the fspath protocol

    Returns
    -------
    maybe_pathlike_str
        The value returned by ``pathlike.__fspath__()`` when that
        attribute exists, otherwise ``pathlike`` itself
    """
    # Guard clause: anything without the protocol hook passes straight
    # through (plain strings, buffers, remote file objects, ...).
    if not hasattr(pathlike, "__fspath__"):
        return pathlike

    return pathlike.__fspath__()


def buffer_write_lines(buf, lines):
"""
Appends lines to a buffer.
Expand Down

0 comments on commit 072cbee

Please sign in to comment.