Merge branch 'rapidsai:branch-22.06' into conda_compilers
galipremsagar authored May 23, 2022
2 parents (78d7793 + 5067cc7) · commit a775bcb
Showing 16 changed files with 99 additions and 32 deletions.
8 changes: 8 additions & 0 deletions conda/recipes/cudf/conda_build_config.yaml
@@ -0,0 +1,8 @@
+c_compiler_version:
+  - 9
+
+cxx_compiler_version:
+  - 9
+
+sysroot_version:
+  - "2.17"
7 changes: 4 additions & 3 deletions conda/recipes/cudf/meta.yaml
@@ -19,15 +19,16 @@ build:
   script_env:
     - VERSION_SUFFIX
     - PARALLEL_LEVEL
-    - CC
-    - CXX
-    - CUDAHOSTCXX
   # libcudf's run_exports pinning is looser than we would like
   ignore_run_exports:
     - libcudf

 requirements:
+  build:
+    - {{ compiler('c') }}
+    - {{ compiler('cxx') }}
+    - sysroot_{{ target_platform }} {{ sysroot_version }}
   host:
     - protobuf
     - python
     - cython >=0.29,<0.30
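For context on how the two files above interact: `{{ compiler('c') }}` and `{{ compiler('cxx') }}` are conda-build Jinja functions, and the `c_compiler_version`, `cxx_compiler_version`, and `sysroot_version` keys in `conda_build_config.yaml` are the variant inputs they consume, so the rendered recipe pins a concrete toolchain (e.g. `gcc_linux-64 9.*` on linux-64) instead of relying on `CC`/`CXX` from the build environment. A minimal sketch for inspecting the rendered requirements, assuming conda-build is installed and this runs from the repository root; the printed specs are illustrative:

    # Sketch: render the cudf recipe and print its resolved build requirements.
    from conda_build import api

    # render() automatically reads the conda_build_config.yaml that sits next
    # to meta.yaml and yields one (metadata, download, reparse) tuple per variant.
    for metadata, _, _ in api.render("conda/recipes/cudf"):
        print(metadata.get_value("requirements/build"))
        # e.g. ['gcc_linux-64 9.*', 'gxx_linux-64 9.*', 'sysroot_linux-64 2.17']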
8 changes: 8 additions & 0 deletions conda/recipes/cudf_kafka/conda_build_config.yaml
@@ -0,0 +1,8 @@
+c_compiler_version:
+  - 9
+
+cxx_compiler_version:
+  - 9
+
+sysroot_version:
+  - "2.17"
6 changes: 3 additions & 3 deletions conda/recipes/cudf_kafka/meta.yaml
@@ -17,15 +17,15 @@ build:
   number: {{ GIT_DESCRIBE_NUMBER }}
   string: py{{ py_version_numeric }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
   script_env:
-    - CC
-    - CXX
-    - CUDAHOSTCXX
     - PARALLEL_LEVEL
     - VERSION_SUFFIX

 requirements:
   build:
     - cmake >=3.20.1,!=3.23.0
+    - {{ compiler('c') }}
+    - {{ compiler('cxx') }}
+    - sysroot_{{ target_platform }} {{ sysroot_version }}
   host:
     - python
     - cython >=0.29,<0.30
3 changes: 0 additions & 3 deletions conda/recipes/custreamz/meta.yaml
@@ -19,9 +19,6 @@ build:
   script_env:
     - VERSION_SUFFIX
     - PARALLEL_LEVEL
-    - CC
-    - CXX
-    - CUDAHOSTCXX

 requirements:
   host:
3 changes: 0 additions & 3 deletions conda/recipes/dask-cudf/meta.yaml
@@ -19,9 +19,6 @@ build:
   script_env:
     - VERSION_SUFFIX
     - PARALLEL_LEVEL
-    - CC
-    - CXX
-    - CUDAHOSTCXX

 requirements:
   host:
12 changes: 12 additions & 0 deletions conda/recipes/libcudf/conda_build_config.yaml
@@ -1,3 +1,15 @@
+c_compiler_version:
+  - 9
+
+cxx_compiler_version:
+  - 9
+
+cuda_compiler:
+  - nvcc
+
+sysroot_version:
+  - "2.17"
+
 cmake_version:
   - ">=3.20.1,!=3.23.0"

18 changes: 15 additions & 3 deletions conda/recipes/libcudf/meta.yaml
@@ -14,9 +14,6 @@ source:

 build:
   script_env:
-    - CC
-    - CXX
-    - CUDAHOSTCXX
     - PARALLEL_LEVEL
     - CMAKE_GENERATOR
     - CMAKE_C_COMPILER_LAUNCHER
@@ -31,6 +28,10 @@ build:
 requirements:
   build:
     - cmake {{ cmake_version }}
+    - {{ compiler('c') }}
+    - {{ compiler('cxx') }}
+    - {{ compiler('cuda') }} {{ cuda_version }}
+    - sysroot_{{ target_platform }} {{ sysroot_version }}
   host:
     - librmm {{ minor_version }}.*
     - cudatoolkit {{ cuda_version }}.*
@@ -48,6 +49,8 @@ outputs:
       string: cuda{{ cuda_major }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
       run_exports:
         - {{ pin_subpackage("libcudf", max_pin="x.x") }}
+      ignore_run_exports_from:
+        - {{ compiler('cuda') }}
     requirements:
       build:
         - cmake {{ cmake_version }}
@@ -287,6 +290,8 @@ outputs:
     build:
       number: {{ GIT_DESCRIBE_NUMBER }}
       string: {{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+      ignore_run_exports_from:
+        - {{ compiler('cuda') }}
     requirements:
       build:
         - cmake {{ cmake_version }}
@@ -308,9 +313,14 @@ outputs:
     build:
       number: {{ GIT_DESCRIBE_NUMBER }}
       string: {{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+      ignore_run_exports_from:
+        - {{ compiler('cuda') }}
     requirements:
       build:
         - cmake {{ cmake_version }}
+        - {{ compiler('c') }}
+        - {{ compiler('cxx') }}
+        - sysroot_{{ target_platform }} {{ sysroot_version }}
       host:
         - {{ pin_subpackage('libcudf', exact=True) }}
     run:
@@ -327,6 +337,8 @@ outputs:
     build:
       number: {{ GIT_DESCRIBE_NUMBER }}
       string: cuda{{ cuda_major }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+      ignore_run_exports_from:
+        - {{ compiler('cuda') }}
     requirements:
       build:
         - cmake {{ cmake_version }}
20 changes: 20 additions & 0 deletions cpp/include/cudf/table/experimental/row_operators.cuh
@@ -75,6 +75,19 @@ namespace row {
 enum class lhs_index_type : size_type {};
 enum class rhs_index_type : size_type {};

+/**
+ * @brief A counting iterator that uses strongly typed indices bound to tables.
+ *
+ * Performing lexicographic or equality comparisons between values in two
+ * tables requires the use of strongly typed indices. The strong index types
+ * `lhs_index_type` and `rhs_index_type` ensure that index values are bound to
+ * the correct table, regardless of the order in which these indices are
+ * provided to the call operator. This struct and its type aliases
+ * `lhs_iterator` and `rhs_iterator` provide an interface similar to a counting
+ * iterator, with strongly typed values to represent the table indices.
+ *
+ * @tparam Index The strong index type
+ */
 template <typename Index, typename Underlying = std::underlying_type_t<Index>>
 struct strong_index_iterator : public thrust::iterator_facade<strong_index_iterator<Index>,
                                                               Index,
@@ -110,7 +123,14 @@ struct strong_index_iterator : public thrust::iterator_facade<strong_index_itera
   Underlying begin{};
 };

+/**
+ * @brief Iterator representing indices into a left-side table.
+ */
 using lhs_iterator = strong_index_iterator<lhs_index_type>;
+
+/**
+ * @brief Iterator representing indices into a right-side table.
+ */
 using rhs_iterator = strong_index_iterator<rhs_index_type>;

 namespace lexicographic {
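The new doc comment describes a strong-typedef pattern: left- and right-table indices get distinct C++ types so one cannot silently stand in for the other. A rough Python analogue of the same idea (an illustration only — the real mechanism is the `strong_index_iterator` template above, not this sketch):

    # Illustration of the strong-index idea using typing.NewType: both wrappers
    # are plain ints at runtime, but a type checker (mypy/pyright) keeps them apart.
    from typing import NewType

    LhsIndex = NewType("LhsIndex", int)  # index bound to the left-hand table
    RhsIndex = NewType("RhsIndex", int)  # index bound to the right-hand table

    def rows_equal(lhs_row: LhsIndex, rhs_row: RhsIndex) -> bool:
        # Stand-in for a comparator call operator: the annotations record which
        # table each index belongs to, so mixed-up indices are caught statically.
        return lhs_row == rhs_row  # placeholder body for the sketch

    rows_equal(LhsIndex(3), RhsIndex(7))     # accepted
    # rows_equal(RhsIndex(7), LhsIndex(3))   # rejected by a static type checker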
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/avro.py
@@ -24,7 +24,7 @@ def read_avro(
             "`read_avro` does not yet support reading multiple files"
         )

-    filepath_or_buffer, compression = ioutils.get_filepath_or_buffer(
+    filepath_or_buffer, compression = ioutils.get_reader_filepath_or_buffer(
         path_or_data=filepath_or_buffer, compression=None, **kwargs
     )
     if compression is not None:
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/csv.py
@@ -60,7 +60,7 @@ def read_csv(
             "`read_csv` does not yet support reading multiple files"
         )

-    filepath_or_buffer, compression = ioutils.get_filepath_or_buffer(
+    filepath_or_buffer, compression = ioutils.get_reader_filepath_or_buffer(
         path_or_data=filepath_or_buffer,
         compression=compression,
         iotypes=(BytesIO, StringIO, NativeFile),
6 changes: 4 additions & 2 deletions python/cudf/cudf/io/json.py
@@ -42,10 +42,11 @@ def read_json(
                 source = ioutils.stringify_pathlike(source)
                 source = fs.sep.join([source, "*.json"])

-            tmp_source, compression = ioutils.get_filepath_or_buffer(
+            tmp_source, compression = ioutils.get_reader_filepath_or_buffer(
                 path_or_data=source,
                 compression=compression,
                 iotypes=(BytesIO, StringIO),
+                allow_raw_text_input=True,
                 **kwargs,
             )
             if isinstance(tmp_source, list):
@@ -73,10 +74,11 @@ def read_json(
                 "multiple files via pandas"
             )

-        path_or_buf, compression = ioutils.get_filepath_or_buffer(
+        path_or_buf, compression = ioutils.get_reader_filepath_or_buffer(
             path_or_data=path_or_buf,
             compression=compression,
             iotypes=(BytesIO, StringIO),
+            allow_raw_text_input=True,
             **kwargs,
         )

6 changes: 3 additions & 3 deletions python/cudf/cudf/io/orc.py
@@ -171,7 +171,7 @@ def read_orc_statistics(
     files_statistics = []
     stripes_statistics = []
     for source in filepaths_or_buffers:
-        filepath_or_buffer, compression = ioutils.get_filepath_or_buffer(
+        path_or_buf, compression = ioutils.get_reader_filepath_or_buffer(
             path_or_data=source, compression=None, **kwargs
         )
         if compression is not None:
@@ -182,7 +182,7 @@ def read_orc_statistics(
             column_names,
             raw_file_statistics,
             raw_stripes_statistics,
-        ) = liborc.read_raw_orc_statistics(filepath_or_buffer)
+        ) = liborc.read_raw_orc_statistics(path_or_buf)

         # Parse column names
         column_names = [
@@ -323,7 +323,7 @@ def read_orc(
             source = stringify_path(source)
             source = fs.sep.join([source, "*.orc"])

-        tmp_source, compression = ioutils.get_filepath_or_buffer(
+        tmp_source, compression = ioutils.get_reader_filepath_or_buffer(
             path_or_data=source,
             compression=None,
             use_python_file_object=use_python_file_object,
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/parquet.py
@@ -435,7 +435,7 @@ def read_parquet(
             fs=fs,
         )
     for i, source in enumerate(filepath_or_buffer):
-        tmp_source, compression = ioutils.get_filepath_or_buffer(
+        tmp_source, compression = ioutils.get_reader_filepath_or_buffer(
             path_or_data=source,
             compression=None,
             fs=fs,
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/text.py
@@ -18,7 +18,7 @@ def read_text(
 ):
     """{docstring}"""

-    filepath_or_buffer, compression = ioutils.get_filepath_or_buffer(
+    filepath_or_buffer, compression = ioutils.get_reader_filepath_or_buffer(
         path_or_data=filepath_or_buffer,
         compression=None,
         iotypes=(BytesIO, StringIO),
26 changes: 18 additions & 8 deletions python/cudf/cudf/utils/ioutils.py
@@ -1319,7 +1319,7 @@ def _open_remote_files(
     ]


-def get_filepath_or_buffer(
+def get_reader_filepath_or_buffer(
     path_or_data,
     compression,
     mode="rb",
@@ -1328,6 +1328,7 @@ def get_filepath_or_buffer(
     byte_ranges=None,
     use_python_file_object=False,
     open_file_options=None,
+    allow_raw_text_input=False,
     **kwargs,
 ):
     """Return either a filepath string to data, or a memory buffer of data.
@@ -1352,6 +1353,11 @@ def get_filepath_or_buffer(
     open_file_options : dict, optional
         Optional dictionary of key-word arguments to pass to
         `_open_remote_files` (used for remote storage only).
+    allow_raw_text_input : boolean, default False
+        If True, this indicates the input `path_or_data` could be a raw text
+        input and will not check for its existence in the filesystem. If False,
+        the input must be a path and an error will be raised if it does not
+        exist.

     Returns
     -------
@@ -1372,18 +1378,22 @@ def get_filepath_or_buffer(
     if fs is None:
         return path_or_data, compression

-    if len(paths) == 0:
-        raise FileNotFoundError(
-            f"{path_or_data} could not be resolved to any files"
-        )
-
     if _is_local_filesystem(fs):
         # Doing this as `read_json` accepts a json string
         # path_or_data need not be a filepath like string
-        if os.path.exists(paths[0]):
-            path_or_data = paths if len(paths) > 1 else paths[0]
+        if len(paths):
+            if fs.exists(paths[0]):
+                path_or_data = paths if len(paths) > 1 else paths[0]
+            elif not allow_raw_text_input:
+                raise FileNotFoundError(
+                    f"{path_or_data} could not be resolved to any files"
+                )

     else:
+        if len(paths) == 0:
+            raise FileNotFoundError(
+                f"{path_or_data} could not be resolved to any files"
+            )
         if use_python_file_object:
             path_or_data = _open_remote_files(
                 paths,
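Taken together with the reader changes above, the caller-visible behavior is: readers that pass `allow_raw_text_input=True` (such as `read_json`) may fall through to parsing the input as raw text when it does not name an existing local file, while purely path-based readers now raise `FileNotFoundError` from inside the local-filesystem branch. A hedged sketch of that behavior — the sample data is illustrative:

    # Sketch of the behavior enabled by allow_raw_text_input (illustrative data).
    import cudf

    # read_json forwards allow_raw_text_input=True, so a raw JSON string that
    # does not name an existing file is handed to the JSON parser unchanged.
    df = cudf.read_json('{"a": [1, 2], "b": [3, 4]}')

    # Path-based readers still fail fast when nothing matches on disk.
    try:
        cudf.read_csv("no_such_file.csv")
    except FileNotFoundError as err:
        print(err)  # "no_such_file.csv could not be resolved to any files"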
