Merge branch 'rapidsai:branch-22.06' into conda_compilers
galipremsagar authored May 23, 2022
2 parents (78d7793 + 5067cc7) · commit a775bcb
Showing 16 changed files with 99 additions and 32 deletions.
8 changes: 8 additions & 0 deletions conda/recipes/cudf/conda_build_config.yaml
@@ -0,0 +1,8 @@
+c_compiler_version:
+  - 9
+
+cxx_compiler_version:
+  - 9
+
+sysroot_version:
+  - "2.17"
7 changes: 4 additions & 3 deletions conda/recipes/cudf/meta.yaml
@@ -19,15 +19,16 @@ build:
   script_env:
     - VERSION_SUFFIX
     - PARALLEL_LEVEL
-    - CC
-    - CXX
-    - CUDAHOSTCXX
   # libcudf's run_exports pinning is looser than we would like
   ignore_run_exports:
     - libcudf

 requirements:
+  build:
+    - {{ compiler('c') }}
+    - {{ compiler('cxx') }}
+    - sysroot_{{ target_platform }} {{ sysroot_version }}
   host:
     - protobuf
     - python
     - cython >=0.29,<0.30
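For context on how the two files above interact: `{{ compiler('c') }}` and `{{ compiler('cxx') }}` are conda-build Jinja functions, and the `c_compiler_version`, `cxx_compiler_version`, and `sysroot_version` keys in `conda_build_config.yaml` are the variant inputs they consume, so the rendered recipe pins a concrete toolchain (e.g. `gcc_linux-64 9.*` on linux-64) instead of relying on `CC`/`CXX` from the build environment. A minimal sketch for inspecting the rendered requirements, assuming conda-build is installed and this runs from the repository root; the printed specs are illustrative:

    # Sketch: render the cudf recipe and print its resolved build requirements.
    from conda_build import api

    # render() automatically reads the conda_build_config.yaml that sits next
    # to meta.yaml and yields one (metadata, download, reparse) tuple per variant.
    for metadata, _, _ in api.render("conda/recipes/cudf"):
        print(metadata.get_value("requirements/build"))
        # e.g. ['gcc_linux-64 9.*', 'gxx_linux-64 9.*', 'sysroot_linux-64 2.17']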
8 changes: 8 additions & 0 deletions conda/recipes/cudf_kafka/conda_build_config.yaml
@@ -0,0 +1,8 @@
+c_compiler_version:
+  - 9
+
+cxx_compiler_version:
+  - 9
+
+sysroot_version:
+  - "2.17"
6 changes: 3 additions & 3 deletions conda/recipes/cudf_kafka/meta.yaml
@@ -17,15 +17,15 @@ build:
   number: {{ GIT_DESCRIBE_NUMBER }}
   string: py{{ py_version_numeric }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
   script_env:
-    - CC
-    - CXX
-    - CUDAHOSTCXX
     - PARALLEL_LEVEL
     - VERSION_SUFFIX

 requirements:
   build:
     - cmake >=3.20.1,!=3.23.0
+    - {{ compiler('c') }}
+    - {{ compiler('cxx') }}
+    - sysroot_{{ target_platform }} {{ sysroot_version }}
   host:
     - python
     - cython >=0.29,<0.30
3 changes: 0 additions & 3 deletions conda/recipes/custreamz/meta.yaml
@@ -19,9 +19,6 @@ build:
   script_env:
     - VERSION_SUFFIX
     - PARALLEL_LEVEL
-    - CC
-    - CXX
-    - CUDAHOSTCXX

 requirements:
   host:
3 changes: 0 additions & 3 deletions conda/recipes/dask-cudf/meta.yaml
@@ -19,9 +19,6 @@ build:
   script_env:
     - VERSION_SUFFIX
     - PARALLEL_LEVEL
-    - CC
-    - CXX
-    - CUDAHOSTCXX

 requirements:
   host:
12 changes: 12 additions & 0 deletions conda/recipes/libcudf/conda_build_config.yaml
@@ -1,3 +1,15 @@
+c_compiler_version:
+  - 9
+
+cxx_compiler_version:
+  - 9
+
+cuda_compiler:
+  - nvcc
+
+sysroot_version:
+  - "2.17"
+
 cmake_version:
   - ">=3.20.1,!=3.23.0"

18 changes: 15 additions & 3 deletions conda/recipes/libcudf/meta.yaml
@@ -14,9 +14,6 @@ source:

 build:
   script_env:
-    - CC
-    - CXX
-    - CUDAHOSTCXX
     - PARALLEL_LEVEL
     - CMAKE_GENERATOR
     - CMAKE_C_COMPILER_LAUNCHER
@@ -31,6 +28,10 @@ build:
 requirements:
   build:
     - cmake {{ cmake_version }}
+    - {{ compiler('c') }}
+    - {{ compiler('cxx') }}
+    - {{ compiler('cuda') }} {{ cuda_version }}
+    - sysroot_{{ target_platform }} {{ sysroot_version }}
   host:
     - librmm {{ minor_version }}.*
     - cudatoolkit {{ cuda_version }}.*
@@ -48,6 +49,8 @@ outputs:
       string: cuda{{ cuda_major }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
       run_exports:
         - {{ pin_subpackage("libcudf", max_pin="x.x") }}
+      ignore_run_exports_from:
+        - {{ compiler('cuda') }}
     requirements:
       build:
         - cmake {{ cmake_version }}
@@ -287,6 +290,8 @@ outputs:
     build:
       number: {{ GIT_DESCRIBE_NUMBER }}
       string: {{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+      ignore_run_exports_from:
+        - {{ compiler('cuda') }}
     requirements:
       build:
         - cmake {{ cmake_version }}
@@ -308,9 +313,14 @@ outputs:
     build:
       number: {{ GIT_DESCRIBE_NUMBER }}
       string: {{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+      ignore_run_exports_from:
+        - {{ compiler('cuda') }}
     requirements:
       build:
         - cmake {{ cmake_version }}
+        - {{ compiler('c') }}
+        - {{ compiler('cxx') }}
+        - sysroot_{{ target_platform }} {{ sysroot_version }}
       host:
         - {{ pin_subpackage('libcudf', exact=True) }}
     run:
@@ -327,6 +337,8 @@ outputs:
     build:
       number: {{ GIT_DESCRIBE_NUMBER }}
       string: cuda{{ cuda_major }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+      ignore_run_exports_from:
+        - {{ compiler('cuda') }}
     requirements:
       build:
         - cmake {{ cmake_version }}
20 changes: 20 additions & 0 deletions cpp/include/cudf/table/experimental/row_operators.cuh
@@ -75,6 +75,19 @@ namespace row {
 enum class lhs_index_type : size_type {};
 enum class rhs_index_type : size_type {};

+/**
+ * @brief A counting iterator that uses strongly typed indices bound to tables.
+ *
+ * Performing lexicographic or equality comparisons between values in two
+ * tables requires the use of strongly typed indices. The strong index types
+ * `lhs_index_type` and `rhs_index_type` ensure that index values are bound to
+ * the correct table, regardless of the order in which these indices are
+ * provided to the call operator. This struct and its type aliases
+ * `lhs_iterator` and `rhs_iterator` provide an interface similar to a counting
+ * iterator, with strongly typed values to represent the table indices.
+ *
+ * @tparam Index The strong index type
+ */
 template <typename Index, typename Underlying = std::underlying_type_t<Index>>
 struct strong_index_iterator : public thrust::iterator_facade<strong_index_iterator<Index>,
                                                               Index,
@@ -110,7 +123,14 @@ struct strong_index_iterator : public thrust::iterator_facade<strong_index_itera
   Underlying begin{};
 };

+/**
+ * @brief Iterator representing indices into a left-side table.
+ */
 using lhs_iterator = strong_index_iterator<lhs_index_type>;
+
+/**
+ * @brief Iterator representing indices into a right-side table.
+ */
 using rhs_iterator = strong_index_iterator<rhs_index_type>;

 namespace lexicographic {
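The new doc comment describes a strong-typedef pattern: left- and right-table indices get distinct C++ types so one cannot silently stand in for the other. A rough Python analogue of the same idea (an illustration only — the real mechanism is the `strong_index_iterator` template above, not this sketch):

    # Illustration of the strong-index idea using typing.NewType: both wrappers
    # are plain ints at runtime, but a type checker (mypy/pyright) keeps them apart.
    from typing import NewType

    LhsIndex = NewType("LhsIndex", int)  # index bound to the left-hand table
    RhsIndex = NewType("RhsIndex", int)  # index bound to the right-hand table

    def rows_equal(lhs_row: LhsIndex, rhs_row: RhsIndex) -> bool:
        # Stand-in for a comparator call operator: the annotations record which
        # table each index belongs to, so mixed-up indices are caught statically.
        return lhs_row == rhs_row  # placeholder body for the sketch

    rows_equal(LhsIndex(3), RhsIndex(7))     # accepted
    # rows_equal(RhsIndex(7), LhsIndex(3))   # rejected by a static type checker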
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/avro.py
@@ -24,7 +24,7 @@ def read_avro(
             "`read_avro` does not yet support reading multiple files"
         )

-    filepath_or_buffer, compression = ioutils.get_filepath_or_buffer(
+    filepath_or_buffer, compression = ioutils.get_reader_filepath_or_buffer(
         path_or_data=filepath_or_buffer, compression=None, **kwargs
     )
     if compression is not None:
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/csv.py
@@ -60,7 +60,7 @@ def read_csv(
             "`read_csv` does not yet support reading multiple files"
         )

-    filepath_or_buffer, compression = ioutils.get_filepath_or_buffer(
+    filepath_or_buffer, compression = ioutils.get_reader_filepath_or_buffer(
         path_or_data=filepath_or_buffer,
         compression=compression,
         iotypes=(BytesIO, StringIO, NativeFile),
6 changes: 4 additions & 2 deletions python/cudf/cudf/io/json.py
@@ -42,10 +42,11 @@ def read_json(
                 source = ioutils.stringify_pathlike(source)
                 source = fs.sep.join([source, "*.json"])

-            tmp_source, compression = ioutils.get_filepath_or_buffer(
+            tmp_source, compression = ioutils.get_reader_filepath_or_buffer(
                 path_or_data=source,
                 compression=compression,
                 iotypes=(BytesIO, StringIO),
+                allow_raw_text_input=True,
                 **kwargs,
             )
             if isinstance(tmp_source, list):
@@ -73,10 +74,11 @@ def read_json(
                 "multiple files via pandas"
             )

-        path_or_buf, compression = ioutils.get_filepath_or_buffer(
+        path_or_buf, compression = ioutils.get_reader_filepath_or_buffer(
             path_or_data=path_or_buf,
             compression=compression,
             iotypes=(BytesIO, StringIO),
+            allow_raw_text_input=True,
             **kwargs,
         )

6 changes: 3 additions & 3 deletions python/cudf/cudf/io/orc.py
@@ -171,7 +171,7 @@ def read_orc_statistics(
     files_statistics = []
     stripes_statistics = []
     for source in filepaths_or_buffers:
-        filepath_or_buffer, compression = ioutils.get_filepath_or_buffer(
+        path_or_buf, compression = ioutils.get_reader_filepath_or_buffer(
             path_or_data=source, compression=None, **kwargs
         )
         if compression is not None:
@@ -182,7 +182,7 @@ def read_orc_statistics(
             column_names,
             raw_file_statistics,
             raw_stripes_statistics,
-        ) = liborc.read_raw_orc_statistics(filepath_or_buffer)
+        ) = liborc.read_raw_orc_statistics(path_or_buf)

         # Parse column names
         column_names = [
@@ -323,7 +323,7 @@ def read_orc(
             source = stringify_path(source)
             source = fs.sep.join([source, "*.orc"])

-        tmp_source, compression = ioutils.get_filepath_or_buffer(
+        tmp_source, compression = ioutils.get_reader_filepath_or_buffer(
             path_or_data=source,
             compression=None,
             use_python_file_object=use_python_file_object,
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/parquet.py
@@ -435,7 +435,7 @@ def read_parquet(
             fs=fs,
         )
     for i, source in enumerate(filepath_or_buffer):
-        tmp_source, compression = ioutils.get_filepath_or_buffer(
+        tmp_source, compression = ioutils.get_reader_filepath_or_buffer(
             path_or_data=source,
             compression=None,
             fs=fs,
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/text.py
@@ -18,7 +18,7 @@ def read_text(
 ):
     """{docstring}"""

-    filepath_or_buffer, compression = ioutils.get_filepath_or_buffer(
+    filepath_or_buffer, compression = ioutils.get_reader_filepath_or_buffer(
         path_or_data=filepath_or_buffer,
         compression=None,
         iotypes=(BytesIO, StringIO),
26 changes: 18 additions & 8 deletions python/cudf/cudf/utils/ioutils.py
@@ -1319,7 +1319,7 @@ def _open_remote_files(
     ]


-def get_filepath_or_buffer(
+def get_reader_filepath_or_buffer(
     path_or_data,
     compression,
     mode="rb",
@@ -1328,6 +1328,7 @@ def get_filepath_or_buffer(
     byte_ranges=None,
     use_python_file_object=False,
     open_file_options=None,
+    allow_raw_text_input=False,
     **kwargs,
 ):
     """Return either a filepath string to data, or a memory buffer of data.
@@ -1352,6 +1353,11 @@ def get_filepath_or_buffer(
     open_file_options : dict, optional
         Optional dictionary of key-word arguments to pass to
         `_open_remote_files` (used for remote storage only).
+    allow_raw_text_input : boolean, default False
+        If True, this indicates the input `path_or_data` could be a raw text
+        input and will not check for its existence in the filesystem. If False,
+        the input must be a path and an error will be raised if it does not
+        exist.

     Returns
     -------
@@ -1372,18 +1378,22 @@ def get_filepath_or_buffer(
     if fs is None:
         return path_or_data, compression

-    if len(paths) == 0:
-        raise FileNotFoundError(
-            f"{path_or_data} could not be resolved to any files"
-        )
-
     if _is_local_filesystem(fs):
         # Doing this as `read_json` accepts a json string
         # path_or_data need not be a filepath like string
-        if os.path.exists(paths[0]):
-            path_or_data = paths if len(paths) > 1 else paths[0]
+        if len(paths):
+            if fs.exists(paths[0]):
+                path_or_data = paths if len(paths) > 1 else paths[0]
+            elif not allow_raw_text_input:
+                raise FileNotFoundError(
+                    f"{path_or_data} could not be resolved to any files"
+                )

     else:
+        if len(paths) == 0:
+            raise FileNotFoundError(
+                f"{path_or_data} could not be resolved to any files"
+            )
         if use_python_file_object:
             path_or_data = _open_remote_files(
                 paths,
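Taken together with the reader changes above, the caller-visible behavior is: readers that pass `allow_raw_text_input=True` (such as `read_json`) may fall through to parsing the input as raw text when it does not name an existing local file, while purely path-based readers now raise `FileNotFoundError` from inside the local-filesystem branch. A hedged sketch of that behavior — the sample data is illustrative:

    # Sketch of the behavior enabled by allow_raw_text_input (illustrative data).
    import cudf

    # read_json forwards allow_raw_text_input=True, so a raw JSON string that
    # does not name an existing file is handed to the JSON parser unchanged.
    df = cudf.read_json('{"a": [1, 2], "b": [3, 4]}')

    # Path-based readers still fail fast when nothing matches on disk.
    try:
        cudf.read_csv("no_such_file.csv")
    except FileNotFoundError as err:
        print(err)  # "no_such_file.csv could not be resolved to any files"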
