diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh
index 07d6a0e50af..035c89f8bcc 100755
--- a/ci/benchmark/build.sh
+++ b/ci/benchmark/build.sh
@@ -75,10 +75,10 @@ conda install "rmm=$MINOR_VERSION.*" "cudatoolkit=$CUDA_REL" \
# conda install "your-pkg=1.0.0"
# Install the master version of dask, distributed, and streamz
-logger "pip install git+https://github.com/dask/distributed.git@main --upgrade --no-deps"
-pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps
-logger "pip install git+https://github.com/dask/dask.git@main --upgrade --no-deps"
-pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps
+logger "pip install git+https://github.com/dask/distributed.git@2021.06.0 --upgrade --no-deps"
+pip install "git+https://github.com/dask/distributed.git@2021.06.0" --upgrade --no-deps
+logger "pip install git+https://github.com/dask/dask.git@2021.06.0 --upgrade --no-deps"
+pip install "git+https://github.com/dask/dask.git@2021.06.0" --upgrade --no-deps
logger "pip install git+https://github.com/python-streamz/streamz.git --upgrade --no-deps"
pip install "git+https://github.com/python-streamz/streamz.git" --upgrade --no-deps
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index d88fe837103..8299afa18a9 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -101,8 +101,8 @@ function install_dask {
# Install the main version of dask, distributed, and streamz
gpuci_logger "Install the main version of dask, distributed, and streamz"
set -x
- pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps
- pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps
+ pip install "git+https://github.com/dask/distributed.git@2021.06.0" --upgrade --no-deps
+ pip install "git+https://github.com/dask/dask.git@2021.06.0" --upgrade --no-deps
pip install "git+https://github.com/python-streamz/streamz.git" --upgrade --no-deps
set +x
}
diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml
index 44396715e02..21ced3a0022 100644
--- a/conda/environments/cudf_dev_cuda11.0.yml
+++ b/conda/environments/cudf_dev_cuda11.0.yml
@@ -60,7 +60,7 @@ dependencies:
- cachetools
- transformers
- pip:
- - git+https://github.com/dask/dask.git@main
- - git+https://github.com/dask/distributed.git@main
+ - git+https://github.com/dask/dask.git@2021.06.0
+ - git+https://github.com/dask/distributed.git@2021.06.0
- git+https://github.com/python-streamz/streamz.git
- pyorc
diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml
index 8d88fb3d583..156a028ffdb 100644
--- a/conda/environments/cudf_dev_cuda11.2.yml
+++ b/conda/environments/cudf_dev_cuda11.2.yml
@@ -60,7 +60,7 @@ dependencies:
- cachetools
- transformers
- pip:
- - git+https://github.com/dask/dask.git@main
- - git+https://github.com/dask/distributed.git@main
+ - git+https://github.com/dask/dask.git@2021.06.0
+ - git+https://github.com/dask/distributed.git@2021.06.0
- git+https://github.com/python-streamz/streamz.git
- pyorc
diff --git a/conda/recipes/dask-cudf/run_test.sh b/conda/recipes/dask-cudf/run_test.sh
index 3fc1182b33b..34d3fd632b7 100644
--- a/conda/recipes/dask-cudf/run_test.sh
+++ b/conda/recipes/dask-cudf/run_test.sh
@@ -9,11 +9,11 @@ function logger() {
}
# Install the latest version of dask and distributed
-logger "pip install git+https://github.com/dask/distributed.git@main --upgrade --no-deps"
-pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps
+logger "pip install git+https://github.com/dask/distributed.git@2021.06.0 --upgrade --no-deps"
+pip install "git+https://github.com/dask/distributed.git@2021.06.0" --upgrade --no-deps
-logger "pip install git+https://github.com/dask/dask.git@main --upgrade --no-deps"
-pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps
+logger "pip install git+https://github.com/dask/dask.git@2021.06.0 --upgrade --no-deps"
+pip install "git+https://github.com/dask/dask.git@2021.06.0" --upgrade --no-deps
logger "python -c 'import dask_cudf'"
python -c "import dask_cudf"
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 090f613a9d1..87a04a17b37 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -168,6 +168,7 @@ add_library(cudf
src/copying/gather.cu
src/copying/get_element.cu
src/copying/pack.cpp
+ src/copying/reverse.cu
src/copying/sample.cu
src/copying/scatter.cu
src/copying/shift.cu
diff --git a/cpp/cmake/thirdparty/CUDF_GetArrow.cmake b/cpp/cmake/thirdparty/CUDF_GetArrow.cmake
index c1c29a693d5..79d3c0770a3 100644
--- a/cpp/cmake/thirdparty/CUDF_GetArrow.cmake
+++ b/cpp/cmake/thirdparty/CUDF_GetArrow.cmake
@@ -50,6 +50,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC)
"ARROW_WITH_BACKTRACE ON"
"ARROW_CXXFLAGS -w"
"ARROW_JEMALLOC OFF"
+ "ARROW_S3 ON"
# Arrow modifies CMake's GLOBAL RULE_LAUNCH_COMPILE unless this is off
"ARROW_USE_CCACHE OFF"
"ARROW_ARMV8_ARCH ${ARROW_ARMV8_ARCH}"
diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md
index f2873e31c5b..8ec111acdb2 100644
--- a/cpp/docs/DEVELOPER_GUIDE.md
+++ b/cpp/docs/DEVELOPER_GUIDE.md
@@ -342,6 +342,7 @@ namespace detail{
} // namespace detail
void external_function(...){
+  CUDF_FUNC_RANGE(); // Auto-generates an NVTX range for the lifetime of this function
detail::external_function(...);
}
```
@@ -355,6 +356,12 @@ asynchrony if and when we add an asynchronous API to libcudf.
**Note:** `cudaDeviceSynchronize()` should *never* be used.
This limits the ability to do any multi-stream/multi-threaded work with libcudf APIs.
+ ### NVTX Ranges
+
+ In order to aid in performance optimization and debugging, all compute-intensive libcudf functions should have a corresponding NVTX range.
+ libcudf provides a convenience macro, `CUDF_FUNC_RANGE()`, that automatically annotates the lifetime of the enclosing function and uses the function's name as the name of the NVTX range.
+ For more information about NVTX, see [here](https://github.com/NVIDIA/NVTX/tree/dev/cpp).
+
### Stream Creation
There may be times in implementing libcudf features where it would be advantageous to use streams
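For reference, a minimal sketch of what a function-scoped range amounts to, using the plain NVTX C API rather than libcudf's actual macro (which is built on the NVTX3 C++ API and a libcudf-specific domain):

```cpp
#include <nvToolsExt.h>

// RAII helper: the range opens on construction and closes on destruction.
struct scoped_range {
  explicit scoped_range(char const* name) { nvtxRangePushA(name); }
  ~scoped_range() { nvtxRangePop(); }
  scoped_range(scoped_range const&) = delete;
  scoped_range& operator=(scoped_range const&) = delete;
};

// Hypothetical stand-in for CUDF_FUNC_RANGE(); __func__ supplies the name.
#define FUNC_RANGE() scoped_range _func_range{__func__}

void external_function()
{
  FUNC_RANGE();  // annotates the lifetime of external_function
  // ... detail::external_function(...);
}
```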
diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index c9a4eab2154..477c53535de 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -81,6 +81,36 @@ std::unique_ptr<table> gather(
out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+/**
+ * @brief Reverses the rows within a table.
+ * Creates a new table that is the reverse of @p source_table.
+ * Example:
+ * ```
+ * source = [[4,5,6], [7,8,9], [10,11,12]]
+ * return = [[6,5,4], [9,8,7], [12,11,10]]
+ * ```
+ *
+ * @param source_table Table that will be reversed
+ * @param mr Device memory resource used to allocate the returned table's device memory
+ * @return A new table whose rows are in reverse order
+ */
+std::unique_ptr<table> reverse(
+  table_view const& source_table,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Reverses the elements of a column
+ * Creates a new column that is the reverse of @p source_column.
+ * Example:
+ * ```
+ * source = [4,5,6]
+ * return = [6,5,4]
+ * ```
+ *
+ * @param source_column Column that will be reversed
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return A new column whose elements are in reverse order
+ */
+std::unique_ptr<column> reverse(
+  column_view const& source_column,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
/**
* @brief Scatters the rows of the source table into a copy of the target table
* according to a scatter map.
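A usage sketch for the two overloads declared above, assuming `tv` is an existing `cudf::table_view` and `cv` an existing `cudf::column_view`:

```cpp
#include <cudf/copying.hpp>

auto reversed_table  = cudf::reverse(tv);  // rows reversed per column: [4,5,6] -> [6,5,4]
auto reversed_column = cudf::reverse(cv);  // elements reversed:        [4,5,6] -> [6,5,4]
```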
diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh
index 410cd213618..d71a8d0ec24 100644
--- a/cpp/include/cudf/detail/scatter.cuh
+++ b/cpp/include/cudf/detail/scatter.cuh
@@ -305,7 +305,7 @@ struct column_scatterer_impl {
[](auto const& col) { return col.nullable(); });
if (child_nullable) {
auto const gather_map =
- scatter_to_gather(scatter_map_begin, scatter_map_end, source.size(), stream);
+ scatter_to_gather(scatter_map_begin, scatter_map_end, target.size(), stream);
      gather_bitmask(cudf::table_view{std::vector<cudf::column_view>{structs_src.child_begin(),
                                                                     structs_src.child_end()}},
gather_map.begin(),
diff --git a/cpp/include/cudf/io/datasource.hpp b/cpp/include/cudf/io/datasource.hpp
index ab7a3a6fa9b..6c885a874ee 100644
--- a/cpp/include/cudf/io/datasource.hpp
+++ b/cpp/include/cudf/io/datasource.hpp
@@ -22,9 +22,13 @@
 #include <cudf/io/types.hpp>
 #include <cudf/utilities/error.hpp>
+#include <arrow/filesystem/filesystem.h>
+#include <arrow/result.h>
 #include <arrow/buffer.h>
 #include <arrow/io/file.h>
 #include <arrow/io/interfaces.h>
+#include <string>
+#include <string_view>
 #include <memory>
@@ -302,6 +306,34 @@ class arrow_io_source : public datasource {
};
public:
+ /**
+ * @brief Constructs an object from an Apache Arrow Filesystem URI
+ *
+   * @param arrow_uri Apache Arrow Filesystem URI
+ */
+ explicit arrow_io_source(std::string_view arrow_uri)
+ {
+ const std::string uri_start_delimiter = "//";
+ const std::string uri_end_delimiter = "?";
+
+    arrow::Result<std::shared_ptr<arrow::fs::FileSystem>> result =
+      arrow::fs::FileSystemFromUri(static_cast<std::string>(arrow_uri));
+    CUDF_EXPECTS(result.ok(), "Failed to generate Arrow Filesystem instance from URI.");
+    filesystem = result.ValueOrDie();
+
+ // Parse the path from the URI
+ size_t start = arrow_uri.find(uri_start_delimiter) == std::string::npos
+ ? 0
+ : arrow_uri.find(uri_start_delimiter) + uri_start_delimiter.size();
+ size_t end = arrow_uri.find(uri_end_delimiter) - start;
+ std::string_view path = arrow_uri.substr(start, end);
+
+    arrow::Result<std::shared_ptr<arrow::io::RandomAccessFile>> in_stream =
+      filesystem->OpenInputFile(static_cast<std::string>(path).c_str());
+    CUDF_EXPECTS(in_stream.ok(), "Failed to open Arrow RandomAccessFile");
+    arrow_file = in_stream.ValueOrDie();
+ }
+
/**
* @brief Constructs an object from an `arrow` source object.
*
@@ -340,6 +372,7 @@ class arrow_io_source : public datasource {
}
private:
+  std::shared_ptr<arrow::fs::FileSystem> filesystem;
   std::shared_ptr<arrow::io::RandomAccessFile> arrow_file;
};
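With `ARROW_S3 ON` in the Arrow build (see the CMake change above), this constructor lets a datasource be opened straight from a filesystem URI. A usage sketch; the bucket and key are placeholders:

```cpp
#include <cudf/io/datasource.hpp>

// Arrow resolves the filesystem ("s3", "file", ...) from the URI scheme.
auto src = cudf::io::arrow_io_source{"s3://my-bucket/path/to/data.orc"};
```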
diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index 428a4195bf8..1f9ed71ce8c 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -22,6 +22,7 @@
 #include <cudf/types.hpp>
 #include <rmm/cuda_stream_view.hpp>
+#include <optional>
 #include <vector>
namespace cudf {
@@ -522,13 +523,15 @@ class hash_join {
/**
* Returns the row indices that can be used to construct the result of performing
- * an inner join between two tables. @see cudf::inner_join().
+ * an inner join between two tables. @see cudf::inner_join(). Behavior is undefined if the
+ * provided `output_size` is smaller than the actual output size.
*
* @param probe The probe table, from which the tuples are probed.
* @param compare_nulls Controls whether null join-key values should match or not.
+ * @param output_size Optional value which allows users to specify the exact output size.
+ * @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table and columns' device
* memory.
- * @param stream CUDA stream used for device memory operations and kernel launches
*
* @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
* the result of performing an inner join between two tables with `build` and `probe`
@@ -537,19 +540,22 @@ class hash_join {
  std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
            std::unique_ptr<rmm::device_uvector<size_type>>>
inner_join(cudf::table_view const& probe,
- null_equality compare_nulls = null_equality::EQUAL,
- rmm::cuda_stream_view stream = rmm::cuda_stream_default,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
+ null_equality compare_nulls = null_equality::EQUAL,
+             std::optional<std::size_t> output_size = {},
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
/**
* Returns the row indices that can be used to construct the result of performing
- * a left join between two tables. @see cudf::left_join().
+ * a left join between two tables. @see cudf::left_join(). Behavior is undefined if the
+ * provided `output_size` is smaller than the actual output size.
*
* @param probe The probe table, from which the tuples are probed.
* @param compare_nulls Controls whether null join-key values should match or not.
+ * @param output_size Optional value which allows users to specify the exact output size.
+ * @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table and columns' device
* memory.
- * @param stream CUDA stream used for device memory operations and kernel launches
*
* @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
* the result of performing a left join between two tables with `build` and `probe`
@@ -558,19 +564,22 @@ class hash_join {
  std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
            std::unique_ptr<rmm::device_uvector<size_type>>>
left_join(cudf::table_view const& probe,
- null_equality compare_nulls = null_equality::EQUAL,
- rmm::cuda_stream_view stream = rmm::cuda_stream_default,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
+ null_equality compare_nulls = null_equality::EQUAL,
+            std::optional<std::size_t> output_size = {},
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
/**
* Returns the row indices that can be used to construct the result of performing
- * a full join between two tables. @see cudf::full_join().
+ * a full join between two tables. @see cudf::full_join(). Behavior is undefined if the
+ * provided `output_size` is smaller than the actual output size.
*
* @param probe The probe table, from which the tuples are probed.
* @param compare_nulls Controls whether null join-key values should match or not.
+ * @param output_size Optional value which allows users to specify the exact output size.
+ * @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table and columns' device
* memory.
- * @param stream CUDA stream used for device memory operations and kernel launches
*
* @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
* the result of performing a full join between two tables with `build` and `probe`
@@ -579,9 +588,59 @@ class hash_join {
  std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
            std::unique_ptr<rmm::device_uvector<size_type>>>
full_join(cudf::table_view const& probe,
- null_equality compare_nulls = null_equality::EQUAL,
- rmm::cuda_stream_view stream = rmm::cuda_stream_default,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
+ null_equality compare_nulls = null_equality::EQUAL,
+            std::optional<std::size_t> output_size = {},
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
+
+ /**
+ * Returns the exact number of matches (rows) when performing an inner join with the specified
+ * probe table.
+ *
+ * @param probe The probe table, from which the tuples are probed.
+ * @param compare_nulls Controls whether null join-key values should match or not.
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ *
+   * @return The exact number of output rows when performing an inner join between two tables with
+   * `build` and `probe` as the join keys.
+ */
+ std::size_t inner_join_size(cudf::table_view const& probe,
+ null_equality compare_nulls = null_equality::EQUAL,
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default) const;
+
+ /**
+ * Returns the exact number of matches (rows) when performing a left join with the specified probe
+ * table.
+ *
+ * @param probe The probe table, from which the tuples are probed.
+ * @param compare_nulls Controls whether null join-key values should match or not.
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ *
+   * @return The exact number of output rows when performing a left join between two tables with
+   * `build` and `probe` as the join keys.
+ */
+ std::size_t left_join_size(cudf::table_view const& probe,
+ null_equality compare_nulls = null_equality::EQUAL,
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default) const;
+
+ /**
+ * Returns the exact number of matches (rows) when performing a full join with the specified probe
+ * table.
+ *
+ * @param probe The probe table, from which the tuples are probed.
+ * @param compare_nulls Controls whether null join-key values should match or not.
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the intermediate table and columns' device
+ * memory.
+ *
+   * @return The exact number of output rows when performing a full join between two tables with
+   * `build` and `probe` as the join keys.
+ */
+ std::size_t full_join_size(
+ cudf::table_view const& probe,
+ null_equality compare_nulls = null_equality::EQUAL,
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
private:
struct hash_join_impl;
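A sketch of the calling pattern implied by these declarations: compute the exact size once, then hand it back so the join can size its output buffers directly instead of counting again. Assumes existing `build`/`probe` table views, a `stream`, and an `mr`:

```cpp
cudf::hash_join hj(build, cudf::null_equality::EQUAL, stream);

// Pass 1: exact output size. Pass 2: reuse it via `output_size`.
std::size_t const join_size = hj.inner_join_size(probe, cudf::null_equality::EQUAL, stream);
auto [left_indices, right_indices] =
  hj.inner_join(probe, cudf::null_equality::EQUAL, join_size, stream, mr);
```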
diff --git a/cpp/include/cudf/strings/convert/convert_integers.hpp b/cpp/include/cudf/strings/convert/convert_integers.hpp
index 4d29b0a5b6a..17430d3eafe 100644
--- a/cpp/include/cudf/strings/convert/convert_integers.hpp
+++ b/cpp/include/cudf/strings/convert/convert_integers.hpp
@@ -171,7 +171,7 @@ std::unique_ptr hex_to_integers(
* @code{.pseudo}
* Example:
* s = ['123', '-456', '', 'AGE', '+17EA', '0x9EF' '123ABC']
- * b = s.is_hex(s)
+ * b = is_hex(s)
* b is [true, false, false, false, false, true, true]
* @endcode
*
@@ -185,6 +185,37 @@ std::unique_ptr is_hex(
strings_column_view const& strings,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+/**
+ * @brief Returns a new strings column converting integer columns to hexadecimal
+ * characters.
+ *
+ * Any null entries will result in corresponding null entries in the output column.
+ *
+ * The output character set is '0'-'9' and 'A'-'F'. The output string width will
+ * be a multiple of 2 depending on the size of the integer type. A single leading
+ * zero is applied to the first non-zero output byte if it is less than 0x10.
+ *
+ * @code{.pseudo}
+ * Example:
+ * input = [123, -1, 0, 27, 342718233] // int32 type input column
+ * s = integers_to_hex(input)
+ * s is ['04D2', 'FFFFFFFF', '00', '1B', '146D7719']
+ * @endcode
+ *
+ * The example above shows an `INT32` type column where each integer is 4 bytes.
+ * Leading zeros are suppressed unless filling out a complete byte as in
+ * `123 -> '04D2'` instead of `000004D2` or `4D2`.
+ *
+ * @throw cudf::logic_error if the input column is not an integral type.
+ *
+ * @param input Integer column to convert to hex.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
+ * @return New strings column with hexadecimal characters.
+ */
+std::unique_ptr<column> integers_to_hex(
+ column_view const& input,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
/** @} */ // end of doxygen group
} // namespace strings
} // namespace cudf
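A usage sketch for the API declared above, assuming `input` is an existing `INT32` column view:

```cpp
#include <cudf/strings/convert/convert_integers.hpp>

auto hex = cudf::strings::integers_to_hex(input);  // [123, -1, 0] -> ['04D2', 'FFFFFFFF', '00']
```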
diff --git a/cpp/include/cudf/wrappers/timestamps.hpp b/cpp/include/cudf/wrappers/timestamps.hpp
index 275ac20048e..ac13dae6a74 100644
--- a/cpp/include/cudf/wrappers/timestamps.hpp
+++ b/cpp/include/cudf/wrappers/timestamps.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -32,19 +32,7 @@ template <typename Duration>
 using time_point = cuda::std::chrono::sys_time<Duration>;
 template <typename Duration>
-struct timestamp : time_point<Duration> {
-  // Bring over base class constructors and make them visible here
-  using time_point<Duration>::time_point;
-
-  // This is needed as __shared__ objects of this type can't be assigned in device code
-  // when the initializer list constructs subobjects with values, which is what std::time_point
-  // does.
-  constexpr timestamp() : time_point<Duration>(Duration()){};
-
-  // The inherited copy constructor will hide the auto generated copy constructor;
-  // hence, explicitly define and delegate
-  constexpr timestamp(const time_point<Duration>& other) : time_point<Duration>(other) {}
-};
+using timestamp = time_point<Duration>;
} // namespace detail
/**
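With the alias, libcudf timestamps are plain `cuda::std::chrono` time points, so the deleted workaround constructors are no longer needed. A small sketch (assuming the public `timestamp_s`/`duration_s` types):

```cpp
#include <cudf/wrappers/durations.hpp>
#include <cudf/wrappers/timestamps.hpp>

cudf::timestamp_s ts{cudf::duration_s{42}};  // 42 seconds since the epoch
auto secs = ts.time_since_epoch().count();   // 42
```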
diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu
index 809390553a4..4b11382a3f2 100644
--- a/cpp/src/copying/contiguous_split.cu
+++ b/cpp/src/copying/contiguous_split.cu
@@ -886,13 +886,15 @@ std::vector contiguous_split(cudf::table_view const& input,
size_type* offset_stack = &d_offset_stack[stack_pos];
int parent_offsets_index = src_info.parent_offsets_index;
int stack_size = 0;
+ int root_column_offset = src_info.column_offset;
while (parent_offsets_index >= 0) {
offset_stack[stack_size++] = parent_offsets_index;
+ root_column_offset = d_src_buf_info[parent_offsets_index].column_offset;
parent_offsets_index = d_src_buf_info[parent_offsets_index].parent_offsets_index;
}
- // make sure to include the -column- offset in our calculations
- int row_start = d_indices[split_index] + src_info.column_offset;
- int row_end = d_indices[split_index + 1] + src_info.column_offset;
+ // make sure to include the -column- offset on the root column in our calculation.
+ int row_start = d_indices[split_index] + root_column_offset;
+ int row_end = d_indices[split_index + 1] + root_column_offset;
while (stack_size > 0) {
stack_size--;
auto const offsets = d_src_buf_info[offset_stack[stack_size]].offsets;
@@ -923,6 +925,7 @@ std::vector contiguous_split(cudf::table_view const& input,
int const element_size = cudf::type_dispatcher(data_type{src_info.type}, size_of_helper{});
std::size_t const bytes =
static_cast(num_elements) * static_cast(element_size);
+
return dst_buf_info{_round_up_safe(bytes, 64),
num_elements,
element_size,
diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu
index 9f8e6f7bdcb..b0de9cd750e 100644
--- a/cpp/src/copying/copy.cu
+++ b/cpp/src/copying/copy.cu
@@ -195,21 +195,6 @@ std::unique_ptr<column> scatter_gather_based_if_else(Left const& lhs,
 {
   if constexpr (std::is_same<Left, column_view>::value &&
                 std::is_same<Right, column_view>::value) {
- auto const null_map_entry = size + 1; // Out of bounds index, for gather() to nullify.
-
- auto const gather_lhs = make_counting_transform_iterator(
- size_type{0}, lhs_gather_map_functor{is_left, null_map_entry});
-
- auto const lhs_gathered_columns =
- cudf::detail::gather(table_view{std::vector{lhs}},
- gather_lhs,
- gather_lhs + size,
- out_of_bounds_policy::NULLIFY,
- stream,
- mr)
- ->release();
- auto& lhs_partial_output = lhs_gathered_columns[0];
-
+    auto scatter_map_rhs = rmm::device_uvector<size_type>{static_cast<std::size_t>(size), stream};
auto const scatter_map_end = thrust::copy_if(rmm::exec_policy(stream),
thrust::make_counting_iterator(size_type{0}),
@@ -227,7 +212,7 @@ std::unique_ptr scatter_gather_based_if_else(Left const& lhs,
table_view{std::vector{scatter_src_rhs->get_column(0).view()}},
scatter_map_rhs.begin(),
scatter_map_end,
- table_view{std::vector{lhs_partial_output->view()}},
+ table_view{std::vector{lhs}},
false,
stream,
mr);
diff --git a/cpp/src/copying/reverse.cu b/cpp/src/copying/reverse.cu
new file mode 100644
index 00000000000..73a36d70f7b
--- /dev/null
+++ b/cpp/src/copying/reverse.cu
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/column/column.hpp>
+#include <cudf/copying.hpp>
+#include <cudf/detail/gather.hpp>
+#include <cudf/detail/iterator.cuh>
+#include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/table/table_view.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_buffer.hpp>
+#include <rmm/exec_policy.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/iterator/transform_iterator.h>
+#include <thrust/reverse.h>
+#include <thrust/sequence.h>
+
+namespace cudf {
+namespace detail {
+std::unique_ptr<table> reverse(table_view const& source_table,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
+{
+ size_type num_rows = source_table.num_rows();
+ auto elements =
+ make_counting_transform_iterator(0, [num_rows] __device__(auto i) { return num_rows - i - 1; });
+ auto elements_end = elements + source_table.num_rows();
+
+ return gather(source_table, elements, elements_end, out_of_bounds_policy::DONT_CHECK, stream, mr);
+}
+
+std::unique_ptr<column> reverse(column_view const& source_column,
+                                rmm::cuda_stream_view stream,
+                                rmm::mr::device_memory_resource* mr)
+{
+  return std::move(
+    cudf::detail::reverse(table_view({source_column}), stream, mr)->release().front());
+}
+} // namespace detail
+
+std::unique_ptr<table> reverse(table_view const& source_table, rmm::mr::device_memory_resource* mr)
+{
+ CUDF_FUNC_RANGE();
+ return detail::reverse(source_table, rmm::cuda_stream_default, mr);
+}
+
+std::unique_ptr<column> reverse(column_view const& source_column,
+ rmm::mr::device_memory_resource* mr)
+{
+ CUDF_FUNC_RANGE();
+ return detail::reverse(source_column, rmm::cuda_stream_default, mr);
+}
+} // namespace cudf
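The gather map above is never materialized: a counting iterator composed with the transform `i -> num_rows - 1 - i` feeds `gather` directly. An equivalent standalone Thrust sketch of the same trick:

```cpp
#include <thrust/device_vector.h>
#include <thrust/gather.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>

// Maps output position i to input position n - 1 - i.
struct reverse_index {
  int n;
  __host__ __device__ int operator()(int i) const { return n - 1 - i; }
};

thrust::device_vector<int> reverse_copy(thrust::device_vector<int> const& in)
{
  int const n = static_cast<int>(in.size());
  auto map = thrust::make_transform_iterator(thrust::make_counting_iterator(0), reverse_index{n});
  thrust::device_vector<int> out(n);
  thrust::gather(map, map + n, in.begin(), out.begin());  // out[i] = in[n - 1 - i]
  return out;
}
```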
diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu
index 2aa1e2d866a..4a2330d479b 100644
--- a/cpp/src/io/orc/writer_impl.cu
+++ b/cpp/src/io/orc/writer_impl.cu
@@ -565,6 +565,8 @@ orc_streams::orc_stream_offsets orc_streams::compute_offsets(
// Everything else uses RLE
return true;
}();
+ // non-RLE and RLE streams are separated in the buffer that stores encoded data
+ // The computed offsets do not take the streams of the other type into account
if (is_rle_data) {
strm_offsets[i] = rle_data_size;
rle_data_size += (stream.length * num_rowgroups + 7) & ~7;
@@ -681,6 +683,10 @@ encoded_data writer::impl::encode_columns(const table_device_view &view,
: (((stripe_dict->num_strings + 0x1ff) >> 9) * (512 * 4 + 2));
if (stripe.id == 0) {
strm.data_ptrs[strm_type] = encoded_data.data() + stream_offsets.offsets[strm_id];
+ // Dictionary lengths are encoded as RLE, which are all stored after non-RLE data:
+ // include non-RLE data size in the offset only in that case
+ if (strm_type == gpu::CI_DATA2 && ck.encoding_kind == DICTIONARY_V2)
+ strm.data_ptrs[strm_type] += stream_offsets.non_rle_data_size;
} else {
auto const &strm_up = col_streams[stripe_dict[-dict_stride].start_chunk];
strm.data_ptrs[strm_type] =
@@ -710,7 +716,8 @@ encoded_data writer::impl::encode_columns(const table_device_view &view,
: (col_streams[rg_idx - 1].data_ptrs[strm_type] +
col_streams[rg_idx - 1].lengths[strm_type]);
} else {
- strm.lengths[strm_type] = streams[strm_id].length;
+ strm.lengths[strm_type] = streams[strm_id].length;
+ // RLE encoded streams are stored after all non-RLE streams
strm.data_ptrs[strm_type] = encoded_data.data() + stream_offsets.non_rle_data_size +
stream_offsets.offsets[strm_id] +
streams[strm_id].length * rg_idx;
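The layout these comments describe, as an illustrative helper (names assumed, not writer_impl's actual code): the encoded-data buffer holds every non-RLE stream first, then every RLE stream, and each region keeps offsets relative to its own start.

```cpp
#include <cstddef>

char* stream_start(char* encoded_data,
                   std::size_t non_rle_data_size,  // total bytes of the non-RLE region
                   std::size_t relative_offset,    // offset within the stream's own region
                   bool is_rle)
{
  // RLE streams live after the whole non-RLE region, so shift by its size.
  return is_rle ? encoded_data + non_rle_data_size + relative_offset
                : encoded_data + relative_offset;
}
```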
diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index 3f59bc13dda..dfe3231e897 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -84,6 +84,7 @@ struct valid_range {
* @param left_table_row_count Number of rows of left table
* @param right_table_row_count Number of rows of right table
* @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned vectors.
*
* @return Pair of vectors containing the left join indices complement
*/
@@ -208,6 +209,7 @@ std::unique_ptr<multimap_type, std::function<void(multimap_type*)>> build_join_
/**
* @brief Probes the `hash_table` built from `build_table` for tuples in `probe_table`,
* and returns the output indices of `build_table` and `probe_table` as a combined table.
+ * Behavior is undefined if the provided `output_size` is smaller than the actual output size.
*
* @tparam JoinKind The type of join to be performed.
*
@@ -215,7 +217,9 @@ std::unique_ptr<multimap_type, std::function<void(multimap_type*)>> build_join_
* @param probe_table Table of probe side columns to join.
* @param hash_table Hash table built from `build_table`.
* @param compare_nulls Controls whether null join-key values should match or not.
+ * @param output_size Optional value which allows users to specify the exact output size.
* @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned vectors.
*
* @return Join output indices vector pair.
*/
@@ -226,39 +230,52 @@ probe_join_hash_table(cudf::table_device_view build_table,
cudf::table_device_view probe_table,
multimap_type const &hash_table,
null_equality compare_nulls,
+                      std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
-  std::size_t estimated_size = estimate_join_output_size<JoinKind, multimap_type>(
-    build_table, probe_table, hash_table, compare_nulls, stream);
+ // Use the output size directly if provided. Otherwise, compute the exact output size
+ constexpr cudf::detail::join_kind ProbeJoinKind = (JoinKind == cudf::detail::join_kind::FULL_JOIN)
+ ? cudf::detail::join_kind::LEFT_JOIN
+ : JoinKind;
+  std::size_t const join_size = output_size.value_or(
+    compute_join_output_size<ProbeJoinKind, multimap_type>(
+      build_table, probe_table, hash_table, compare_nulls, stream));
- // If the estimated output size is zero, return immediately
- if (estimated_size == 0) {
+ // If output size is zero, return immediately
+ if (join_size == 0) {
    return std::make_pair(std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr),
                          std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr));
}
- // Because we are approximating the number of joined elements, our approximation
- // might be incorrect and we might have underestimated the number of joined elements.
- // As such we will need to de-allocate memory and re-allocate memory to ensure
- // that the final output is correct.
  rmm::device_scalar<size_type> write_index(0, stream);
- std::size_t join_size{0};
-
-  auto left_indices  = std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr);
-  auto right_indices = std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr);
-
- auto current_estimated_size = estimated_size;
- do {
- left_indices->resize(estimated_size, stream);
- right_indices->resize(estimated_size, stream);
- constexpr int block_size{DEFAULT_JOIN_BLOCK_SIZE};
- detail::grid_1d config(probe_table.num_rows(), block_size);
- write_index.set_value_to_zero_async(stream);
+  auto left_indices  = std::make_unique<rmm::device_uvector<size_type>>(join_size, stream, mr);
+  auto right_indices = std::make_unique<rmm::device_uvector<size_type>>(join_size, stream, mr);
- row_hash hash_probe{probe_table};
- row_equality equality{probe_table, build_table, compare_nulls == null_equality::EQUAL};
+ constexpr int block_size{DEFAULT_JOIN_BLOCK_SIZE};
+ detail::grid_1d config(probe_table.num_rows(), block_size);
+
+ row_hash hash_probe{probe_table};
+ row_equality equality{probe_table, build_table, compare_nulls == null_equality::EQUAL};
+ if constexpr (JoinKind == cudf::detail::join_kind::FULL_JOIN) {
+    probe_hash_table<ProbeJoinKind, multimap_type, block_size, DEFAULT_JOIN_CACHE_SIZE>
+      <<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(
+ hash_table,
+ build_table,
+ probe_table,
+ hash_probe,
+ equality,
+ left_indices->data(),
+ right_indices->data(),
+ write_index.data(),
+ join_size);
+ auto const actual_size = write_index.value(stream);
+ left_indices->resize(actual_size, stream);
+ right_indices->resize(actual_size, stream);
+ } else {
    probe_hash_table<JoinKind, multimap_type, block_size, DEFAULT_JOIN_CACHE_SIZE>
      <<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(
hash_table,
@@ -269,18 +286,101 @@ probe_join_hash_table(cudf::table_device_view build_table,
left_indices->data(),
right_indices->data(),
write_index.data(),
- estimated_size);
+ join_size);
+ }
+ return std::make_pair(std::move(left_indices), std::move(right_indices));
+}
+
+/**
+ * @brief Probes the `hash_table` built from `build_table` for tuples in `probe_table` twice,
+ * and returns the output size of a full join operation between `build_table` and `probe_table`.
+ * TODO: this is a temporary solution as part of `full_join_size`. To be refactored during
+ * cuco integration.
+ *
+ * @param build_table Table of build side columns to join.
+ * @param probe_table Table of probe side columns to join.
+ * @param hash_table Hash table built from `build_table`.
+ * @param compare_nulls Controls whether null join-key values should match or not.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the intermediate vectors.
+ *
+ * @return Output size of full join.
+ */
+std::size_t get_full_join_size(cudf::table_device_view build_table,
+ cudf::table_device_view probe_table,
+ multimap_type const &hash_table,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource *mr)
+{
+  std::size_t join_size = compute_join_output_size<cudf::detail::join_kind::LEFT_JOIN, multimap_type>(
+    build_table, probe_table, hash_table, compare_nulls, stream);
- CHECK_CUDA(stream.value());
+ // If output size is zero, return immediately
+ if (join_size == 0) { return join_size; }
- join_size = write_index.value(stream);
- current_estimated_size = estimated_size;
- estimated_size *= 2;
- } while ((current_estimated_size < join_size));
+  rmm::device_scalar<size_type> write_index(0, stream);
- left_indices->resize(join_size, stream);
- right_indices->resize(join_size, stream);
- return std::make_pair(std::move(left_indices), std::move(right_indices));
+  auto left_indices  = std::make_unique<rmm::device_uvector<size_type>>(join_size, stream, mr);
+  auto right_indices = std::make_unique<rmm::device_uvector<size_type>>(join_size, stream, mr);
+
+ constexpr int block_size{DEFAULT_JOIN_BLOCK_SIZE};
+ detail::grid_1d config(probe_table.num_rows(), block_size);
+
+ row_hash hash_probe{probe_table};
+ row_equality equality{probe_table, build_table, compare_nulls == null_equality::EQUAL};
+  probe_hash_table<cudf::detail::join_kind::LEFT_JOIN, multimap_type, block_size, DEFAULT_JOIN_CACHE_SIZE>
+    <<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(hash_table,
+ build_table,
+ probe_table,
+ hash_probe,
+ equality,
+ left_indices->data(),
+ right_indices->data(),
+ write_index.data(),
+ join_size);
+  // Release the intermediate memory allocation
+ left_indices->resize(0, stream);
+
+ auto const left_table_row_count = probe_table.num_rows();
+ auto const right_table_row_count = build_table.num_rows();
+
+ std::size_t left_join_complement_size;
+
+  // If the left table is empty, then all rows of the right table should be represented in the
+  // joined indices.
+ if (left_table_row_count == 0) {
+ left_join_complement_size = right_table_row_count;
+ } else {
+ // Assume all the indices in invalid_index_map are invalid
+    auto invalid_index_map =
+      std::make_unique<rmm::device_uvector<size_type>>(right_table_row_count, stream);
+ thrust::uninitialized_fill(
+ rmm::exec_policy(stream), invalid_index_map->begin(), invalid_index_map->end(), int32_t{1});
+
+ // Functor to check for index validity since left joins can create invalid indices
+    valid_range<size_type> valid(0, right_table_row_count);
+
+    // invalid_index_map[right_indices[i]] = 0 for i = 0 to right_indices->size()
+ // Thus specifying that those locations are valid
+ thrust::scatter_if(rmm::exec_policy(stream),
+ thrust::make_constant_iterator(0),
+ thrust::make_constant_iterator(0) + right_indices->size(),
+ right_indices->begin(), // Index locations
+ right_indices->begin(), // Stencil - Check if index location is valid
+ invalid_index_map->begin(), // Output indices
+ valid); // Stencil Predicate
+
+ // Create list of indices that have been marked as invalid
+ left_join_complement_size = thrust::count_if(rmm::exec_policy(stream),
+ invalid_index_map->begin(),
+ invalid_index_map->end(),
+                                                 thrust::identity<size_type>());
+ }
+ return join_size + left_join_complement_size;
}
std::unique_ptr<cudf::table> combine_table_pair(std::unique_ptr<cudf::table> &&left,
@@ -323,33 +423,85 @@ std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
hash_join::hash_join_impl::inner_join(cudf::table_view const &probe,
null_equality compare_nulls,
+                                      std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr) const
{
CUDF_FUNC_RANGE();
-  return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(probe, compare_nulls, stream, mr);
+  return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(
+    probe, compare_nulls, output_size, stream, mr);
}
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
hash_join::hash_join_impl::left_join(cudf::table_view const &probe,
null_equality compare_nulls,
+                                     std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr) const
{
CUDF_FUNC_RANGE();
-  return compute_hash_join<cudf::detail::join_kind::LEFT_JOIN>(probe, compare_nulls, stream, mr);
+  return compute_hash_join<cudf::detail::join_kind::LEFT_JOIN>(
+    probe, compare_nulls, output_size, stream, mr);
}
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
hash_join::hash_join_impl::full_join(cudf::table_view const &probe,
null_equality compare_nulls,
+                                     std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr) const
{
CUDF_FUNC_RANGE();
-  return compute_hash_join<cudf::detail::join_kind::FULL_JOIN>(probe, compare_nulls, stream, mr);
+  return compute_hash_join<cudf::detail::join_kind::FULL_JOIN>(
+    probe, compare_nulls, output_size, stream, mr);
+}
+
+std::size_t hash_join::hash_join_impl::inner_join_size(cudf::table_view const &probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream) const
+{
+ CUDF_FUNC_RANGE();
+ CUDF_EXPECTS(_hash_table, "Hash table of hash join is null.");
+
+ auto build_table = cudf::table_device_view::create(_build, stream);
+ auto probe_table = cudf::table_device_view::create(probe, stream);
+
+  return cudf::detail::compute_join_output_size<cudf::detail::join_kind::INNER_JOIN>(
+    *build_table, *probe_table, *_hash_table, compare_nulls, stream);
+}
+
+std::size_t hash_join::hash_join_impl::left_join_size(cudf::table_view const &probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream) const
+{
+ CUDF_FUNC_RANGE();
+
+ // Trivial left join case - exit early
+ if (!_hash_table) { return probe.num_rows(); }
+
+ auto build_table = cudf::table_device_view::create(_build, stream);
+ auto probe_table = cudf::table_device_view::create(probe, stream);
+
+  return cudf::detail::compute_join_output_size<cudf::detail::join_kind::LEFT_JOIN>(
+    *build_table, *probe_table, *_hash_table, compare_nulls, stream);
+}
+
+std::size_t hash_join::hash_join_impl::full_join_size(cudf::table_view const &probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource *mr) const
+{
+ CUDF_FUNC_RANGE();
+
+ // Trivial left join case - exit early
+ if (!_hash_table) { return probe.num_rows(); }
+
+ auto build_table = cudf::table_device_view::create(_build, stream);
+ auto probe_table = cudf::table_device_view::create(probe, stream);
+
+ return get_full_join_size(*build_table, *probe_table, *_hash_table, compare_nulls, stream, mr);
}
template <cudf::detail::join_kind JoinKind>
@@ -357,6 +509,7 @@ std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
null_equality compare_nulls,
+                                             std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr) const
{
@@ -383,7 +536,8 @@ hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
[](const auto &b, const auto &p) { return b.type() == p.type(); }),
"Mismatch in joining column data types");
-  return probe_join_indices<JoinKind>(flattened_probe_table, compare_nulls, stream, mr);
+  return probe_join_indices<JoinKind>(
+    flattened_probe_table, compare_nulls, output_size, stream, mr);
}
template <cudf::detail::join_kind JoinKind>
@@ -391,6 +545,7 @@ std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
hash_join::hash_join_impl::probe_join_indices(cudf::table_view const &probe,
null_equality compare_nulls,
+                                              std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr) const
{
@@ -404,11 +559,8 @@ hash_join::hash_join_impl::probe_join_indices(cudf::table_view const &probe,
auto build_table = cudf::table_device_view::create(_build, stream);
auto probe_table = cudf::table_device_view::create(probe, stream);
-  constexpr cudf::detail::join_kind ProbeJoinKind = (JoinKind == cudf::detail::join_kind::FULL_JOIN)
-                                                      ? cudf::detail::join_kind::LEFT_JOIN
-                                                      : JoinKind;
-  auto join_indices = cudf::detail::probe_join_hash_table<ProbeJoinKind>(
-    *build_table, *probe_table, *_hash_table, compare_nulls, stream, mr);
+  auto join_indices = cudf::detail::probe_join_hash_table<JoinKind>(
+    *build_table, *probe_table, *_hash_table, compare_nulls, output_size, stream, mr);
if (JoinKind == cudf::detail::join_kind::FULL_JOIN) {
auto complement_indices = detail::get_left_join_indices_complement(
diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index 8fefda9f841..f9ccbd68c74 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -39,16 +39,11 @@
namespace cudf {
namespace detail {
/**
- * @brief Gives an estimate of the size of the join output produced when
+ * @brief Calculates the exact size of the join output produced when
* joining two tables together.
*
- * If the two tables are of relatively equal size, then the returned output
- * size will be the exact output size. However, if the probe table is
- * significantly larger than the build table, then we attempt to estimate the
- * output size by using only a subset of the rows in the probe table.
- *
* @throw cudf::logic_error if JoinKind is not INNER_JOIN or LEFT_JOIN
- * @throw cudf::logic_error if the estimated size overflows cudf::size_type
+ * @throw cudf::logic_error if the exact size overflows cudf::size_type
*
* @tparam JoinKind The type of join to be performed
* @tparam multimap_type The type of the hash table
@@ -60,28 +55,21 @@ namespace detail {
* @param compare_nulls Controls whether null join-key values should match or not.
* @param stream CUDA stream used for device memory operations and kernel launches
*
- * @return An estimate of the size of the output of the join operation
+ * @return The exact size of the output of the join operation
*/
template <join_kind JoinKind, typename multimap_type>
-std::size_t estimate_join_output_size(table_device_view build_table,
- table_device_view probe_table,
- multimap_type const& hash_table,
- null_equality compare_nulls,
- rmm::cuda_stream_view stream)
+std::size_t compute_join_output_size(table_device_view build_table,
+ table_device_view probe_table,
+ multimap_type const& hash_table,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream)
{
const size_type build_table_num_rows{build_table.num_rows()};
const size_type probe_table_num_rows{probe_table.num_rows()};
- // If the probe table is significantly larger (5x) than the build table,
- // then we attempt to only use a subset of the probe table rows to compute an
- // estimate of the join output size.
- size_type probe_to_build_ratio{0};
- if (build_table_num_rows > 0) {
-    probe_to_build_ratio = static_cast<size_type>(
-      std::ceil(static_cast<double>(probe_table_num_rows) / build_table_num_rows));
- } else {
- // If the build table is empty, we know exactly how large the output
- // will be for the different types of joins and can return immediately
+ // If the build table is empty, we know exactly how large the output
+ // will be for the different types of joins and can return immediately
+ if (0 == build_table_num_rows) {
switch (JoinKind) {
// Inner join with an empty table will have no output
case join_kind::INNER_JOIN: return 0;
@@ -94,13 +82,9 @@ std::size_t estimate_join_output_size(table_device_view build_table,
}
}
- size_type sample_probe_num_rows{probe_table_num_rows};
- constexpr size_type MAX_RATIO{5};
- if (probe_to_build_ratio > MAX_RATIO) { sample_probe_num_rows = build_table_num_rows; }
-
// Allocate storage for the counter used to get the size of the join output
- std::size_t h_size_estimate{0};
-  rmm::device_scalar<std::size_t> size_estimate(0, stream);
+ std::size_t h_size{0};
+  rmm::device_scalar<std::size_t> d_size(0, stream);
CHECK_CUDA(stream.value());
@@ -116,55 +100,23 @@ std::size_t estimate_join_output_size(table_device_view build_table,
int num_sms{-1};
CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id));
- // Continue probing with a subset of the probe table until either:
- // a non-zero output size estimate is found OR
- // all of the rows in the probe table have been sampled
- do {
- sample_probe_num_rows = std::min(sample_probe_num_rows, probe_table_num_rows);
-
- size_estimate.set_value_to_zero_async(stream);
-
- row_hash hash_probe{probe_table};
- row_equality equality{probe_table, build_table, compare_nulls == null_equality::EQUAL};
- // Probe the hash table without actually building the output to simply
- // find what the size of the output will be.
-    compute_join_output_size<JoinKind, multimap_type, block_size>
-      <<<numBlocks * num_sms, block_size, 0, stream.value()>>>(hash_table,
- build_table,
- probe_table,
- hash_probe,
- equality,
- sample_probe_num_rows,
- size_estimate.data());
- CHECK_CUDA(stream.value());
-
- // Only in case subset of probe table is chosen,
- // increase the estimated output size by a factor of the ratio between the
- // probe and build tables
- if (sample_probe_num_rows < probe_table_num_rows) {
- h_size_estimate = size_estimate.value(stream) * probe_to_build_ratio;
- } else {
- h_size_estimate = size_estimate.value(stream);
- }
-
- // If the size estimate is non-zero, then we have a valid estimate and can break
- // If sample_probe_num_rows >= probe_table_num_rows, then we've sampled the entire
- // probe table, in which case the estimate is exact and we can break
- if ((h_size_estimate > 0) || (sample_probe_num_rows >= probe_table_num_rows)) { break; }
-
- // If the size estimate is zero, then double the number of sampled rows in the probe
- // table. Reduce the ratio of the number of probe rows sampled to the number of rows
- // in the build table by the same factor
- if (0 == h_size_estimate) {
- constexpr size_type GROW_RATIO{2};
- sample_probe_num_rows *= GROW_RATIO;
- probe_to_build_ratio =
-        static_cast<size_type>(std::ceil(static_cast<double>(probe_to_build_ratio) / GROW_RATIO));
- }
+ row_hash hash_probe{probe_table};
+ row_equality equality{probe_table, build_table, compare_nulls == null_equality::EQUAL};
+ // Probe the hash table without actually building the output to simply
+ // find what the size of the output will be.
+  compute_join_output_size<JoinKind, multimap_type, block_size>
+    <<<numBlocks * num_sms, block_size, 0, stream.value()>>>(hash_table,
+ build_table,
+ probe_table,
+ hash_probe,
+ equality,
+ probe_table_num_rows,
+ d_size.data());
- } while (true);
+ CHECK_CUDA(stream.value());
+ h_size = d_size.value(stream);
- return h_size_estimate;
+ return h_size;
}
/**
@@ -236,6 +188,7 @@ struct hash_join::hash_join_impl {
            std::unique_ptr<rmm::device_uvector<size_type>>>
inner_join(cudf::table_view const& probe,
null_equality compare_nulls,
+             std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const;
@@ -243,6 +196,7 @@ struct hash_join::hash_join_impl {
            std::unique_ptr<rmm::device_uvector<size_type>>>
left_join(cudf::table_view const& probe,
null_equality compare_nulls,
+            std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const;
@@ -250,22 +204,38 @@ struct hash_join::hash_join_impl {
            std::unique_ptr<rmm::device_uvector<size_type>>>
full_join(cudf::table_view const& probe,
null_equality compare_nulls,
+            std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const;
+ std::size_t inner_join_size(cudf::table_view const& probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream) const;
+
+ std::size_t left_join_size(cudf::table_view const& probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream) const;
+
+ std::size_t full_join_size(cudf::table_view const& probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr) const;
+
private:
  template <cudf::detail::join_kind JoinKind>
  std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
            std::unique_ptr<rmm::device_uvector<size_type>>>
compute_hash_join(cudf::table_view const& probe,
null_equality compare_nulls,
+                    std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const;
/**
* @brief Probes the `_hash_table` built from `_build` for tuples in `probe_table`,
* and returns the output indices of `build_table` and `probe_table` as a combined table,
- * i.e. if full join is specified as the join type then left join is called.
+ * i.e. if full join is specified as the join type then left join is called. Behavior
+ * is undefined if the provided `output_size` is smaller than the actual output size.
*
* @throw cudf::logic_error if hash table is null.
*
@@ -273,6 +243,7 @@ struct hash_join::hash_join_impl {
*
* @param probe_table Table of probe side columns to join.
* @param compare_nulls Controls whether null join-key values should match or not.
+ * @param output_size Optional value which allows users to specify the exact output size.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned vectors.
*
@@ -283,6 +254,7 @@ struct hash_join::hash_join_impl {
            std::unique_ptr<rmm::device_uvector<size_type>>>
probe_join_indices(cudf::table_view const& probe,
null_equality compare_nulls,
+                     std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const;
};
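The replacement of size estimation with an exact count follows the common two-pass pattern: one pass to count, one allocation, one pass to fill. A minimal Thrust sketch of the same pattern (illustrative only, not the join kernels):

```cpp
#include <thrust/copy.h>
#include <thrust/count.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>

template <typename Pred>
thrust::device_vector<int> two_pass_filter(thrust::device_vector<int> const& in, Pred pred)
{
  auto const n = thrust::count_if(thrust::device, in.begin(), in.end(), pred);  // pass 1: exact size
  thrust::device_vector<int> out(n);                                            // one allocation
  thrust::copy_if(thrust::device, in.begin(), in.end(), out.begin(), pred);     // pass 2: fill
  return out;
}
```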
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index f2e4bab02c6..6cb04cadcac 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -50,11 +50,11 @@ inner_join(table_view const& left_input,
// build the hash map from the smaller table.
if (right.num_rows() > left.num_rows()) {
cudf::hash_join hj_obj(left, compare_nulls, stream);
- auto result = hj_obj.inner_join(right, compare_nulls, stream, mr);
+ auto result = hj_obj.inner_join(right, compare_nulls, std::nullopt, stream, mr);
return std::make_pair(std::move(result.second), std::move(result.first));
} else {
cudf::hash_join hj_obj(right, compare_nulls, stream);
- return hj_obj.inner_join(left, compare_nulls, stream, mr);
+ return hj_obj.inner_join(left, compare_nulls, std::nullopt, stream, mr);
}
}
@@ -112,7 +112,7 @@ left_join(table_view const& left_input,
table_view const right = matched.second.back();
cudf::hash_join hj_obj(right, compare_nulls, stream);
- return hj_obj.left_join(left, compare_nulls, stream, mr);
+ return hj_obj.left_join(left, compare_nulls, std::nullopt, stream, mr);
}
std::unique_ptr left_join(table_view const& left_input,
@@ -175,7 +175,7 @@ full_join(table_view const& left_input,
table_view const right = matched.second.back();
cudf::hash_join hj_obj(right, compare_nulls, stream);
- return hj_obj.full_join(left, compare_nulls, stream, mr);
+ return hj_obj.full_join(left, compare_nulls, std::nullopt, stream, mr);
}
std::unique_ptr full_join(table_view const& left_input,
@@ -234,30 +234,55 @@ std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
hash_join::inner_join(cudf::table_view const& probe,
null_equality compare_nulls,
+                      std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
- return impl->inner_join(probe, compare_nulls, stream, mr);
+ return impl->inner_join(probe, compare_nulls, output_size, stream, mr);
}
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
hash_join::left_join(cudf::table_view const& probe,
null_equality compare_nulls,
+                     std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
- return impl->left_join(probe, compare_nulls, stream, mr);
+ return impl->left_join(probe, compare_nulls, output_size, stream, mr);
}
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
hash_join::full_join(cudf::table_view const& probe,
null_equality compare_nulls,
+                     std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
- return impl->full_join(probe, compare_nulls, stream, mr);
+ return impl->full_join(probe, compare_nulls, output_size, stream, mr);
+}
+
+std::size_t hash_join::inner_join_size(cudf::table_view const& probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream) const
+{
+ return impl->inner_join_size(probe, compare_nulls, stream);
+}
+
+std::size_t hash_join::left_join_size(cudf::table_view const& probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream) const
+{
+ return impl->left_join_size(probe, compare_nulls, stream);
+}
+
+std::size_t hash_join::full_join_size(cudf::table_view const& probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr) const
+{
+ return impl->full_join_size(probe, compare_nulls, stream, mr);
}
// external APIs
diff --git a/cpp/src/labeling/label_bins.cu b/cpp/src/labeling/label_bins.cu
index 70a6826d9eb..66b5bb98dbf 100644
--- a/cpp/src/labeling/label_bins.cu
+++ b/cpp/src/labeling/label_bins.cu
@@ -161,7 +161,7 @@ constexpr auto is_supported_bin_type()
struct bin_type_dispatcher {
  template <typename T, typename... Args>
  std::enable_if_t<not is_supported_bin_type<T>(), std::unique_ptr<column>> operator()(
- Args&&... args)
+ Args&&...)
{
CUDF_FAIL("Type not support for cudf::bin");
}
diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu
index e54651c8473..014828a6cad 100644
--- a/cpp/src/lists/contains.cu
+++ b/cpp/src/lists/contains.cu
@@ -88,7 +88,7 @@ struct lookup_functor {
cudf::mutable_column_device_view mutable_ret_bools,
cudf::mutable_column_device_view mutable_ret_validity,
rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr)
+ rmm::mr::device_memory_resource*)
{
thrust::for_each(
rmm::exec_policy(stream),
@@ -163,8 +163,6 @@ struct lookup_functor {
auto const d_lists = lists_column_device_view(*device_view);
auto const d_skeys = get_search_keys_device_iterable_view(search_key, stream);
- auto const lists_column_has_nulls = lists.has_nulls() || lists.child().has_nulls();
-
auto result_validity = make_fixed_width_column(
data_type{type_id::BOOL8}, lists.size(), cudf::mask_state::UNALLOCATED, stream, mr);
auto result_bools = make_fixed_width_column(
diff --git a/cpp/src/lists/interleave_columns.cu b/cpp/src/lists/interleave_columns.cu
index 222c37507c4..5da8aef5853 100644
--- a/cpp/src/lists/interleave_columns.cu
+++ b/cpp/src/lists/interleave_columns.cu
@@ -210,7 +210,6 @@ struct interleave_list_entries_fn {
rmm::mr::device_memory_resource* mr) const noexcept
{
auto const num_cols = input.num_columns();
- auto const num_rows = input.num_rows();
auto const table_dv_ptr = table_device_view::create(input);
// The output child column.
diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu
index 2cbd568a64c..1cd2f326f44 100644
--- a/cpp/src/replace/nulls.cu
+++ b/cpp/src/replace/nulls.cu
@@ -366,7 +366,7 @@ std::unique_ptr<cudf::column> replace_nulls_scalar_kernel_forwarder::operator()<
 std::unique_ptr<cudf::column> replace_nulls_policy_impl(cudf::column_view const& input,
cudf::replace_policy const& replace_policy,
rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource*)
+ rmm::mr::device_memory_resource* mr)
{
auto device_in = cudf::column_device_view::create(input);
auto index = thrust::make_counting_iterator(0);
@@ -392,7 +392,8 @@ std::unique_ptr<cudf::column> replace_nulls_policy_impl(cudf::column_view const&
gather_map.begin(),
gather_map.end(),
cudf::out_of_bounds_policy::DONT_CHECK,
- stream);
+ stream,
+ mr);
return std::move(output->release()[0]);
}
diff --git a/cpp/src/reshape/byte_cast.cu b/cpp/src/reshape/byte_cast.cu
index 89006651bac..5bbdb5988e7 100644
--- a/cpp/src/reshape/byte_cast.cu
+++ b/cpp/src/reshape/byte_cast.cu
@@ -34,10 +34,10 @@ struct byte_list_conversion {
*/
  template <typename T>
  std::enable_if_t<!std::is_integral<T>::value and !is_floating_point<T>(), std::unique_ptr<column>>
- operator()(column_view const& input_column,
- flip_endianness configuration,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr) const
+ operator()(column_view const&,
+ flip_endianness,
+ rmm::cuda_stream_view,
+ rmm::mr::device_memory_resource*) const
{
CUDF_FAIL("Unsupported non-numeric and non-string column");
}
@@ -87,7 +87,7 @@ struct byte_list_conversion {
template <>
std::unique_ptr<column> byte_list_conversion::operator()<string_view>(
column_view const& input_column,
- flip_endianness configuration,
+ flip_endianness,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu
index 9a6df2a664c..328959732a0 100644
--- a/cpp/src/reshape/interleave_columns.cu
+++ b/cpp/src/reshape/interleave_columns.cu
@@ -34,7 +34,7 @@ struct interleave_columns_functor {
                     not std::is_same<T, cudf::string_view>::value and
                     not std::is_same<T, cudf::list_view>::value,
                   std::unique_ptr<column>>
- operator()(Args&&... args)
+ operator()(Args&&...)
{
CUDF_FAIL("Called `interleave_columns` on none-supported data type.");
}
diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu
index 379ceceaf17..cc918305349 100644
--- a/cpp/src/strings/convert/convert_datetime.cu
+++ b/cpp/src/strings/convert/convert_datetime.cu
@@ -911,14 +911,9 @@ struct dispatch_from_timestamps_fn {
d_timestamps.size(),
pfn);
}
-  template <typename T, std::enable_if_t<not cudf::is_timestamp<T>()>* = nullptr>
- void operator()(column_device_view const&,
- format_item const*,
- size_type,
- timestamp_units,
- const int32_t*,
- char* d_chars,
- rmm::cuda_stream_view stream) const
+
+  template <typename T, typename... Args>
+  std::enable_if_t<not cudf::is_timestamp<T>(), void> operator()(Args&&...) const
{
CUDF_FAIL("Only timestamps type are expected");
}
diff --git a/cpp/src/strings/convert/convert_durations.cu b/cpp/src/strings/convert/convert_durations.cu
index 6923f8a24fd..82039ad7692 100644
--- a/cpp/src/strings/convert/convert_durations.cu
+++ b/cpp/src/strings/convert/convert_durations.cu
@@ -271,7 +271,7 @@ struct duration_to_string_fn : public duration_to_string_size_fn<T> {
return str;
}
- __device__ char* int_to_2digitstr(char* str, int min_digits, int8_t value)
+ __device__ char* int_to_2digitstr(char* str, int8_t value)
{
assert(value >= -99 && value <= 99);
value = std::abs(value);
@@ -287,11 +287,11 @@ struct duration_to_string_fn : public duration_to_string_size_fn<T> {
inline __device__ char* hour_12(char* ptr, duration_component const* timeparts)
{
- return int_to_2digitstr(ptr, 2, timeparts->hour % 12);
+ return int_to_2digitstr(ptr, timeparts->hour % 12);
}
inline __device__ char* hour_24(char* ptr, duration_component const* timeparts)
{
- return int_to_2digitstr(ptr, 2, timeparts->hour);
+ return int_to_2digitstr(ptr, timeparts->hour);
}
inline __device__ char* am_or_pm(char* ptr, duration_component const* timeparts)
{
@@ -301,11 +301,11 @@ struct duration_to_string_fn : public duration_to_string_size_fn {
}
inline __device__ char* minute(char* ptr, duration_component const* timeparts)
{
- return int_to_2digitstr(ptr, 2, timeparts->minute);
+ return int_to_2digitstr(ptr, timeparts->minute);
}
inline __device__ char* second(char* ptr, duration_component const* timeparts)
{
- return int_to_2digitstr(ptr, 2, timeparts->second);
+ return int_to_2digitstr(ptr, timeparts->second);
}
inline __device__ char* subsecond(char* ptr, duration_component const* timeparts)
@@ -446,11 +446,8 @@ struct dispatch_from_durations_fn {
}
// non-duration types throw an exception
- template <typename T, std::enable_if_t<not cudf::is_duration<T>()>* = nullptr>
- std::unique_ptr<column> operator()(column_view const&,
- std::string const& format,
- rmm::cuda_stream_view,
- rmm::mr::device_memory_resource*) const
+ template <typename T, typename... Args>
+ std::enable_if_t<not cudf::is_duration<T>(), std::unique_ptr<column>> operator()(Args&&...) const
{
CUDF_FAIL("Values for from_durations function must be a duration type.");
}
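
Every call site passed min_digits == 2, so the parameter is dropped and the helper always emits exactly two digits. For reference, a host-side equivalent of the simplified helper, as a sketch (the device version above is authoritative):

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    // Writes |value| as exactly two decimal digits ("07", "42", ...) and
    // returns the pointer advanced past them.
    char* int_to_2digitstr(char* str, int8_t value)
    {
      assert(value >= -99 && value <= 99);
      int const v = std::abs(static_cast<int>(value));
      *str++ = static_cast<char>('0' + v / 10);
      *str++ = static_cast<char>('0' + v % 10);
      return str;
    }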
diff --git a/cpp/src/strings/convert/convert_hex.cu b/cpp/src/strings/convert/convert_hex.cu
index 48d25c1707f..7043174f5bf 100644
--- a/cpp/src/strings/convert/convert_hex.cu
+++ b/cpp/src/strings/convert/convert_hex.cu
@@ -103,8 +103,8 @@ struct dispatch_hex_to_integers_fn {
hex_to_integer_fn<IntegerType>{strings_column});
}
// non-integral types throw an exception
- template <typename IntegerType, std::enable_if_t<not std::is_integral<IntegerType>::value>* = nullptr>
- void operator()(column_device_view const&, mutable_column_view&, rmm::cuda_stream_view) const
+ template <typename T, typename... Args>
+ std::enable_if_t<not std::is_integral<T>::value, void> operator()(Args&&...) const
{
CUDF_FAIL("Output for hex_to_integers must be an integral type.");
}
@@ -118,6 +118,86 @@ void dispatch_hex_to_integers_fn::operator()<bool>(column_device_view const&,
CUDF_FAIL("Output for hex_to_integers must not be a boolean type.");
}
+/**
+ * @brief Functor to convert integers to hexadecimal strings
+ *
+ * @tparam IntegerType The specific integer type to convert from.
+ */
+template <typename IntegerType>
+struct integer_to_hex_fn {
+ column_device_view const d_column;
+ offset_type* d_offsets{};
+ char* d_chars{};
+
+ __device__ void byte_to_hex(uint8_t byte, char* hex)
+ {
+ hex[0] = [&] {
+ if (byte < 16) { return '0'; }
+ uint8_t const nibble = byte / 16;
+
+ byte = byte - (nibble * 16);
+ return static_cast<char>(nibble < 10 ? '0' + nibble : 'A' + (nibble - 10));
+ }();
+ hex[1] = byte < 10 ? '0' + byte : 'A' + (byte - 10);
+ }
+
+ __device__ void operator()(size_type idx)
+ {
+ if (d_column.is_null(idx)) {
+ if (!d_chars) { d_offsets[idx] = 0; }
+ return;
+ }
+
+ auto const value = d_column.element<IntegerType>(idx); // ex. 123456
+ auto value_bytes = reinterpret_cast<uint8_t const*>(&value); // 0x40E20100
+
+ // compute the number of output bytes
+ int bytes = sizeof(IntegerType);
+ int byte_index = sizeof(IntegerType);
+ while ((--byte_index > 0) && (value_bytes[byte_index] & 0xFF) == 0) { --bytes; }
+
+ // create output
+ byte_index = bytes - 1;
+ if (d_chars) {
+ auto d_buffer = d_chars + d_offsets[idx];
+ while (byte_index >= 0) {
+ byte_to_hex(value_bytes[byte_index], d_buffer);
+ d_buffer += 2;
+ --byte_index;
+ }
+ } else {
+ d_offsets[idx] = static_cast<offset_type>(bytes) * 2; // 2 hex characters per byte
+ }
+ }
+};
+
+struct dispatch_integers_to_hex_fn {
+ template <typename IntegerType, std::enable_if_t<std::is_integral_v<IntegerType>>* = nullptr>
+ std::unique_ptr<column> operator()(column_view const& input,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr) const
+ {
+ auto const d_column = column_device_view::create(input, stream);
+
+ auto children = cudf::strings::detail::make_strings_children(
+ integer_to_hex_fn<IntegerType>{*d_column}, input.size(), stream, mr);
+
+ return make_strings_column(input.size(),
+ std::move(children.first),
+ std::move(children.second),
+ input.null_count(),
+ cudf::detail::copy_bitmask(input, stream, mr),
+ stream,
+ mr);
+ }
+ // non-integral types throw an exception
+ template <typename T, typename... Args>
+ std::enable_if_t<not std::is_integral_v<T>, std::unique_ptr<column>> operator()(Args...) const
+ {
+ CUDF_FAIL("integers_to_hex only supports integral type columns");
+ }
+};
+
} // namespace
// This will convert a strings column into any integer column type.
@@ -183,6 +263,14 @@ std::unique_ptr<column> is_hex(strings_column_view const& strings,
return results;
}
+std::unique_ptr<column> integers_to_hex(column_view const& input,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
+{
+ if (input.is_empty()) { return cudf::make_empty_column(data_type{type_id::STRING}); }
+ return type_dispatcher(input.type(), dispatch_integers_to_hex_fn{}, input, stream, mr);
+}
+
} // namespace detail
// external API
@@ -201,5 +289,12 @@ std::unique_ptr<column> is_hex(strings_column_view const& strings,
return detail::is_hex(strings, rmm::cuda_stream_default, mr);
}
+std::unique_ptr<column> integers_to_hex(column_view const& input,
+ rmm::mr::device_memory_resource* mr)
+{
+ CUDF_FUNC_RANGE();
+ return detail::integers_to_hex(input, rmm::cuda_stream_default, mr);
+}
+
} // namespace strings
} // namespace cudf
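
The new integers_to_hex kernel reads each value through its little-endian byte layout, scans from the most significant byte to trim leading zero bytes, and always keeps at least one byte, so zero renders as "00". A hedged usage sketch, assuming the declaration lands in cudf/strings/convert/convert_integers.hpp alongside hex_to_integers and is_hex:

    #include <cudf/strings/convert/convert_integers.hpp>
    #include <cudf_test/column_wrapper.hpp>

    void example()
    {
      // 123456 == 0x0001E240: the two leading zero bytes are trimmed.
      cudf::test::fixed_width_column_wrapper<int32_t> input{123456, 255, 0};
      auto hex = cudf::strings::integers_to_hex(input);  // "01E240", "FF", "00"
    }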
diff --git a/cpp/src/strings/split/split.cu b/cpp/src/strings/split/split.cu
index 5194bc6e86a..ae0ea4b90e6 100644
--- a/cpp/src/strings/split/split.cu
+++ b/cpp/src/strings/split/split.cu
@@ -124,7 +124,6 @@ struct split_tokenizer_fn : base_split_tokenizer {
* for string at `string_index`.
*
* @param idx Index of the delimiter in the chars column
- * @param column_count Number of output columns
* @param d_token_counts Token counts for each string
* @param d_positions The beginning byte position of each delimiter
* @param positions_count Number of delimiters
@@ -132,7 +131,6 @@ struct split_tokenizer_fn : base_split_tokenizer {
* @param d_all_tokens All output tokens for the strings column
*/
__device__ void process_tokens(size_type idx,
- size_type column_count,
size_type const* d_token_counts,
size_type const* d_positions,
size_type positions_count,
@@ -253,7 +251,6 @@ struct rsplit_tokenizer_fn : base_split_tokenizer {
* for string at `string_index`.
*
* @param idx Index of the delimiter in the chars column
- * @param column_count Number of output columns
* @param d_token_counts Token counts for each string
* @param d_positions The ending byte position of each delimiter
* @param positions_count Number of delimiters
@@ -261,7 +258,6 @@ struct rsplit_tokenizer_fn : base_split_tokenizer {
* @param d_all_tokens All output tokens for the strings column
*/
__device__ void process_tokens(size_type idx, // delimiter position index
- size_type column_count, // number of output columns
size_type const* d_token_counts, // token counts for each string
size_type const* d_positions, // end of each delimiter
size_type positions_count, // total number of delimiters
@@ -301,10 +297,9 @@ struct rsplit_tokenizer_fn : base_split_tokenizer {
*
* @param idx Index of a byte in the chars column.
* @param d_offsets Offsets values to locate the chars ranges.
- * @param chars_bytes Total number of characters to process.
* @return true if delimiter is found ending at position `idx`
*/
- __device__ bool is_delimiter(size_type idx, int32_t const* d_offsets, size_type chars_bytes) const
+ __device__ bool is_delimiter(size_type idx, int32_t const* d_offsets, size_type) const
{
auto delim_length = d_delimiter.size_bytes();
if (idx < delim_length - 1) return false;
@@ -524,24 +519,19 @@ std::unique_ptr<table> split_fn(strings_column_view const& strings_column,
});
// get the positions for every token using the delimiter positions
- thrust::for_each_n(rmm::exec_policy(stream),
- thrust::make_counting_iterator<size_type>(0),
- delimiter_count,
- [tokenizer,
- columns_count,
- d_token_counts,
- d_positions,
- delimiter_count,
- d_string_indices,
- d_tokens] __device__(size_type idx) {
- tokenizer.process_tokens(idx,
- columns_count,
- d_token_counts,
- d_positions,
- delimiter_count,
- d_string_indices,
- d_tokens);
- });
+ thrust::for_each_n(
+ rmm::exec_policy(stream),
+ thrust::make_counting_iterator<size_type>(0),
+ delimiter_count,
+ [tokenizer,
+ d_token_counts,
+ d_positions,
+ delimiter_count,
+ d_string_indices,
+ d_tokens] __device__(size_type idx) {
+ tokenizer.process_tokens(
+ idx, d_token_counts, d_positions, delimiter_count, d_string_indices, d_tokens);
+ });
// Create each column.
// - Each pair points to the strings for that column for each row.
@@ -609,12 +599,10 @@ struct whitespace_split_tokenizer_fn : base_whitespace_split_tokenizer {
* for string at `string_index`.
*
* @param idx Index of the string to process
- * @param column_count Number of output columns
* @param d_token_counts Token counts for each string
* @param d_all_tokens All output tokens for the strings column
*/
__device__ void process_tokens(size_type idx,
- size_type column_count,
size_type const* d_token_counts,
string_index_pair* d_all_tokens) const
{
@@ -660,12 +648,10 @@ struct whitespace_rsplit_tokenizer_fn : base_whitespace_split_tokenizer {
* for string at `string_index`.
*
* @param idx Index of the string to process
- * @param column_count Number of output columns
* @param d_token_counts Token counts for each string
* @param d_all_tokens All output tokens for the strings column
*/
__device__ void process_tokens(size_type idx, // string position index
- size_type column_count,
size_type const* d_token_counts,
string_index_pair* d_all_tokens) const
{
@@ -787,13 +773,12 @@ std::unique_ptr<table> whitespace_split_fn(size_type strings_count,
d_tokens,
d_tokens + (columns_count * strings_count),
string_index_pair{nullptr, 0});
- thrust::for_each_n(
- rmm::exec_policy(stream),
- thrust::make_counting_iterator<size_type>(0),
- strings_count,
- [tokenizer, columns_count, d_token_counts, d_tokens] __device__(size_type idx) {
- tokenizer.process_tokens(idx, columns_count, d_token_counts, d_tokens);
- });
+ thrust::for_each_n(rmm::exec_policy(stream),
+ thrust::make_counting_iterator<size_type>(0),
+ strings_count,
+ [tokenizer, d_token_counts, d_tokens] __device__(size_type idx) {
+ tokenizer.process_tokens(idx, d_token_counts, d_tokens);
+ });
// Create each column.
// - Each pair points to a string for that column for each row.
diff --git a/cpp/src/strings/substring.cu b/cpp/src/strings/substring.cu
index a74f6638a61..834bac5e1ac 100644
--- a/cpp/src/strings/substring.cu
+++ b/cpp/src/strings/substring.cu
@@ -238,7 +238,7 @@ void compute_substring_indices(column_device_view const& d_column,
size_type* start_char_pos,
size_type* end_char_pos,
rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr)
+ rmm::mr::device_memory_resource*)
{
auto strings_count = d_column.size();
diff --git a/cpp/src/text/detokenize.cu b/cpp/src/text/detokenize.cu
index 061597ae817..4be081bcf93 100644
--- a/cpp/src/text/detokenize.cu
+++ b/cpp/src/text/detokenize.cu
@@ -124,7 +124,7 @@ struct token_row_offsets_fn {
// non-integral types throw an exception
template <typename T, typename... Args, std::enable_if_t<not cudf::is_index_type<T>()>* = nullptr>
- std::unique_ptr<rmm::device_uvector<int32_t>> operator()(Args&&... args) const
+ std::unique_ptr<rmm::device_uvector<int32_t>> operator()(Args&&...) const
{
CUDF_FAIL("The detokenize indices parameter must be an integer type.");
}
diff --git a/cpp/src/text/normalize.cu b/cpp/src/text/normalize.cu
index d286aa55bdb..84a7db5dd4e 100644
--- a/cpp/src/text/normalize.cu
+++ b/cpp/src/text/normalize.cu
@@ -129,7 +129,6 @@ struct codepoint_to_utf8_fn {
if (!d_chars) d_offsets[idx] = 0;
return;
}
- auto const d_str = d_strings.element<cudf::string_view>(idx);
auto const offset = d_cp_offsets[idx];
auto const count = d_cp_offsets[idx + 1] - offset; // number of code-points
auto str_cps = cp_data + offset; // code-points for this string
diff --git a/cpp/src/text/stemmer.cu b/cpp/src/text/stemmer.cu
index 77e457bbf16..a7bb03f389f 100644
--- a/cpp/src/text/stemmer.cu
+++ b/cpp/src/text/stemmer.cu
@@ -138,7 +138,7 @@ struct dispatch_is_letter_fn {
}
template <typename T, typename... Args, std::enable_if_t<not cudf::is_index_type<T>()>* = nullptr>
- std::unique_ptr<column> operator()(Args&&... args) const
+ std::unique_ptr<column> operator()(Args&&...) const
{
CUDF_FAIL("The is_letter indices parameter must be an integer type.");
}
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index fdaeb3ebdab..813c755cfff 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -186,6 +186,7 @@ ConfigureTest(CSV_TEST io/csv_test.cpp)
ConfigureTest(ORC_TEST io/orc_test.cpp)
ConfigureTest(PARQUET_TEST io/parquet_test.cpp)
ConfigureTest(JSON_TEST io/json_test.cpp)
+ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp)
###################################################################################################
# - sort tests ------------------------------------------------------------------------------------
@@ -217,7 +218,8 @@ ConfigureTest(COPYING_TEST
copying/shift_tests.cpp
copying/slice_tests.cpp
copying/split_tests.cpp
- copying/utility_tests.cpp)
+ copying/utility_tests.cpp
+ copying/reverse_tests.cpp)
###################################################################################################
# - utilities tests -------------------------------------------------------------------------------
diff --git a/cpp/tests/copying/copy_if_else_nested_tests.cpp b/cpp/tests/copying/copy_if_else_nested_tests.cpp
index 9ac34a3044e..7cd56f0ea43 100644
--- a/cpp/tests/copying/copy_if_else_nested_tests.cpp
+++ b/cpp/tests/copying/copy_if_else_nested_tests.cpp
@@ -102,6 +102,35 @@ TYPED_TEST(TypedCopyIfElseNestedTest, StructsWithNulls)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result_column->view(), expected_result->view());
}
+TYPED_TEST(TypedCopyIfElseNestedTest, LongerStructsWithNulls)
+{
+ using T = TypeParam;
+
+ using namespace cudf;
+ using namespace cudf::test;
+
+ using ints = fixed_width_column_wrapper<T, int32_t>;
+ using structs = structs_column_wrapper;
+ using bools = fixed_width_column_wrapper<bool, int32_t>;
+
+ auto selector_column = bools{1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,
+ 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0,
+ 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0}
+ .release();
+ auto lhs_child_1 =
+ ints{{27, -80, -24, 76, -56, 42, 5, 13, -69, -77, 61, -77, 72, 0, 31, 118, -30,
+ 86, 125, 0, 0, 0, 75, -49, 125, 60, 116, 118, 64, 20, -70, -18, 0, -25,
+ 22, -46, -89, -9, 27, -56, -77, 123, 0, -90, 87, -113, -37, 22, -22, -53, 73,
+ 99, 113, -2, -24, 113, 75, 6, 82, -58, 122, -123, -127, 19, -62, -24},
+ iterator_with_null_at(std::vector<size_type>{13, 19, 20, 21, 32, 42})};
+
+ auto lhs_structs_column = structs{{lhs_child_1}}.release();
+ auto result_column =
+ copy_if_else(lhs_structs_column->view(), lhs_structs_column->view(), selector_column->view());
+
+ CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result_column->view(), lhs_structs_column->view());
+}
+
TYPED_TEST(TypedCopyIfElseNestedTest, Lists)
{
using T = TypeParam;
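
For reference, copy_if_else selects row i from the first column where the boolean selector is true and from the second otherwise, so with both inputs aliased to the same structs column the result must equal the input; that invariant is exactly what the new LongerStructsWithNulls test asserts. A sketch of the call shape (public API in cudf/copying.hpp):

    // out[i] = selector[i] ? lhs[i] : rhs[i]
    auto out = cudf::copy_if_else(lhs->view(), rhs->view(), selector->view());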
diff --git a/cpp/tests/copying/pack_tests.cu b/cpp/tests/copying/pack_tests.cu
index f3b9cf25357..3f345689ce2 100644
--- a/cpp/tests/copying/pack_tests.cu
+++ b/cpp/tests/copying/pack_tests.cu
@@ -418,6 +418,35 @@ TEST_F(PackUnpackTest, NestedEmpty)
this->run_test(src_table);
}
}
+
+TEST_F(PackUnpackTest, NestedSliced)
+{
+ auto valids =
+ cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });
+
+ using LCW = cudf::test::lists_column_wrapper<int>;
+
+ cudf::test::lists_column_wrapper<int> col0{ {{{1, 2, 3}, valids}, {4, 5}},
+ {{LCW{}, LCW{}, {7, 8}, LCW{}}, valids},
+ {{6, 12}},
+ {{{7, 8}, {{9, 10, 11}, valids}, LCW{}}, valids},
+ {{LCW{}, {-1, -2, -3, -4, -5}}, valids},
+ {LCW{}},
+ {{-10}, {-100, -200}} };
+
+ cudf::test::strings_column_wrapper col1{"Vimes", "Carrot", "Angua", "Cheery", "Detritus", "Slant", "Fred"};
+ cudf::test::fixed_width_column_wrapper<float> col2{ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f };
+
+ std::vector<std::unique_ptr<cudf::column>> children;
+ children.push_back(std::make_unique<cudf::column>(col2));
+ children.push_back(std::make_unique<cudf::column>(col0));
+ children.push_back(std::make_unique<cudf::column>(col1));
+ auto col3 = cudf::make_structs_column(static_cast<cudf::column_view>(col0).size(), std::move(children), 0, rmm::device_buffer{});
+
+ cudf::table_view t({col0, col1, col2, *col3});
+ this->run_test(t);
+}
+
// clang-format on
} // namespace test
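
The NestedSliced case round-trips a table that mixes lists, strings, and a structs column through pack/unpack: pack serializes the table into one contiguous device buffer and unpack rebuilds a zero-copy table_view over it. A minimal sketch of that round trip using the public API in cudf/copying.hpp:

    #include <cudf/copying.hpp>
    #include <cudf/table/table_view.hpp>

    // Pack `input` into one contiguous buffer, then view it again without copying.
    cudf::table_view round_trip(cudf::table_view const& input, cudf::packed_columns& storage)
    {
      storage = cudf::pack(input);   // single device allocation + metadata
      return cudf::unpack(storage);  // view over storage; valid while storage lives
    }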
diff --git a/cpp/tests/copying/reverse_tests.cpp b/cpp/tests/copying/reverse_tests.cpp
new file mode 100644
index 00000000000..7f911e0aa85
--- /dev/null
+++ b/cpp/tests/copying/reverse_tests.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/table_utilities.hpp>
+#include <cudf_test/type_lists.hpp>
+
+#include <cudf/copying.hpp>
+#include <cudf/detail/iterator.cuh>
+#include <cudf/table/table.hpp>
+#include <cudf/table/table_view.hpp>
+#include <cudf/types.hpp>
+#include <cudf/utilities/error.hpp>
+
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/iterator/transform_iterator.h>
+
+template <typename T>
+class ReverseTypedTestFixture : public cudf::test::BaseFixture {
+};
+
+TYPED_TEST_CASE(ReverseTypedTestFixture, cudf::test::AllTypes);
+TYPED_TEST(ReverseTypedTestFixture, ReverseTable)
+{
+ using T = TypeParam;
+ constexpr cudf::size_type num_values{10};
+
+ auto input = cudf::test::fixed_width_column_wrapper