diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh index a9398f4527c..8dd133c8fa3 100755 --- a/ci/benchmark/build.sh +++ b/ci/benchmark/build.sh @@ -75,10 +75,10 @@ conda install "rmm=$MINOR_VERSION.*" "cudatoolkit=$CUDA_REL" \ # conda install "your-pkg=1.0.0" # Install the master version of dask, distributed, and streamz -logger "pip install git+https://github.com/dask/distributed.git@master --upgrade --no-deps" -pip install "git+https://github.com/dask/distributed.git@master" --upgrade --no-deps -logger "pip install git+https://github.com/dask/dask.git@master --upgrade --no-deps" -pip install "git+https://github.com/dask/dask.git@master" --upgrade --no-deps +logger "pip install git+https://github.com/dask/distributed.git@main --upgrade --no-deps" +pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps +logger "pip install git+https://github.com/dask/dask.git@main --upgrade --no-deps" +pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps logger "pip install git+https://github.com/python-streamz/streamz.git --upgrade --no-deps" pip install "git+https://github.com/python-streamz/streamz.git" --upgrade --no-deps diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index c9c2a37a4e9..588debc40db 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -19,8 +19,9 @@ export CUDA_REL=${CUDA_VERSION%.*} export GPUCI_CONDA_RETRY_MAX=1 export GPUCI_CONDA_RETRY_SLEEP=30 -# Use Ninja to build +# Use Ninja to build, setup Conda Build Dir export CMAKE_GENERATOR="Ninja" +export CONDA_BLD_DIR="${WORKSPACE}/.conda-bld" # Switch to project root; also root of repo checkout cd $WORKSPACE @@ -63,29 +64,34 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then CONDA_CHANNEL="" else CONDA_BUILD_ARGS="--dirty --no-remove-work-dir" - CONDA_CHANNEL="-c $WORKSPACE/ci/artifacts/cudf/cpu/conda-bld/" + CONDA_CHANNEL="-c $WORKSPACE/ci/artifacts/cudf/cpu/.conda-bld/" fi if [ "$BUILD_LIBCUDF" == '1' ]; then gpuci_logger "Build conda pkg for libcudf" - gpuci_conda_retry build conda/recipes/libcudf $CONDA_BUILD_ARGS + gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcudf $CONDA_BUILD_ARGS + mkdir -p ${CONDA_BLD_DIR}/libcudf/work + cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcudf/work + gpuci_logger "Build conda pkg for libcudf_kafka" - gpuci_conda_retry build conda/recipes/libcudf_kafka $CONDA_BUILD_ARGS + gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcudf_kafka $CONDA_BUILD_ARGS + mkdir -p ${CONDA_BLD_DIR}/libcudf_kafka/work + cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcudf_kafka/work fi if [ "$BUILD_CUDF" == '1' ]; then gpuci_logger "Build conda pkg for cudf" - gpuci_conda_retry build conda/recipes/cudf --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL + gpuci_conda_retry build --croot ${CONDA_BLD_DIR} conda/recipes/cudf --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL gpuci_logger "Build conda pkg for dask-cudf" - gpuci_conda_retry build conda/recipes/dask-cudf --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL + gpuci_conda_retry build --croot ${CONDA_BLD_DIR} conda/recipes/dask-cudf --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL gpuci_logger "Build conda pkg for cudf_kafka" - gpuci_conda_retry build conda/recipes/cudf_kafka --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL + gpuci_conda_retry build --croot ${CONDA_BLD_DIR} conda/recipes/cudf_kafka --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL gpuci_logger "Build conda pkg for custreamz" - gpuci_conda_retry build conda/recipes/custreamz --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL + gpuci_conda_retry build --croot ${CONDA_BLD_DIR} conda/recipes/custreamz --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL fi ################################################################################ # UPLOAD - Conda packages diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh index 0465197e861..ca8ee1d75ac 100755 --- a/ci/cpu/upload.sh +++ b/ci/cpu/upload.sh @@ -28,12 +28,12 @@ fi ################################################################################ gpuci_logger "Get conda file output locations" -export LIBCUDF_FILE=`conda build conda/recipes/libcudf --output` -export LIBCUDF_KAFKA_FILE=`conda build conda/recipes/libcudf_kafka --output` -export CUDF_FILE=`conda build conda/recipes/cudf --python=$PYTHON --output` -export DASK_CUDF_FILE=`conda build conda/recipes/dask-cudf --python=$PYTHON --output` -export CUDF_KAFKA_FILE=`conda build conda/recipes/cudf_kafka --python=$PYTHON --output` -export CUSTREAMZ_FILE=`conda build conda/recipes/custreamz --python=$PYTHON --output` +export LIBCUDF_FILE=`conda build --no-build-id --croot ${WORKSPACE}/.conda-bld conda/recipes/libcudf --output` +export LIBCUDF_KAFKA_FILE=`conda build --no-build-id --croot ${WORKSPACE}/.conda-bld conda/recipes/libcudf_kafka --output` +export CUDF_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/cudf --python=$PYTHON --output` +export DASK_CUDF_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/dask-cudf --python=$PYTHON --output` +export CUDF_KAFKA_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/cudf_kafka --python=$PYTHON --output` +export CUSTREAMZ_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/custreamz --python=$PYTHON --output` ################################################################################ # UPLOAD - Conda packages diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 917a2b4cf27..7614e19cc89 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -24,6 +24,7 @@ cd $WORKSPACE # Determine CUDA release version export CUDA_REL=${CUDA_VERSION%.*} +export CONDA_ARTIFACT_PATH=${WORKSPACE}/ci/artifacts/cudf/cpu/.conda-bld/ # Parse git describe export GIT_DESCRIBE_TAG=`git describe --tags` @@ -97,11 +98,11 @@ conda config --show-sources conda list --show-channel-urls function install_dask { - # Install the master version of dask, distributed, and streamz - gpuci_logger "Install the master version of dask, distributed, and streamz" + # Install the main version of dask, distributed, and streamz + gpuci_logger "Install the main version of dask, distributed, and streamz" set -x - pip install "git+https://github.com/dask/distributed.git@master" --upgrade --no-deps - pip install "git+https://github.com/dask/dask.git@master" --upgrade --no-deps + pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps + pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps pip install "git+https://github.com/python-streamz/streamz.git" --upgrade --no-deps set +x } @@ -151,7 +152,7 @@ else #Project Flash export LIB_BUILD_DIR="$WORKSPACE/ci/artifacts/cudf/cpu/libcudf_work/cpp/build" export LD_LIBRARY_PATH="$LIB_BUILD_DIR:$CONDA_PREFIX/lib:$LD_LIBRARY_PATH" - + if hasArg --skip-tests; then gpuci_logger "Skipping Tests" exit 0 @@ -170,15 +171,15 @@ else ${gt} --gtest_output=xml:${WORKSPACE}/test-results/ done - CUDF_CONDA_FILE=`find $WORKSPACE/ci/artifacts/cudf/cpu/conda-bld/ -name "libcudf-*.tar.bz2"` + CUDF_CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "libcudf-*.tar.bz2"` CUDF_CONDA_FILE=`basename "$CUDF_CONDA_FILE" .tar.bz2` #get filename without extension CUDF_CONDA_FILE=${CUDF_CONDA_FILE//-/=} #convert to conda install - KAFKA_CONDA_FILE=`find $WORKSPACE/ci/artifacts/cudf/cpu/conda-bld/ -name "libcudf_kafka-*.tar.bz2"` + KAFKA_CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "libcudf_kafka-*.tar.bz2"` KAFKA_CONDA_FILE=`basename "$KAFKA_CONDA_FILE" .tar.bz2` #get filename without extension KAFKA_CONDA_FILE=${KAFKA_CONDA_FILE//-/=} #convert to conda install gpuci_logger "Installing $CUDF_CONDA_FILE & $KAFKA_CONDA_FILE" - conda install -c $WORKSPACE/ci/artifacts/cudf/cpu/conda-bld/ "$CUDF_CONDA_FILE" "$KAFKA_CONDA_FILE" + conda install -c ${CONDA_ARTIFACT_PATH} "$CUDF_CONDA_FILE" "$KAFKA_CONDA_FILE" install_dask diff --git a/conda/environments/cudf_dev_cuda10.1.yml b/conda/environments/cudf_dev_cuda10.1.yml index 3541ed1208c..35108ddd8ca 100644 --- a/conda/environments/cudf_dev_cuda10.1.yml +++ b/conda/environments/cudf_dev_cuda10.1.yml @@ -62,7 +62,7 @@ dependencies: - nvtx>=0.2.1 - cachetools - pip: - - git+https://github.com/dask/dask.git@master - - git+https://github.com/dask/distributed.git@master + - git+https://github.com/dask/dask.git@main + - git+https://github.com/dask/distributed.git@main - git+https://github.com/python-streamz/streamz.git - pyorc diff --git a/conda/environments/cudf_dev_cuda10.2.yml b/conda/environments/cudf_dev_cuda10.2.yml index 839533516fb..3a24e38a397 100644 --- a/conda/environments/cudf_dev_cuda10.2.yml +++ b/conda/environments/cudf_dev_cuda10.2.yml @@ -62,7 +62,7 @@ dependencies: - nvtx>=0.2.1 - cachetools - pip: - - git+https://github.com/dask/dask.git@master - - git+https://github.com/dask/distributed.git@master + - git+https://github.com/dask/dask.git@main + - git+https://github.com/dask/distributed.git@main - git+https://github.com/python-streamz/streamz.git - pyorc diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index 401eaea63da..821c6f5320d 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -62,7 +62,7 @@ dependencies: - nvtx>=0.2.1 - cachetools - pip: - - git+https://github.com/dask/dask.git@master - - git+https://github.com/dask/distributed.git@master + - git+https://github.com/dask/dask.git@main + - git+https://github.com/dask/distributed.git@main - git+https://github.com/python-streamz/streamz.git - pyorc diff --git a/conda/recipes/dask-cudf/run_test.sh b/conda/recipes/dask-cudf/run_test.sh index 0fc29d42721..3fc1182b33b 100644 --- a/conda/recipes/dask-cudf/run_test.sh +++ b/conda/recipes/dask-cudf/run_test.sh @@ -9,11 +9,11 @@ function logger() { } # Install the latest version of dask and distributed -logger "pip install git+https://github.com/dask/distributed.git@master --upgrade --no-deps" -pip install "git+https://github.com/dask/distributed.git@master" --upgrade --no-deps +logger "pip install git+https://github.com/dask/distributed.git@main --upgrade --no-deps" +pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps -logger "pip install git+https://github.com/dask/dask.git@master --upgrade --no-deps" -pip install "git+https://github.com/dask/dask.git@master" --upgrade --no-deps +logger "pip install git+https://github.com/dask/dask.git@main --upgrade --no-deps" +pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps logger "python -c 'import dask_cudf'" python -c "import dask_cudf" diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 5db32987624..dfc340b1459 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -115,6 +115,7 @@ ConfigureBench(REDUCTION_BENCH reduction/anyall_benchmark.cpp reduction/dictionary_benchmark.cpp reduction/reduce_benchmark.cpp + reduction/scan_benchmark.cpp reduction/minmax_benchmark.cpp) ################################################################################################### @@ -183,6 +184,7 @@ ConfigureBench(STRINGS_BENCH string/convert_durations_benchmark.cpp string/convert_floats_benchmark.cpp string/copy_benchmark.cpp + string/extract_benchmark.cpp string/filter_benchmark.cpp string/find_benchmark.cpp string/replace_benchmark.cpp diff --git a/cpp/benchmarks/reduction/scan_benchmark.cpp b/cpp/benchmarks/reduction/scan_benchmark.cpp new file mode 100644 index 00000000000..b2d8fcfc004 --- /dev/null +++ b/cpp/benchmarks/reduction/scan_benchmark.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +class ReductionScan : public cudf::benchmark { +}; + +template +static void BM_reduction_scan(benchmark::State& state, bool include_nulls) +{ + cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; + auto const dtype = cudf::type_to_id(); + auto const table = create_random_table({dtype}, 1, row_count{n_rows}); + if (!include_nulls) table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); + cudf::column_view input(table->view().column(0)); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto result = cudf::scan(input, cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE); + } +} + +#define SCAN_BENCHMARK_DEFINE(name, type, nulls) \ + BENCHMARK_DEFINE_F(ReductionScan, name) \ + (::benchmark::State & state) { BM_reduction_scan(state, nulls); } \ + BENCHMARK_REGISTER_F(ReductionScan, name) \ + ->UseManualTime() \ + ->Arg(10000) /* 10k */ \ + ->Arg(100000) /* 100k */ \ + ->Arg(1000000) /* 1M */ \ + ->Arg(10000000) /* 10M */ \ + ->Arg(100000000); /* 100M */ + +SCAN_BENCHMARK_DEFINE(int8_no_nulls, int8_t, false); +SCAN_BENCHMARK_DEFINE(int32_no_nulls, int32_t, false); +SCAN_BENCHMARK_DEFINE(uint64_no_nulls, uint64_t, false); +SCAN_BENCHMARK_DEFINE(float_no_nulls, float, false); +SCAN_BENCHMARK_DEFINE(int16_nulls, int16_t, true); +SCAN_BENCHMARK_DEFINE(uint32_nulls, uint32_t, true); +SCAN_BENCHMARK_DEFINE(double_nulls, double, true); diff --git a/cpp/benchmarks/string/extract_benchmark.cpp b/cpp/benchmarks/string/extract_benchmark.cpp new file mode 100644 index 00000000000..dbae18dde3b --- /dev/null +++ b/cpp/benchmarks/string/extract_benchmark.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "string_bench_args.hpp" + +class StringExtract : public cudf::benchmark { +}; + +static void BM_extract(benchmark::State& state, int re_instructions) +{ + cudf::size_type const n_rows{static_cast(state.range(0))}; + cudf::size_type const max_str_length{static_cast(state.range(1))}; + data_profile table_profile; + table_profile.set_distribution_params( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const table = + create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + cudf::strings_column_view input(table->view().column(0)); + std::string const raw_pattern = + "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234" + "5678901234567890123456789012345678901234567890"; + std::string const pattern = "(" + raw_pattern.substr(0, re_instructions) + ")"; + + for (auto _ : state) { + cuda_event_timer raii(state, true, 0); + auto results = cudf::strings::extract(input, pattern); + } + + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_mult = 8; + int const min_rowlen = 1 << 5; + int const max_rowlen = 1 << 13; + int const len_mult = 4; + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); +} + +#define STRINGS_BENCHMARK_DEFINE(name, instructions) \ + BENCHMARK_DEFINE_F(StringExtract, name) \ + (::benchmark::State & st) { BM_extract(st, instructions); } \ + BENCHMARK_REGISTER_F(StringExtract, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +STRINGS_BENCHMARK_DEFINE(small, 4) +STRINGS_BENCHMARK_DEFINE(medium, 48) +STRINGS_BENCHMARK_DEFINE(large, 128) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index c29beb65775..b2f152180b0 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -856,7 +856,7 @@ namespace detail { * @brief Convenience function to get offset word from a bitmask * * @see copy_offset_bitmask - * @see offset_bitmask_and + * @see offset_bitmask_binop */ __device__ inline bitmask_type get_mask_offset_word(bitmask_type const* __restrict__ source, size_type destination_word_index, diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 805cdc02bc6..881afa63ca5 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -71,16 +71,13 @@ inline auto make_counting_transform_iterator(cudf::size_type start, UnaryFunctio } /** - * @brief value accessor of column with null bitmask - * A unary functor returns scalar value at `id`. - * `operator() (cudf::size_type id)` computes `element` and valid flag at `id` - * This functor is only allowed for nullable columns. + * @brief Value accessor of column that may have a null bitmask. * - * the return value for element `i` will return `column[i]` - * if it is valid, or `null_replacement` if it is null. + * This unary functor returns scalar value at `id`. + * The `operator()(cudf::size_type id)` computes the `element` and valid flag at `id`. * - * @throws cudf::logic_error if the column is not nullable. - * @throws cudf::logic_error if column datatype and Element type mismatch. + * The return value for element `i` will return `column[i]` + * if it is valid, or `null_replacement` if it is null. * * @tparam Element The type of elements in the column */ @@ -88,24 +85,33 @@ template struct null_replaced_value_accessor { column_device_view const col; ///< column view of column in device Element const null_replacement{}; ///< value returned when element is null + bool const has_nulls; ///< true if col has null elements /** - * @brief constructor - * @param[in] _col column device view of cudf column + * @brief Creates an accessor for a null-replacement iterator. + * + * @throws cudf::logic_error if `col` type does not match Element type. + * @throws cudf::logic_error if `has_nulls` is true but `col` does not have a validity mask. + * + * @param[in] col column device view of cudf column * @param[in] null_replacement The value to return for null elements + * @param[in] has_nulls Must be set to true if `col` has nulls. */ - null_replaced_value_accessor(column_device_view const& _col, Element null_val) - : col{_col}, null_replacement{null_val} + null_replaced_value_accessor(column_device_view const& col, + Element null_val, + bool has_nulls = true) + : col{col}, null_replacement{null_val}, has_nulls{has_nulls} { - CUDF_EXPECTS(data_type(type_to_id()) == col.type(), "the data type mismatch"); - // verify valid is non-null, otherwise, is_valid_nocheck() will crash - CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); + CUDF_EXPECTS(type_to_id() == device_storage_type_id(col.type().id()), + "the data type mismatch"); + // verify validity bitmask is non-null, otherwise, is_null_nocheck() will crash + if (has_nulls) CUDF_EXPECTS(col.nullable(), "column with nulls must have a validity bitmask"); } CUDA_DEVICE_CALLABLE Element operator()(cudf::size_type i) const { - return col.is_valid_nocheck(i) ? col.element(i) : null_replacement; + return has_nulls && col.is_null_nocheck(i) ? null_replacement : col.element(i); } }; @@ -140,7 +146,7 @@ struct validity_accessor { * * Dereferencing the returned iterator for element `i` will return `column[i]` * if it is valid, or `null_replacement` if it is null. - * This iterator is only allowed for nullable columns. + * This iterator is only allowed for both nullable and non-nullable columns. * * @throws cudf::logic_error if the column is not nullable. * @throws cudf::logic_error if column datatype and Element type mismatch. @@ -148,15 +154,17 @@ struct validity_accessor { * @tparam Element The type of elements in the column * @param column The column to iterate * @param null_replacement The value to return for null elements - * @return auto Iterator that returns valid column elements, or a null + * @param has_nulls Must be set to true if `column` has nulls. + * @return Iterator that returns valid column elements, or a null * replacement value for null elements. */ template auto make_null_replacement_iterator(column_device_view const& column, - Element const null_replacement = Element{0}) + Element const null_replacement = Element{0}, + bool has_nulls = true) { return make_counting_transform_iterator( - 0, null_replaced_value_accessor{column, null_replacement}); + 0, null_replaced_value_accessor{column, null_replacement, has_nulls}); } /** diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh new file mode 100644 index 00000000000..daefa2a5ffd --- /dev/null +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +#include + +using cudf::detail::device_span; + +namespace cudf { +namespace detail { +/** + * @brief Computes the merger of an array of bitmasks using a binary operator + * + * @param op The binary operator used to combine the bitmasks + * @param destination The bitmask to write result into + * @param source Array of source mask pointers. All masks must be of same size + * @param source_begin_bits Array of offsets into corresponding @p source masks. + * Must be same size as source array + * @param source_size_bits Number of bits in each mask in @p source + */ +template +__global__ void offset_bitmask_binop(Binop op, + device_span destination, + device_span source, + device_span source_begin_bits, + size_type source_size_bits) +{ + for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x; + destination_word_index < destination.size(); + destination_word_index += blockDim.x * gridDim.x) { + bitmask_type destination_word = + detail::get_mask_offset_word(source[0], + destination_word_index, + source_begin_bits[0], + source_begin_bits[0] + source_size_bits); + for (size_type i = 1; i < source.size(); i++) { + destination_word = + + op(destination_word, + detail::get_mask_offset_word(source[i], + destination_word_index, + source_begin_bits[i], + source_begin_bits[i] + source_size_bits)); + } + + destination[destination_word_index] = destination_word; + } +} + +/** + * @copydoc bitmask_binop(Binop op, host_span const, host_span + * const, size_type, rmm::mr::device_memory_resource *) + * + * @param stream CUDA stream used for device memory operations and kernel launches + */ +template +rmm::device_buffer bitmask_binop( + Binop op, + host_span masks, + host_span masks_begin_bits, + size_type mask_size_bits, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) +{ + auto dest_mask = rmm::device_buffer{bitmask_allocation_size_bytes(mask_size_bits), stream, mr}; + + inplace_bitmask_binop(op, + device_span(static_cast(dest_mask.data()), + num_bitmask_words(mask_size_bits)), + masks, + masks_begin_bits, + mask_size_bits, + stream, + mr); + + return dest_mask; +} + +/** + * @brief Performs a merge of the specified bitmasks using the binary operator + * provided, and writes in place to destination + * + * @param op The binary operator used to combine the bitmasks + * @param dest_mask Destination to which the merged result is written + * @param masks The list of data pointers of the bitmasks to be merged + * @param masks_begin_bits The bit offsets from which each mask is to be merged + * @param mask_size_bits The number of bits to be ANDed in each mask + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned device_buffer + * @return rmm::device_buffer Output bitmask + */ +template +void inplace_bitmask_binop( + Binop op, + device_span dest_mask, + host_span masks, + host_span masks_begin_bits, + size_type mask_size_bits, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) +{ + CUDF_EXPECTS( + std::all_of(masks_begin_bits.begin(), masks_begin_bits.end(), [](auto b) { return b >= 0; }), + "Invalid range."); + CUDF_EXPECTS(mask_size_bits > 0, "Invalid bit range."); + CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), + "Mask pointer cannot be null"); + + rmm::device_uvector d_masks(masks.size(), stream, mr); + rmm::device_uvector d_begin_bits(masks_begin_bits.size(), stream, mr); + + CUDA_TRY(cudaMemcpyAsync( + d_masks.data(), masks.data(), masks.size_bytes(), cudaMemcpyHostToDevice, stream.value())); + CUDA_TRY(cudaMemcpyAsync(d_begin_bits.data(), + masks_begin_bits.data(), + masks_begin_bits.size_bytes(), + cudaMemcpyHostToDevice, + stream.value())); + + cudf::detail::grid_1d config(dest_mask.size(), 256); + offset_bitmask_binop<<>>( + op, dest_mask, d_masks, d_begin_bits, mask_size_bits); + CHECK_CUDA(stream.value()); + stream.synchronize(); +} + +} // namespace detail + +} // namespace cudf diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index 2f2bc91cb74..b0870ef8d9a 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #pragma once #include +#include #include @@ -88,15 +89,15 @@ rmm::device_buffer copy_bitmask( rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); /** - * @copydoc bitmask_and(std::vector, std::vector const&, size_type, - * rmm::mr::device_memory_resource *) + * @copydoc bitmask_and(host_span const, host_span const, + * size_type, rmm::mr::device_memory_resource *) * * @param stream CUDA stream used for device memory operations and kernel launches */ rmm::device_buffer bitmask_and( - std::vector const &masks, - std::vector const &begin_bits, - size_type mask_size, + host_span masks, + host_span masks_begin_bits, + size_type mask_size_bits, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); @@ -110,23 +111,33 @@ rmm::device_buffer bitmask_and( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); +/** + * @copydoc cudf::bitmask_or + * + * @param[in] stream CUDA stream used for device memory operations and kernel launches. + */ +rmm::device_buffer bitmask_or( + table_view const &view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + /** * @brief Performs a bitwise AND of the specified bitmasks, * and writes in place to destination * * @param dest_mask Destination to which the AND result is written * @param masks The list of data pointers of the bitmasks to be ANDed - * @param begin_bits The bit offsets from which each mask is to be ANDed - * @param mask_size The number of bits to be ANDed in each mask + * @param masks_begin_bits The bit offsets from which each mask is to be ANDed + * @param mask_size_bits The number of bits to be ANDed in each mask * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned device_buffer * @return rmm::device_buffer Output bitmask */ void inplace_bitmask_and( - bitmask_type *dest_mask, - std::vector const &masks, - std::vector const &begin_bits, - size_type mask_size, + device_span dest_mask, + host_span masks, + host_span masks_begin_bits, + size_type mask_size_bits, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp index 5e1f0f0802e..0d4de1a9beb 100644 --- a/cpp/include/cudf/null_mask.hpp +++ b/cpp/include/cudf/null_mask.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -220,5 +220,19 @@ rmm::device_buffer bitmask_and( table_view const& view, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns a bitwise OR of the bitmasks of columns of a table + * + * If any of the columns isn't nullable, it is considered all valid. + * If no column in the table is nullable, an empty bitmask is returned. + * + * @param view The table of columns + * @param mr Device memory resource used to allocate the returned device_buffer + * @return rmm::device_buffer Output bitmask + */ +rmm::device_buffer bitmask_or( + table_view const& view, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 78188b26473..4a2a7db9638 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #include +#include #include #include #include @@ -23,10 +24,12 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -41,6 +44,8 @@ #include #include +using cudf::detail::device_span; + namespace cudf { size_type state_null_count(mask_state state, size_type size) { @@ -316,37 +321,6 @@ __global__ void copy_offset_bitmask(bitmask_type *__restrict__ destination, } } -/** - * @brief Computes the bitwise AND of an array of bitmasks - * - * @param destination The bitmask to write result into - * @param source Array of source mask pointers. All masks must be of same size - * @param begin_bit Array of offsets into corresponding @p source masks. - * Must be same size as source array - * @param num_sources Number of masks in @p source array - * @param source_size Number of bits in each mask in @p source - * @param number_of_mask_words The number of words of type bitmask_type to copy - */ -__global__ void offset_bitmask_and(bitmask_type *__restrict__ destination, - bitmask_type const *const *__restrict__ source, - size_type const *__restrict__ begin_bit, - size_type num_sources, - size_type source_size, - size_type number_of_mask_words) -{ - for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x; - destination_word_index < number_of_mask_words; - destination_word_index += blockDim.x * gridDim.x) { - bitmask_type destination_word = ~bitmask_type{0}; // All bits 1 - for (size_type i = 0; i < num_sources; i++) { - destination_word &= detail::get_mask_offset_word( - source[i], destination_word_index, begin_bit[i], begin_bit[i] + source_size); - } - - destination[destination_word_index] = destination_word; - } -} - // convert [first_bit_index,last_bit_index) to // [first_word_index,last_word_index) struct to_word_index : public thrust::unary_function { @@ -422,51 +396,37 @@ rmm::device_buffer copy_bitmask(column_view const &view, } // Inplace Bitwise AND of the masks -void inplace_bitmask_and(bitmask_type *dest_mask, - std::vector const &masks, - std::vector const &begin_bits, +void inplace_bitmask_and(device_span dest_mask, + host_span masks, + host_span begin_bits, size_type mask_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { - CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }), - "Invalid range."); - CUDF_EXPECTS(mask_size > 0, "Invalid bit range."); - CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), - "Mask pointer cannot be null"); - - auto number_of_mask_words = num_bitmask_words(mask_size); - - rmm::device_vector d_masks(masks); - rmm::device_vector d_begin_bits(begin_bits); - - cudf::detail::grid_1d config(number_of_mask_words, 256); - offset_bitmask_and<<>>( + inplace_bitmask_binop( + [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, dest_mask, - d_masks.data().get(), - d_begin_bits.data().get(), - d_masks.size(), + masks, + begin_bits, mask_size, - number_of_mask_words); - - CHECK_CUDA(stream.value()); + stream, + mr); } // Bitwise AND of the masks -rmm::device_buffer bitmask_and(std::vector const &masks, - std::vector const &begin_bits, +rmm::device_buffer bitmask_and(host_span masks, + host_span begin_bits, size_type mask_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { - rmm::device_buffer dest_mask{}; - auto num_bytes = bitmask_allocation_size_bytes(mask_size); - - dest_mask = rmm::device_buffer{num_bytes, stream, mr}; - inplace_bitmask_and( - static_cast(dest_mask.data()), masks, begin_bits, mask_size, stream, mr); - - return dest_mask; + return bitmask_binop( + [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, + masks, + begin_bits, + mask_size, + stream, + mr); } cudf::size_type count_set_bits(bitmask_type const *bitmask, @@ -651,12 +611,48 @@ rmm::device_buffer bitmask_and(table_view const &view, } if (masks.size() > 0) { - return cudf::detail::bitmask_and(masks, offsets, view.num_rows(), stream, mr); + return cudf::detail::bitmask_binop( + [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, + masks, + offsets, + view.num_rows(), + stream, + mr); } return null_mask; } +// Returns the bitwise OR of the null masks of all columns in the table view +rmm::device_buffer bitmask_or(table_view const &view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) +{ + CUDF_FUNC_RANGE(); + rmm::device_buffer null_mask{0, stream, mr}; + if (view.num_rows() == 0 or view.num_columns() == 0) { return null_mask; } + + std::vector masks; + std::vector offsets; + for (auto &&col : view) { + if (col.nullable()) { + masks.push_back(col.null_mask()); + offsets.push_back(col.offset()); + } + } + + if (static_cast(masks.size()) == view.num_columns()) { + return cudf::detail::bitmask_binop( + [] __device__(bitmask_type left, bitmask_type right) { return left | right; }, + masks, + offsets, + view.num_rows(), + stream, + mr); + } + + return null_mask; +} } // namespace detail // Count non-zero bits in the specified range @@ -709,4 +705,9 @@ rmm::device_buffer bitmask_and(table_view const &view, rmm::mr::device_memory_re return detail::bitmask_and(view, rmm::cuda_stream_default, mr); } +rmm::device_buffer bitmask_or(table_view const &view, rmm::mr::device_memory_resource *mr) +{ + return detail::bitmask_or(view, rmm::cuda_stream_default, mr); +} + } // namespace cudf diff --git a/cpp/src/reductions/scan.cu b/cpp/src/reductions/scan.cu index f73ffb0214a..c3aadf47794 100644 --- a/cpp/src/reductions/scan.cu +++ b/cpp/src/reductions/scan.cu @@ -21,11 +21,10 @@ #include #include #include -#include +#include #include #include #include -#include #include #include @@ -34,6 +33,7 @@ namespace cudf { namespace detail { + /** * @brief Dispatcher for running Scan operation on input column * Dispatches scan operation on `Op` and creates output column @@ -73,23 +73,14 @@ struct scan_dispatcher { mutable_column_view output = output_column->mutable_view(); auto d_input = column_device_view::create(input_view, stream); - if (input_view.has_nulls()) { - auto input = make_null_replacement_iterator(*d_input, Op::template identity()); - thrust::exclusive_scan(rmm::exec_policy(stream), - input, - input + size, - output.data(), - Op::template identity(), - Op{}); - } else { - auto input = d_input->begin(); - thrust::exclusive_scan(rmm::exec_policy(stream), - input, - input + size, - output.data(), - Op::template identity(), - Op{}); - } + auto input = + make_null_replacement_iterator(*d_input, Op::template identity(), input_view.has_nulls()); + thrust::exclusive_scan(rmm::exec_policy(stream), + input, + input + size, + output.data(), + Op::template identity(), + Op{}); CHECK_CUDA(stream.value()); return output_column; @@ -147,13 +138,9 @@ struct scan_dispatcher { auto d_input = column_device_view::create(input_view, stream); mutable_column_view output = output_column->mutable_view(); - if (input_view.has_nulls()) { - auto input = make_null_replacement_iterator(*d_input, Op::template identity()); - thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, output.data(), Op{}); - } else { - auto input = d_input->begin(); - thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, output.data(), Op{}); - } + auto const input = + make_null_replacement_iterator(*d_input, Op::template identity(), input_view.has_nulls()); + thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, output.data(), Op{}); CHECK_CUDA(stream.value()); return output_column; @@ -171,13 +158,10 @@ struct scan_dispatcher { auto d_input = column_device_view::create(input_view, stream); - if (input_view.has_nulls()) { - auto input = make_null_replacement_iterator(*d_input, Op::template identity()); - thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, result.data(), Op{}); - } else { - auto input = d_input->begin(); - thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, result.data(), Op{}); - } + auto input = + make_null_replacement_iterator(*d_input, Op::template identity(), input_view.has_nulls()); + thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, result.data(), Op{}); + CHECK_CUDA(stream.value()); auto output_column = diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu index 5f92fea76f5..2bd71767265 100644 --- a/cpp/src/structs/structs_column_factories.cu +++ b/cpp/src/structs/structs_column_factories.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,6 @@ #include #include - namespace cudf { namespace { // Helper function to superimpose validity of parent struct @@ -44,18 +43,19 @@ void superimpose_parent_nullmask(bitmask_type const* parent_null_mask, // Child should have a null mask. // `AND` the child's null mask with the parent's. - auto data_type{child.type()}; - auto num_rows{child.size()}; - auto current_child_mask = child.mutable_view().null_mask(); - cudf::detail::inplace_bitmask_and(current_child_mask, - {reinterpret_cast(parent_null_mask), - reinterpret_cast(current_child_mask)}, - {0, 0}, - child.size(), - stream, - mr); + std::vector masks{ + reinterpret_cast(parent_null_mask), + reinterpret_cast(current_child_mask)}; + std::vector begin_bits{0, 0}; + cudf::detail::inplace_bitmask_and( + detail::device_span(current_child_mask, num_bitmask_words(child.size())), + masks, + begin_bits, + child.size(), + stream, + mr); child.set_null_count(UNKNOWN_NULL_COUNT); } diff --git a/cpp/tests/bitmask/bitmask_tests.cu b/cpp/tests/bitmask/bitmask_tests.cu index 8afa4faa9e3..2f820da687e 100644 --- a/cpp/tests/bitmask/bitmask_tests.cu +++ b/cpp/tests/bitmask/bitmask_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include #include +#include struct BitmaskUtilitiesTest : public cudf::test::BaseFixture { }; @@ -413,7 +414,7 @@ TEST_F(CopyBitmaskTest, TestZeroOffset) cleanEndWord(splice_mask, begin_bit, end_bit); auto number_of_bits = end_bit - begin_bit; CUDF_TEST_EXPECT_EQUAL_BUFFERS( - gold_splice_mask.data(), splice_mask.data(), number_of_bits / CHAR_BIT); + gold_splice_mask.data(), splice_mask.data(), cudf::num_bitmask_words(number_of_bits)); } TEST_F(CopyBitmaskTest, TestNonZeroOffset) @@ -433,7 +434,7 @@ TEST_F(CopyBitmaskTest, TestNonZeroOffset) cleanEndWord(splice_mask, begin_bit, end_bit); auto number_of_bits = end_bit - begin_bit; CUDF_TEST_EXPECT_EQUAL_BUFFERS( - gold_splice_mask.data(), splice_mask.data(), number_of_bits / CHAR_BIT); + gold_splice_mask.data(), splice_mask.data(), cudf::num_bitmask_words(number_of_bits)); } TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorContiguous) @@ -468,7 +469,7 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorContiguous) rmm::device_buffer concatenated_bitmask = cudf::concatenate_masks(views); cleanEndWord(concatenated_bitmask, 0, num_elements); CUDF_TEST_EXPECT_EQUAL_BUFFERS( - concatenated_bitmask.data(), gold_mask.data(), num_elements / CHAR_BIT); + concatenated_bitmask.data(), gold_mask.data(), cudf::num_bitmask_words(num_elements)); } TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorDiscontiguous) @@ -493,7 +494,60 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorDiscontiguous) rmm::device_buffer concatenated_bitmask = cudf::concatenate_masks(views); cleanEndWord(concatenated_bitmask, 0, num_elements); CUDF_TEST_EXPECT_EQUAL_BUFFERS( - concatenated_bitmask.data(), gold_mask.data(), num_elements / CHAR_BIT); + concatenated_bitmask.data(), gold_mask.data(), cudf::num_bitmask_words(num_elements)); +} + +struct MergeBitmaskTest : public cudf::test::BaseFixture { +}; + +TEST_F(MergeBitmaskTest, TestBitmaskAnd) +{ + cudf::test::fixed_width_column_wrapper const bools_col1({0, 1, 0, 1, 1}, {0, 1, 1, 1, 0}); + cudf::test::fixed_width_column_wrapper const bools_col2({0, 2, 1, 0, 255}, {1, 1, 0, 1, 0}); + cudf::test::fixed_width_column_wrapper const bools_col3({0, 2, 1, 0, 255}); + + auto const input1 = cudf::table_view({bools_col3}); + auto const input2 = cudf::table_view({bools_col1, bools_col2}); + auto const input3 = cudf::table_view({bools_col1, bools_col2, bools_col3}); + + rmm::device_buffer result1 = cudf::bitmask_and(input1); + rmm::device_buffer result2 = cudf::bitmask_and(input2); + rmm::device_buffer result3 = cudf::bitmask_and(input3); + + auto odd_indices = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto odd = cudf::test::detail::make_null_mask(odd_indices, odd_indices + input2.num_rows()); + + EXPECT_EQ(nullptr, result1.data()); + CUDF_TEST_EXPECT_EQUAL_BUFFERS( + result2.data(), odd.data(), cudf::num_bitmask_words(input2.num_rows())); + CUDF_TEST_EXPECT_EQUAL_BUFFERS( + result3.data(), odd.data(), cudf::num_bitmask_words(input2.num_rows())); +} + +TEST_F(MergeBitmaskTest, TestBitmaskOr) +{ + cudf::test::fixed_width_column_wrapper const bools_col1({0, 1, 0, 1, 1}, {1, 1, 0, 0, 1}); + cudf::test::fixed_width_column_wrapper const bools_col2({0, 2, 1, 0, 255}, {0, 0, 1, 0, 1}); + cudf::test::fixed_width_column_wrapper const bools_col3({0, 2, 1, 0, 255}); + + auto const input1 = cudf::table_view({bools_col3}); + auto const input2 = cudf::table_view({bools_col1, bools_col2}); + auto const input3 = cudf::table_view({bools_col1, bools_col2, bools_col3}); + + rmm::device_buffer result1 = cudf::bitmask_or(input1); + rmm::device_buffer result2 = cudf::bitmask_or(input2); + rmm::device_buffer result3 = cudf::bitmask_or(input3); + + auto all_but_index3 = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + auto null3 = + cudf::test::detail::make_null_mask(all_but_index3, all_but_index3 + input2.num_rows()); + + EXPECT_EQ(nullptr, result1.data()); + CUDF_TEST_EXPECT_EQUAL_BUFFERS( + result2.data(), null3.data(), cudf::num_bitmask_words(input2.num_rows())); + EXPECT_EQ(nullptr, result3.data()); } CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/iterator/value_iterator_test.cu b/cpp/tests/iterator/value_iterator_test.cu index 3ad7ac6d0cd..542123ffd25 100644 --- a/cpp/tests/iterator/value_iterator_test.cu +++ b/cpp/tests/iterator/value_iterator_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -343,7 +343,7 @@ TYPED_TEST(IteratorTest, error_handling) CUDF_EXPECT_THROW_MESSAGE((cudf::detail::make_null_replacement_iterator( *d_col_no_null, cudf::test::make_type_param_scalar(0))), - "Unexpected non-nullable column."); + "column with nulls must have a validity bitmask"); CUDF_EXPECT_THROW_MESSAGE((d_col_no_null->pair_begin()), "Unexpected non-nullable column."); diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index 549e5e0d215..8372b3977c0 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -509,8 +509,13 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointScanSum) auto const column = fp_wrapper{{1, 2, 3, 4}, scale}; auto const expected = fp_wrapper{{1, 3, 6, 10}, scale}; auto const result = cudf::scan(column, cudf::make_sum_aggregation(), scan_type::INCLUSIVE); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); + + auto const with_nulls = fp_wrapper({1, 2, 3, 0, 4, 0}, {1, 1, 1, 0, 1, 0}, scale); + auto const expected_nulls = fp_wrapper({1, 3, 6, 0, 10, 0}, {1, 1, 1, 0, 1, 0}, scale); + auto const result_nulls = + cudf::scan(with_nulls, cudf::make_sum_aggregation(), scan_type::INCLUSIVE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_nulls->view(), expected_nulls); } } @@ -526,8 +531,13 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointPreScanSum) auto const column = fp_wrapper{{1, 2, 3, 4}, scale}; auto const expected = fp_wrapper{{0, 1, 3, 6}, scale}; auto const result = cudf::scan(column, cudf::make_sum_aggregation(), scan_type::EXCLUSIVE); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); + + auto const with_nulls = fp_wrapper({0, 1, 2, 3, 0, 4}, {0, 1, 1, 1, 0, 1}, scale); + auto const expected_nulls = fp_wrapper({0, 0, 1, 3, 0, 6}, {0, 1, 1, 1, 0, 1}, scale); + auto const result_nulls = + cudf::scan(with_nulls, cudf::make_sum_aggregation(), scan_type::EXCLUSIVE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_nulls->view(), expected_nulls); } } @@ -556,8 +566,13 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointScanMin) auto const column = fp_wrapper{{1, 2, 3, 4}, scale}; auto const expected = fp_wrapper{{1, 1, 1, 1}, scale}; auto const result = cudf::scan(column, cudf::make_min_aggregation(), scan_type::INCLUSIVE); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); + + auto const with_nulls = fp_wrapper({1, 0, 2, 0, 3, 4}, {1, 0, 1, 0, 1, 1}, scale); + auto const expected_nulls = fp_wrapper({1, 0, 1, 0, 1, 1}, {1, 0, 1, 0, 1, 1}, scale); + auto const result_nulls = + cudf::scan(with_nulls, cudf::make_min_aggregation(), scan_type::INCLUSIVE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_nulls->view(), expected_nulls); } } @@ -572,7 +587,11 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointScanMax) auto const scale = scale_type{i}; auto const column = fp_wrapper{{1, 2, 3, 4}, scale}; auto const result = cudf::scan(column, cudf::make_max_aggregation(), scan_type::INCLUSIVE); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), column); + + auto const with_nulls = fp_wrapper({1, 0, 0, 2, 3, 4}, {1, 0, 0, 1, 1, 1}, scale); + auto const result_nulls = + cudf::scan(with_nulls, cudf::make_max_aggregation(), scan_type::INCLUSIVE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_nulls->view(), with_nulls); } } diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 54866ff6eee..b68d7b5849f 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -21,7 +21,10 @@ # import os import sys + +from docutils.nodes import Text from recommonmark.transform import AutoStructify +from sphinx.addnodes import pending_xref sys.path.insert(0, os.path.abspath("../..")) @@ -74,9 +77,9 @@ # built documents. # # The short X.Y version. -version = '0.19' +version = "0.19" # The full version, including alpha/beta/rc tags. -release = '0.19.0' +release = "0.19.0" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -193,7 +196,10 @@ # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {"https://docs.python.org/": None} +intersphinx_mapping = { + "python": ("https://docs.python.org/", None), + "cupy": ("https://docs.cupy.dev/en/stable/", None), +} # Config numpydoc numpydoc_show_inherited_class_members = True @@ -202,14 +208,51 @@ autoclass_content = "init" # Config AutoStructify -github_doc_root = 'https://github.com/rtfd/recommonmark/tree/master/doc/' +github_doc_root = "https://github.com/rtfd/recommonmark/tree/master/doc/" + +# Replace API shorthands with fullname +_reftarget_aliases = { + "cudf.Series": ("cudf.core.series.Series", "cudf.Series"), + "cudf.Index": ("cudf.core.index.Index", "cudf.Index"), + "cupy.core.core.ndarray": ("cupy.ndarray", "cupy.ndarray"), +} + +_internal_names_to_ignore = {"cudf.core.column.string.StringColumn"} + + +def resolve_aliases(app, doctree): + pending_xrefs = doctree.traverse(condition=pending_xref) + for node in pending_xrefs: + alias = node.get("reftarget", None) + if alias is not None and alias in _reftarget_aliases: + real_ref, text_to_render = _reftarget_aliases[alias] + node["reftarget"] = real_ref + + text_node = next( + iter(node.traverse(lambda n: n.tagname == "#text")) + ) + text_node.parent.replace(text_node, Text(text_to_render, "")) + + +def ignore_internal_references(app, env, node, contnode): + name = node.get("reftarget", None) + if name is not None and name in _internal_names_to_ignore: + node["reftarget"] = "" + return contnode + def setup(app): app.add_js_file("copybutton_pydocs.js") app.add_css_file("params.css") app.add_css_file("https://docs.rapids.ai/assets/css/custom.css") - app.add_config_value('recommonmark_config', { - 'url_resolver': lambda url: github_doc_root + url, - 'auto_toc_tree_section': 'Contents', - }, True) + app.add_config_value( + "recommonmark_config", + { + "url_resolver": lambda url: github_doc_root + url, + "auto_toc_tree_section": "Contents", + }, + True, + ) app.add_transform(AutoStructify) + app.connect("doctree-read", resolve_aliases) + app.connect("missing-reference", ignore_internal_references) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java index 2201fb1fe74..9f414661967 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnVector.java @@ -167,16 +167,16 @@ private static long getColumnViewFromColumn(long nativePointer) { } } - - private static long initViewHandle(DType type, int rows, int nc, DeviceMemoryBuffer dataBuffer, - DeviceMemoryBuffer validityBuffer, - DeviceMemoryBuffer offsetBuffer, long[] childHandles) { + static long initViewHandle(DType type, int rows, int nc, + BaseDeviceMemoryBuffer dataBuffer, + BaseDeviceMemoryBuffer validityBuffer, + BaseDeviceMemoryBuffer offsetBuffer, long[] childHandles) { long cd = dataBuffer == null ? 0 : dataBuffer.address; long cdSize = dataBuffer == null ? 0 : dataBuffer.length; long od = offsetBuffer == null ? 0 : offsetBuffer.address; long vd = validityBuffer == null ? 0 : validityBuffer.address; return makeCudfColumnView(type.typeId.getNativeId(), type.getScale(), cd, cdSize, - od, vd, nc, rows, childHandles) ; + od, vd, nc, rows, childHandles); } static ColumnVector fromViewWithContiguousAllocation(long columnViewAddress, DeviceMemoryBuffer buffer) { diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 331c5b08764..e0cc96263b3 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -18,9 +18,8 @@ package ai.rapids.cudf; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; +import java.util.*; +import java.util.stream.IntStream; import static ai.rapids.cudf.HostColumnVector.OFFSET_SIZE; @@ -49,6 +48,65 @@ protected ColumnView(long address) { this.nullCount = ColumnView.getNativeNullCount(viewHandle); } + /** + * Create a new column view based off of data already on the device. Ref count on the buffers + * is not incremented and none of the underlying buffers are owned by this view. The returned + * ColumnView is only valid as long as the underlying buffers remain valid. If the buffers are + * closed before this ColumnView is closed, it will result in undefined behavior. + * + * If ownership is needed, call {@link ColumnView#copyToColumnVector} + * + * @param type the type of the vector + * @param rows the number of rows in this vector. + * @param nullCount the number of nulls in the dataset. + * @param validityBuffer an optional validity buffer. Must be provided if nullCount != 0. + * The ownership doesn't change on this buffer + * @param offsetBuffer a host buffer required for nested types including strings and string + * categories. The ownership doesn't change on this buffer + * @param children an array of ColumnView children + */ + public ColumnView(DType type, long rows, Optional nullCount, + BaseDeviceMemoryBuffer validityBuffer, + BaseDeviceMemoryBuffer offsetBuffer, ColumnView[] children) { + this(type, (int) rows, nullCount.orElse(UNKNOWN_NULL_COUNT).intValue(), + null, validityBuffer, offsetBuffer, children); + assert(type.isNestedType()); + assert (nullCount.isPresent() && nullCount.get() <= Integer.MAX_VALUE) + || !nullCount.isPresent(); + } + + /** + * Create a new column view based off of data already on the device. Ref count on the buffers + * is not incremented and none of the underlying buffers are owned by this view. The returned + * ColumnView is only valid as long as the underlying buffers remain valid. If the buffers are + * closed before this ColumnView is closed, it will result in undefined behavior. + * + * If ownership is needed, call {@link ColumnView#copyToColumnVector} + * + * @param type the type of the vector + * @param rows the number of rows in this vector. + * @param nullCount the number of nulls in the dataset. + * @param dataBuffer a host buffer required for nested types including strings and string + * categories. The ownership doesn't change on this buffer + * @param validityBuffer an optional validity buffer. Must be provided if nullCount != 0. + * The ownership doesn't change on this buffer + */ + public ColumnView(DType type, long rows, Optional nullCount, + BaseDeviceMemoryBuffer dataBuffer, + BaseDeviceMemoryBuffer validityBuffer) { + this(type, (int) rows, nullCount.orElse(UNKNOWN_NULL_COUNT).intValue(), + dataBuffer, validityBuffer, null, null); + assert (!type.isNestedType()); + assert (nullCount.isPresent() && nullCount.get() <= Integer.MAX_VALUE) + || !nullCount.isPresent(); + } + + private ColumnView(DType type, long rows, int nullCount, + BaseDeviceMemoryBuffer dataBuffer, BaseDeviceMemoryBuffer validityBuffer, + BaseDeviceMemoryBuffer offsetBuffer, ColumnView[] children) { + this(ColumnVector.initViewHandle(type, (int) rows, nullCount, dataBuffer, validityBuffer, + offsetBuffer, Arrays.stream(children).mapToLong(c -> c.getNativeView()).toArray())); + } /** Creates a ColumnVector from a column view handle * @return a new ColumnVector @@ -520,7 +578,7 @@ public final ColumnVector normalizeNANsAndZeros() { * @return the new ColumnVector with merged null mask. */ public final ColumnVector mergeAndSetValidity(BinaryOp mergeOp, ColumnView... columns) { - assert mergeOp == BinaryOp.BITWISE_AND : "Only BITWISE_AND supported right now"; + assert mergeOp == BinaryOp.BITWISE_AND || mergeOp == BinaryOp.BITWISE_OR : "Only BITWISE_AND and BITWISE_OR supported right now"; long[] columnViews = new long[columns.length]; long size = getRowCount(); @@ -1296,6 +1354,86 @@ public ColumnVector castTo(DType type) { return new ColumnVector(castTo(getNativeView(), type.typeId.getNativeId(), type.getScale())); } + /** + * This method takes in a nested type and replaces its children with the given views + * Note: Make sure the numbers of rows in the leaf node are the same as the child replacing it + * otherwise the list can point to elements outside of the column values. + * + * Note: this method returns a ColumnView that won't live past the ColumnVector that it's + * pointing to. + * + * Ex: List list = col{{1,3}, {9,3,5}} + * + * validNewChild = col{8, 3, 9, 2, 0} + * + * list.replaceChildrenWithViews(1, validNewChild) => col{{8, 3}, {9, 2, 0}} + * + * invalidNewChild = col{3, 2} + * list.replaceChildrenWithViews(1, invalidNewChild) => col{{3, 2}, {invalid, invalid, invalid}} + * + * invalidNewChild = col{8, 3, 9, 2, 0, 0, 7} + * list.replaceChildrenWithViews(1, invalidNewChild) => col{{8, 3}, {9, 2, 0}} // undefined result + */ + public ColumnView replaceChildrenWithViews(int[] indices, + ColumnView[] views) { + assert (type.isNestedType()); + assert (indices.length == views.length); + if (type == DType.LIST) { + assert (indices.length == 1); + } + if (indices.length != views.length) { + throw new IllegalArgumentException("The indices size and children size should match"); + } + Map map = new HashMap<>(); + IntStream.range(0, indices.length).forEach(index -> { + if (map.containsKey(indices[index])) { + throw new IllegalArgumentException("Duplicate mapping found for replacing child index"); + } + map.put(indices[index], views[index]); + }); + List newChildren = new ArrayList<>(getNumChildren()); + IntStream.range(0, getNumChildren()).forEach(i -> { + ColumnView view = map.remove(i); + if (view == null) { + newChildren.add(getChildColumnView(i)); + } else { + newChildren.add(view); + } + }); + if (!map.isEmpty()) { + throw new IllegalArgumentException("One or more invalid child indices passed to be replaced"); + } + return new ColumnView(type, getRowCount(), Optional.of(getNullCount()), getValid(), + getOffsets(), newChildren.stream().toArray(n -> new ColumnView[n])); + } + + /** + * This method takes in a list and returns a new list with the leaf node replaced with the given + * view. Make sure the numbers of rows in the leaf node are the same as the child replacing it + * otherwise the list can point to elements outside of the column values. + * + * Note: this method returns a ColumnView that won't live past the ColumnVector that it's + * pointing to. + * + * Ex: List list = col{{1,3}, {9,3,5}} + * + * validNewChild = col{8, 3, 9, 2, 0} + * + * list.replaceChildrenWithViews(1, validNewChild) => col{{8, 3}, {9, 2, 0}} + * + * invalidNewChild = col{3, 2} + * list.replaceChildrenWithViews(1, invalidNewChild) => + * col{{3, 2}, {invalid, invalid, invalid}} throws an exception + * + * invalidNewChild = col{8, 3, 9, 2, 0, 0, 7} + * list.replaceChildrenWithViews(1, invalidNewChild) => + * col{{8, 3}, {9, 2, 0}} throws an exception + */ + public ColumnView replaceListChild(ColumnView child) { + assert(type == DType.LIST); + return replaceChildrenWithViews(new int[]{1}, new ColumnView[]{child}); + } + /** * Zero-copy cast between types with the same underlying representation. * diff --git a/java/src/main/native/src/ColumnVectorJni.cpp b/java/src/main/native/src/ColumnVectorJni.cpp index 3385343c291..737abea6f13 100644 --- a/java/src/main/native/src/ColumnVectorJni.cpp +++ b/java/src/main/native/src/ColumnVectorJni.cpp @@ -31,7 +31,6 @@ #include "cudf_jni_apis.hpp" #include "dtype_utils.hpp" - extern "C" { JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_sequence(JNIEnv *env, jclass, @@ -315,96 +314,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_makeEmptyCudfColumn(JNI CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_makeNumericCudfColumn( - JNIEnv *env, jobject j_object, jint j_type, jint j_size, jint j_mask_state) { - - JNI_ARG_CHECK(env, (j_size != 0), "size is 0", 0); - - try { - cudf::jni::auto_set_device(env); - cudf::type_id n_type = static_cast(j_type); - cudf::data_type n_data_type(n_type); - cudf::size_type n_size = static_cast(j_size); - cudf::mask_state n_mask_state = static_cast(j_mask_state); - std::unique_ptr column( - cudf::make_numeric_column(n_data_type, n_size, n_mask_state)); - return reinterpret_cast(column.release()); - } - CATCH_STD(env, 0); -} - -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_makeTimestampCudfColumn( - JNIEnv *env, jobject j_object, jint j_type, jint j_size, jint j_mask_state) { - - JNI_NULL_CHECK(env, j_type, "type id is null", 0); - JNI_NULL_CHECK(env, j_size, "size is null", 0); - - try { - cudf::jni::auto_set_device(env); - cudf::type_id n_type = static_cast(j_type); - std::unique_ptr n_data_type(new cudf::data_type(n_type)); - cudf::size_type n_size = static_cast(j_size); - cudf::mask_state n_mask_state = static_cast(j_mask_state); - std::unique_ptr column( - cudf::make_timestamp_column(*n_data_type.get(), n_size, n_mask_state)); - return reinterpret_cast(column.release()); - } - CATCH_STD(env, 0); -} - -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_makeStringCudfColumnHostSide( - JNIEnv *env, jobject j_object, jlong j_char_data, jlong j_offset_data, jlong j_valid_data, - jint j_null_count, jint size) { - - JNI_ARG_CHECK(env, (size != 0), "size is 0", 0); - JNI_NULL_CHECK(env, j_char_data, "char data is null", 0); - JNI_NULL_CHECK(env, j_offset_data, "offset is null", 0); - - try { - cudf::jni::auto_set_device(env); - cudf::size_type *host_offsets = reinterpret_cast(j_offset_data); - char *n_char_data = reinterpret_cast(j_char_data); - cudf::size_type n_data_size = host_offsets[size]; - cudf::bitmask_type *n_validity = reinterpret_cast(j_valid_data); - - if (n_validity == nullptr) { - j_null_count = 0; - } - - std::unique_ptr offsets = cudf::make_numeric_column( - cudf::data_type{cudf::type_id::INT32}, size + 1, cudf::mask_state::UNALLOCATED); - auto offsets_view = offsets->mutable_view(); - JNI_CUDA_TRY(env, 0, - cudaMemcpyAsync(offsets_view.data(), host_offsets, - (size + 1) * sizeof(int32_t), cudaMemcpyHostToDevice)); - - std::unique_ptr data = cudf::make_numeric_column( - cudf::data_type{cudf::type_id::INT8}, n_data_size, cudf::mask_state::UNALLOCATED); - auto data_view = data->mutable_view(); - JNI_CUDA_TRY(env, 0, - cudaMemcpyAsync(data_view.data(), n_char_data, n_data_size, - cudaMemcpyHostToDevice)); - - std::unique_ptr column; - if (j_null_count == 0) { - column = - cudf::make_strings_column(size, std::move(offsets), std::move(data), j_null_count, {}); - } else { - cudf::size_type bytes = (cudf::word_index(size) + 1) * sizeof(cudf::bitmask_type); - rmm::device_buffer dev_validity(bytes); - JNI_CUDA_TRY(env, 0, - cudaMemcpyAsync(dev_validity.data(), n_validity, bytes, cudaMemcpyHostToDevice)); - - column = cudf::make_strings_column(size, std::move(offsets), std::move(data), j_null_count, - std::move(dev_validity)); - } - - JNI_CUDA_TRY(env, 0, cudaStreamSynchronize(0)); - return reinterpret_cast(column.release()); - } - CATCH_STD(env, 0); -} - JNIEXPORT jint JNICALL Java_ai_rapids_cudf_ColumnVector_getNativeNullCountColumn(JNIEnv *env, jobject j_object, jlong handle) { diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index a0613f9b73f..e8474bda1be 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1305,8 +1305,15 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_bitwiseMergeAndSetValidit cudf::table_view *input_table = new cudf::table_view(column_views); cudf::binary_operator op = static_cast(bin_op); - if(op == cudf::binary_operator::BITWISE_AND) { - copy->set_null_mask(cudf::bitmask_and(*input_table)); + switch(op) { + case cudf::binary_operator::BITWISE_AND: + copy->set_null_mask(cudf::bitmask_and(*input_table)); + break; + case cudf::binary_operator::BITWISE_OR: + copy->set_null_mask(cudf::bitmask_or(*input_table)); + break; + default: + JNI_THROW_NEW(env, cudf::jni::ILLEGAL_ARG_CLASS, "Unsupported merge operation", 0); } return reinterpret_cast(copy.release()); diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index a3500ae86ef..0675ece4863 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -571,7 +571,7 @@ void testSpark32BitMurmur3HashMixed() { } @Test - void testNullReconfigureNulls() { + void testAndNullReconfigureNulls() { try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null); ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null); ColumnVector intResult = v1.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0); @@ -585,6 +585,28 @@ void testNullReconfigureNulls() { } } + @Test + void testOrNullReconfigureNulls() { + try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null); + ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null); + ColumnVector v2 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, Integer.MAX_VALUE); + ColumnVector intResultV0 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0); + ColumnVector intResultV0V1 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1); + ColumnVector intResultMulti = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v0, v1, v1, v0, v1, v0); + ColumnVector intResultv0v1v2 = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1, v2); + ColumnVector v3 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3"); + ColumnVector stringResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1); + ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", null); + ColumnVector noMaskResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR)) { + assertColumnsAreEqual(v0, intResultV0); + assertColumnsAreEqual(v1, intResultV0V1); + assertColumnsAreEqual(v1, intResultMulti); + assertColumnsAreEqual(v2, intResultv0v1v2); + assertColumnsAreEqual(stringExpected, stringResult); + assertColumnsAreEqual(v3, noMaskResult); + } + } + @Test void isNotNullTestEmptyColumn() { try (ColumnVector v = ColumnVector.fromBoxedInts(); @@ -3951,4 +3973,105 @@ void testMakeList() { assertColumnsAreEqual(expected, created); } } + + @Test + void testReplaceLeafNodeInList() { + try ( + ColumnVector c1 = ColumnVector.fromInts(1, 2); + ColumnVector c2 = ColumnVector.fromInts(8, 3); + ColumnVector c3 = ColumnVector.fromInts(9, 8); + ColumnVector c4 = ColumnVector.fromInts(2, 6); + ColumnVector expected = ColumnVector.makeList(c1, c2, c3, c4); + ColumnVector child1 = + ColumnVector.decimalFromDoubles(DType.create(DType.DTypeEnum.DECIMAL64, 3), + RoundingMode.HALF_UP, 770.892, 961.110); + ColumnVector child2 = + ColumnVector.decimalFromDoubles(DType.create(DType.DTypeEnum.DECIMAL64, 3), + RoundingMode.HALF_UP, 524.982, 479.946); + ColumnVector child3 = + ColumnVector.decimalFromDoubles(DType.create(DType.DTypeEnum.DECIMAL64, 3), + RoundingMode.HALF_UP, 346.997, 479.946); + ColumnVector child4 = + ColumnVector.decimalFromDoubles(DType.create(DType.DTypeEnum.DECIMAL64, 3), + RoundingMode.HALF_UP, 87.764, 414.239); + ColumnVector created = ColumnVector.makeList(child1, child2, child3, child4); + ColumnVector newChild = ColumnVector.fromInts(1, 8, 9, 2, 2, 3, 8, 6); + ColumnView replacedView = created.replaceListChild(newChild)) { + try (ColumnVector replaced = replacedView.copyToColumnVector()) { + assertColumnsAreEqual(expected, replaced); + } + } + } + + @Test + void testReplaceLeafNodeInListWithIllegal() { + assertThrows(IllegalArgumentException.class, () -> { + try (ColumnVector child1 = + ColumnVector.decimalFromDoubles(DType.create(DType.DTypeEnum.DECIMAL64, 3), + RoundingMode.HALF_UP, 770.892, 961.110); + ColumnVector child2 = + ColumnVector.decimalFromDoubles(DType.create(DType.DTypeEnum.DECIMAL64, 3), + RoundingMode.HALF_UP, 524.982, 479.946); + ColumnVector child3 = + ColumnVector.decimalFromDoubles(DType.create(DType.DTypeEnum.DECIMAL64, 3), + RoundingMode.HALF_UP, 346.997, 479.946); + ColumnVector child4 = + ColumnVector.decimalFromDoubles(DType.create(DType.DTypeEnum.DECIMAL64, 3), + RoundingMode.HALF_UP, 87.764, 414.239); + ColumnVector created = ColumnVector.makeList(child1, child2, child3, child4); + ColumnVector newChild = ColumnVector.fromInts(0, 1, 8, 9, 2, 2, 3, 8, 6); + ColumnView replacedView = created.replaceListChild(newChild)) { + } + }); + } + + @Test + void testReplaceColumnInStruct() { + try (ColumnVector expected = ColumnVector.fromStructs(new StructType(false, + Arrays.asList( + new BasicType(false, DType.INT32), + new BasicType(false, DType.INT32), + new BasicType(false, DType.INT32))), + new HostColumnVector.StructData(1, 5, 3), + new HostColumnVector.StructData(4, 9, 6)); + ColumnVector child1 = ColumnVector.fromInts(1, 4); + ColumnVector child2 = ColumnVector.fromInts(2, 5); + ColumnVector child3 = ColumnVector.fromInts(3, 6); + ColumnVector created = ColumnVector.makeStruct(child1, child2, child3); + ColumnVector replaceWith = ColumnVector.fromInts(5, 9); + ColumnView replacedView = created.replaceChildrenWithViews(new int[]{1}, + new ColumnVector[]{replaceWith})) { + try (ColumnVector replaced = replacedView.copyToColumnVector()) { + assertColumnsAreEqual(expected, replaced); + } + } + } + + @Test + void testReplaceIllegalIndexColumnInStruct() { + assertThrows(IllegalArgumentException.class, () -> { + try (ColumnVector child1 = ColumnVector.fromInts(1, 4); + ColumnVector child2 = ColumnVector.fromInts(2, 5); + ColumnVector child3 = ColumnVector.fromInts(3, 6); + ColumnVector created = ColumnVector.makeStruct(child1, child2, child3); + ColumnVector replaceWith = ColumnVector.fromInts(5, 9); + ColumnView replacedView = created.replaceChildrenWithViews(new int[]{5}, + new ColumnVector[]{replaceWith})) { + } + }); + } + + @Test + void testReplaceSameIndexColumnInStruct() { + assertThrows(IllegalArgumentException.class, () -> { + try (ColumnVector child1 = ColumnVector.fromInts(1, 4); + ColumnVector child2 = ColumnVector.fromInts(2, 5); + ColumnVector child3 = ColumnVector.fromInts(3, 6); + ColumnVector created = ColumnVector.makeStruct(child1, child2, child3); + ColumnVector replaceWith = ColumnVector.fromInts(5, 9); + ColumnView replacedView = created.replaceChildrenWithViews(new int[]{1, 1}, + new ColumnVector[]{replaceWith, replaceWith})) { + } + }); + } } diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 0a1f6529cc7..81abdd3f66a 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -347,7 +347,7 @@ def cat(self, sep: str = None, na_rep: str = None) -> str: @overload def cat( self, others, sep: str = None, na_rep: str = None - ) -> Union[ParentType, "cudf.core.column.StringColumn"]: + ) -> Union[ParentType, "cudf.core.column.string.StringColumn"]: ... def cat(self, others=None, sep=None, na_rep=None):