diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5f690f5f827..46d5223f7d3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,3 +1,5 @@ +# Copyright (c) 2019-2022, NVIDIA CORPORATION. + repos: - repo: https://github.com/PyCQA/isort rev: 5.6.4 @@ -56,6 +58,15 @@ repos: hooks: - id: pydocstyle args: ["--config=python/.flake8"] + exclude: | + (?x)^( + ci| + cpp| + conda| + docs| + java| + notebooks + ) - repo: https://github.com/pre-commit/mirrors-clang-format rev: v11.1.0 hooks: diff --git a/build.sh b/build.sh index 48182ca1a6f..ab3bd0e7a89 100755 --- a/build.sh +++ b/build.sh @@ -112,16 +112,22 @@ function buildLibCudfJniInDocker { local localMavenRepo=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"} local workspaceRepoDir="$workspaceDir/cudf" local workspaceMavenRepoDir="$workspaceDir/.m2/repository" + local workspaceCcacheDir="$workspaceDir/.ccache" mkdir -p "$CUDF_JAR_JAVA_BUILD_DIR/libcudf-cmake-build" + mkdir -p "$HOME/.ccache" "$HOME/.m2" nvidia-docker build \ -f java/ci/Dockerfile.centos7 \ --build-arg CUDA_VERSION=${cudaVersion} \ -t $imageName . 
nvidia-docker run -it -u $(id -u):$(id -g) --rm \ + -e PARALLEL_LEVEL \ + -e CCACHE_DISABLE \ + -e CCACHE_DIR="$workspaceCcacheDir" \ -v "/etc/group:/etc/group:ro" \ -v "/etc/passwd:/etc/passwd:ro" \ -v "/etc/shadow:/etc/shadow:ro" \ -v "/etc/sudoers.d:/etc/sudoers.d:ro" \ + -v "$HOME/.ccache:$workspaceCcacheDir:rw" \ -v "$REPODIR:$workspaceRepoDir:rw" \ -v "$localMavenRepo:$workspaceMavenRepoDir:rw" \ --workdir "$workspaceRepoDir/java/target/libcudf-cmake-build" \ @@ -129,11 +135,16 @@ function buildLibCudfJniInDocker { scl enable devtoolset-9 \ "cmake $workspaceRepoDir/cpp \ -G${CMAKE_GENERATOR} \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_LINKER_LAUNCHER=ccache \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DCUDA_STATIC_RUNTIME=ON \ -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} \ - -DCMAKE_INSTALL_PREFIX==/usr/local/rapids \ - -DUSE_NVTX=ON -DCUDF_USE_ARROW_STATIC=ON \ + -DCMAKE_INSTALL_PREFIX=/usr/local/rapids \ + -DUSE_NVTX=ON \ + -DCUDF_USE_ARROW_STATIC=ON \ -DCUDF_ENABLE_ARROW_S3=OFF \ -DBUILD_TESTS=OFF \ -DPER_THREAD_DEFAULT_STREAM=ON \ @@ -145,6 +156,10 @@ function buildLibCudfJniInDocker { -Dmaven.repo.local=$workspaceMavenRepoDir \ -DskipTests=${SKIP_TESTS:-false} \ -Dparallel.level=${PARALLEL_LEVEL} \ + -Dcmake.ccache.opts='-DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_LINKER_LAUNCHER=ccache' \ -DCUDF_CPP_BUILD_DIR=$workspaceRepoDir/java/target/libcudf-cmake-build \ -DCUDA_STATIC_RUNTIME=ON \ -DPER_THREAD_DEFAULT_STREAM=ON \ diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 0806bb964cf..68008e13897 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -79,6 +79,7 @@ outputs: - test -f $PREFIX/include/cudf/detail/calendrical_month_sequence.cuh - test -f $PREFIX/include/cudf/detail/concatenate.hpp - test -f 
$PREFIX/include/cudf/detail/copy.hpp + - test -f $PREFIX/include/cudf/detail/copy.cuh - test -f $PREFIX/include/cudf/detail/datetime.hpp - test -f $PREFIX/include/cudf/detail/fill.hpp - test -f $PREFIX/include/cudf/detail/gather.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 15caaec9bec..42a434ba53d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -189,7 +189,6 @@ add_library( src/ast/expression_parser.cpp src/ast/expressions.cpp src/binaryop/binaryop.cpp - src/binaryop/compiled/binary_ops.cu src/binaryop/compiled/Add.cu src/binaryop/compiled/ATan2.cu src/binaryop/compiled/BitwiseAnd.cu @@ -220,6 +219,7 @@ add_library( src/binaryop/compiled/ShiftRightUnsigned.cu src/binaryop/compiled/Sub.cu src/binaryop/compiled/TrueDiv.cu + src/binaryop/compiled/binary_ops.cu src/binaryop/compiled/util.cpp src/labeling/label_bins.cu src/bitmask/null_mask.cu @@ -238,6 +238,7 @@ add_library( src/copying/gather.cu src/copying/get_element.cu src/copying/pack.cpp + src/copying/purge_nonempty_nulls.cu src/copying/reverse.cu src/copying/sample.cu src/copying/scatter.cu @@ -361,6 +362,7 @@ add_library( src/join/mixed_join_size_kernel_nulls.cu src/join/mixed_join_size_kernels_semi.cu src/join/semi_join.cu + src/lists/apply_boolean_mask.cu src/lists/contains.cu src/lists/combine/concatenate_list_elements.cu src/lists/combine/concatenate_rows.cu diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 7d356263220..da64c1bbf3c 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -16,6 +16,7 @@ #include +#include #include #include #include @@ -145,6 +146,8 @@ std::vector segments_in_chunk(int num_segments, int num_chunks, // Executes the command and returns stderr output std::string exec_cmd(std::string_view cmd) { + // Prevent the output from the command from mixing with the original process' output + std::fflush(nullptr); // Switch stderr and stdout to only capture stderr auto const redirected_cmd 
= std::string{"( "}.append(cmd).append(" 3>&2 2>&1 1>&3) 2>/dev/null"); std::unique_ptr pipe(popen(redirected_cmd.c_str(), "r"), pclose); diff --git a/cpp/benchmarks/stream_compaction/distinct.cpp b/cpp/benchmarks/stream_compaction/distinct.cpp index 749badc715d..149c6ad7219 100644 --- a/cpp/benchmarks/stream_compaction/distinct.cpp +++ b/cpp/benchmarks/stream_compaction/distinct.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -55,3 +56,43 @@ NVBENCH_BENCH_TYPES(nvbench_distinct, NVBENCH_TYPE_AXES(data_type)) .set_name("distinct") .set_type_axes_names({"Type"}) .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000}); + +template +void nvbench_distinct_list(nvbench::state& state, nvbench::type_list) +{ + cudf::rmm_pool_raii pool_raii; + + auto const size = state.get_int64("ColumnSize"); + auto const dtype = cudf::type_to_id(); + double const null_frequency = state.get_float64("null_frequency"); + + data_profile table_data_profile; + if (dtype == cudf::type_id::LIST) { + table_data_profile.set_distribution_params(dtype, distribution_id::UNIFORM, 0, 4); + table_data_profile.set_distribution_params( + cudf::type_id::INT32, distribution_id::UNIFORM, 0, 4); + table_data_profile.set_list_depth(1); + } else { + // We're comparing distinct() on a non-nested column to that on a list column with the same + // number of distinct rows. The max list size is 4 and the number of distinct values in the + // list's child is 5. So the number of distinct rows in the list = 1 + 5 + 5^2 + 5^3 + 5^4 = 781 + // We want this column to also have 781 distinct values. 
+ table_data_profile.set_distribution_params(dtype, distribution_id::UNIFORM, 0, 781); + } + table_data_profile.set_null_frequency(null_frequency); + + auto const table = create_random_table( + {dtype}, table_size_bytes{static_cast(size)}, table_data_profile, 0); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + rmm::cuda_stream_view stream_view{launch.get_stream()}; + auto result = cudf::detail::distinct(*table, {0}, cudf::null_equality::EQUAL, stream_view); + }); +} + +NVBENCH_BENCH_TYPES(nvbench_distinct_list, + NVBENCH_TYPE_AXES(nvbench::type_list)) + .set_name("distinct_list") + .set_type_axes_names({"Type"}) + .add_float64_axis("null_frequency", {0.0, 0.1}) + .add_int64_axis("ColumnSize", {100'000'000}); diff --git a/cpp/benchmarks/text/subword.cpp b/cpp/benchmarks/text/subword.cpp index d8357dcf92c..2c430868341 100644 --- a/cpp/benchmarks/text/subword.cpp +++ b/cpp/benchmarks/text/subword.cpp @@ -14,7 +14,8 @@ * limitations under the License. */ -#include +#include +#include #include @@ -53,9 +54,9 @@ static std::string create_hash_vocab_file() return hash_file; } -static void BM_cuda_tokenizer_cudf(benchmark::State& state) +static void BM_subword_tokenizer(benchmark::State& state) { - uint32_t nrows = 1000; + auto const nrows = static_cast(state.range(0)); std::vector h_strings(nrows, "This is a test "); cudf::test::strings_column_wrapper strings(h_strings.begin(), h_strings.end()); std::string hash_file = create_hash_vocab_file(); @@ -67,6 +68,7 @@ static void BM_cuda_tokenizer_cudf(benchmark::State& state) // auto vocab = nvtext::load_vocabulary_file(hash_file); for (auto _ : state) { + cuda_event_timer raii(state, true); auto result = nvtext::subword_tokenize(cudf::strings_column_view{strings}, *vocab, max_sequence_length, @@ -76,6 +78,18 @@ static void BM_cuda_tokenizer_cudf(benchmark::State& state) MAX_ROWS_TENSOR); } } -BENCHMARK(BM_cuda_tokenizer_cudf); -BENCHMARK_MAIN(); +class Subword : public cudf::benchmark { +}; + +#define 
SUBWORD_BM_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(Subword, name)(::benchmark::State & state) { BM_subword_tokenizer(state); } \ + BENCHMARK_REGISTER_F(Subword, name) \ + ->RangeMultiplier(2) \ + ->Range(1 << 10, 1 << 17) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +SUBWORD_BM_BENCHMARK_DEFINE(BM_subword_tokenizer); + +// BENCHMARK_MAIN(); diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 5232821d113..332b0d9dc96 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -22,7 +22,7 @@ function(find_and_configure_cucollections) GLOBAL_TARGETS cuco::cuco BUILD_EXPORT_SET cudf-exports CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections - GIT_TAG fb58a38701f1c24ecfe07d8f1f208bbe80930da5 + GIT_TAG 8b15f06f38d034e815bc72045ca3403787f75e07 EXCLUDE_FROM_ALL ${BUILD_SHARED_LIBS} OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" ) diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 2e559afef4f..8f1ad7da9b6 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -17,7 +17,10 @@ #pragma once #include +#include #include +#include +#include #include #include @@ -939,5 +942,155 @@ std::unique_ptr sample( int64_t const seed = 0, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Checks if a column or its descendants have non-empty null rows + * + * @note This function is exact. If it returns `true`, there exists one or more + * non-empty null elements. + * + * A LIST or STRING column might have non-empty rows that are marked as null. + * A STRUCT OR LIST column might have child columns that have non-empty null rows. + * Other types of columns are deemed incapable of having non-empty null rows. + * E.g. Fixed width columns have no concept of an "empty" row. 
+ * + * @param input The column which is (and whose descendants are) to be checked for + * non-empty null rows. + * @return true If either the column or its descendants have non-empty null rows. + * @return false If neither the column nor its descendants have non-empty null rows. + */ +bool has_nonempty_nulls(column_view const& input); + +/** + * @brief Approximates if a column or its descendants *may* have non-empty null elements + * + * @note This function is approximate. + * - `true`: Non-empty null elements could exist + * - `false`: Non-empty null elements definitely do not exist + * + * False positives are possible, but false negatives are not. + * + * Compared to the exact `has_nonempty_nulls()` function, this function is typically + * more efficient. + * + * Complexity: + * - Best case: `O(count_descendants(input))` + * - Worst case: `O(count_descendants(input) * m)`, where `m` is the number of rows in the largest + * descendant + * + * @param input The column which is (and whose descendants are) to be checked for + * non-empty null rows + * @return true If either the column or its descendants may have non-empty null rows + * @return false If neither the column nor its descendants have non-empty null rows + */ +bool may_have_nonempty_nulls(column_view const& input); + +/** + * @brief Copies `input`, purging any non-empty null rows in the column or its descendants + * + * LIST columns may have non-empty null rows. + * For example: + * @code{.pseudo} + * + * auto const lists = lists_column_wrapper{ {0,1}, {2,3}, {4,5} }.release(); + * cudf::detail::set_null_mask(lists->null_mask(), 1, 2, false); + * + * lists[1] is now null, but the lists child column still stores `{2,3}`.
+ * The lists column contents will be: + * Validity: 101 + * Offsets: [0, 2, 4, 6] + * Child: [0, 1, 2, 3, 4, 5] + * + * After purging the contents of the list's null rows, the column's contents + * will be: + * Validity: 101 + * Offsets: [0, 2, 2, 4] + * Child: [0, 1, 4, 5] + * @endcode + * + * The purge operation only applies directly to LIST and STRING columns, but it + * applies indirectly to STRUCT columns as well, since LIST and STRUCT columns + * may have child/descendant columns that are LIST or STRING. + * + * @param input The column whose null rows are to be checked and purged + * @param mr Device memory resource used to allocate the returned column's device memory + * @return std::unique_ptr Column with equivalent contents to `input`, but with + * the contents of null rows purged + */ +std::unique_ptr purge_nonempty_nulls( + lists_column_view const& input, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Copies `input`, purging any non-empty null rows in the column or its descendants + * + * STRING columns may have non-empty null rows. + * For example: + * @code{.pseudo} + * + * auto const strings = strings_column_wrapper{ "AB", "CD", "EF" }.release(); + * cudf::detail::set_null_mask(strings->null_mask(), 1, 2, false); + * + * strings[1] is now null, but the strings column still stores `"CD"`. + * The strings column contents will be: + * Validity: 101 + * Offsets: [0, 2, 4, 6] + * Child: [A, B, C, D, E, F] + * + * After purging the contents of the strings column's null rows, the column's contents + * will be: + * Validity: 101 + * Offsets: [0, 2, 2, 4] + * Child: [A, B, E, F] + * @endcode + * + * The purge operation only applies directly to LIST and STRING columns, but it + * applies indirectly to STRUCT columns as well, since LIST and STRUCT columns + * may have child/descendant columns that are LIST or STRING.
+ * + * @param input The column whose null rows are to be checked and purged + * @param mr Device memory resource used to allocate the returned column's device memory + * @return std::unique_ptr Column with equivalent contents to `input`, but with + * the contents of null rows purged + */ +std::unique_ptr purge_nonempty_nulls( + strings_column_view const& input, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Copies `input`, purging any non-empty null rows in the column or its descendants + * + * STRUCT columns may have null rows, with non-empty child rows. + * For example: + * @code{.pseudo} + * + * auto const lists = lists_column_wrapper{ {0,1}, {2,3}, {4,5} }; + * auto const structs = structs_column_wrapper{ {lists}, null_at(1) }; + * + * structs[1].child is now null, but the lists column still stores `{2,3}`. + * The lists column contents will be: + * Validity: 101 + * Offsets: [0, 2, 4, 6] + * Child: [0, 1, 2, 3, 4, 5] + * + * After purging the contents of the list's null rows, the column's contents + * will be: + * Validity: 101 + * Offsets: [0, 2, 2, 4] + * Child: [0, 1, 4, 5] + * @endcode + * + * The purge operation only applies directly to LIST and STRING columns, but it + * applies indirectly to STRUCT columns as well, since LIST and STRUCT columns + * may have child/descendant columns that are LIST or STRING.
+ * + * @param input The column whose null rows are to be checked and purged + * @param mr Device memory resource used to allocate the returned column's device memory + * @return std::unique_ptr Column with equivalent contents to `input`, but with + * the contents of null rows purged + */ +std::unique_ptr purge_nonempty_nulls( + structs_column_view const& input, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ } // namespace cudf diff --git a/cpp/include/cudf/detail/copy.cuh b/cpp/include/cudf/detail/copy.cuh new file mode 100644 index 00000000000..773bce7131f --- /dev/null +++ b/cpp/include/cudf/detail/copy.cuh @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +namespace cudf::detail { + +/** + * @copydoc cudf::purge_nonempty_nulls(structs_column_view const&, rmm::mr::device_memory_resource*) + * + * @tparam ColumnViewT View type (lists_column_view, strings_column_view, or structs_column_view) + * @param stream CUDA stream used for device memory operations and kernel launches + */ +template +std::unique_ptr purge_nonempty_nulls(ColumnViewT const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // Implement via identity gather.
+ auto const input_column = input.parent(); + auto const gather_begin = thrust::counting_iterator(0); + auto const gather_end = gather_begin + input_column.size(); + + auto gathered_table = cudf::detail::gather(table_view{{input_column}}, + gather_begin, + gather_end, + out_of_bounds_policy::DONT_CHECK, + stream, + mr); + return std::move(gathered_table->release()[0]); +} + +} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp index 50157d16876..abd14fbda89 100644 --- a/cpp/include/cudf/detail/copy.hpp +++ b/cpp/include/cudf/detail/copy.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -299,5 +299,22 @@ std::unique_ptr get_element( size_type index, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @copydoc cudf::has_nonempty_nulls + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +bool has_nonempty_nulls(column_view const& input, + rmm::cuda_stream_view stream = rmm::cuda_stream_default); + +/** + * @copydoc cudf::may_have_nonempty_nulls + * + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ */ +bool may_have_nonempty_nulls(column_view const& input, + rmm::cuda_stream_view stream = rmm::cuda_stream_default); + } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp index e8e100aaec5..9958fa8f3a4 100644 --- a/cpp/include/cudf/detail/hashing.hpp +++ b/cpp/include/cudf/detail/hashing.hpp @@ -33,19 +33,20 @@ namespace detail { std::unique_ptr hash( table_view const& input, hash_id hash_function = hash_id::HASH_MURMUR3, - uint32_t seed = 0, + uint32_t seed = cudf::DEFAULT_HASH_SEED, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); std::unique_ptr murmur_hash3_32( table_view const& input, + uint32_t seed = cudf::DEFAULT_HASH_SEED, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); template