diff --git a/CHANGELOG.md b/CHANGELOG.md index 2064f0ea04d..a7332b5c2f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,29 +2,38 @@ ## New Features - PR #6856 Add groupby idxmin, idxmax aggregation - - PR #6847 Add a cmake find module for cuFile in JNI code - PR #6902 Implement `DataFrame.quantile` for `datetime` and `timedelta` data types - PR #6814 Implement `cudf::reduce` for `decimal32` and `decimal64` (part 1) - PR #6929 Add `Index.set_names` api - PR #6907 Add `replace_null` API with `replace_policy` parameter, `fixed_width` column support +- PR #6885 Share `factorize` implementation with Index and cudf module + +- PR #6775 Implement cudf.DateOffset for months ## Improvements +- PR #6938 Pass numeric scalars of the same dtype through numeric binops - PR #6275 Update to official libcu++ on Github - PR #6838 Fix `columns` & `index` handling in dataframe constructor - PR #6750 Remove **kwargs from string/categorical methods +- PR #6909 Support reading byte array backed decimal columns from parquet files - PR #6939 Use simplified `rmm::exec_policy` +- PR #6512 Refactor rolling.cu to reduce compile time +- PR #6982 Disable some pragma unroll statements in thrust `sort.h` ## Bug Fixes +- PR #6884 Correct the sampling range when sampling with replacement +- PR #6903 Add null count test for apply_boolean_mask - PR #6922 Fix N/A detection for empty fields in CSV reader - PR #6912 Fix rmm_mode=managed parameter for gtests +- PR #6943 Fix join with nulls not equal performance - PR #6945 Fix groupby agg/apply behaviour when no key columns are provided - PR #6942 Fix cudf::merge gtest for dictionary columns -# cuDF 0.17.0 (Date TBD) +# cuDF 0.17.0 (10 Dec 2020) ## New Features @@ -63,6 +72,7 @@ - PR #6765 Cupy fallback for __array_function__ and __array_ufunc__ for cudf.Series - PR #6817 Add support for scatter() on lists-of-struct columns - PR #6805 Implement `cudf::detail::copy_if` for `decimal32` and `decimal64` +- PR #6483 Add `agg` function to aggregate dataframe 
using one or more operations - PR #6726 Support selecting different hash functions in hash_partition - PR #6619 Improve Dockerfile - PR #6831 Added parquet chunked writing ability for list columns @@ -153,6 +163,7 @@ - PR #6837 Avoid gather when copying strings view from start of strings column - PR #6859 Move align_ptr_for_type() from cuda.cuh to alignment.hpp - PR #6807 Refactor `std::array` usage in row group index writing in ORC +- PR #6914 Enable groupby `list` aggregation for strings - PR #6908 Parquet option for strictly decimal reading ## Bug Fixes diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 6c516d50a70..6991a5bac01 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -205,15 +205,15 @@ fi cd $WORKSPACE/python/cudf gpuci_logger "Python py.test for cuDF" -py.test --cache-clear --basetemp=${WORKSPACE}/cudf-cuda-tmp --junitxml=${WORKSPACE}/junit-cudf.xml -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:${WORKSPACE}/python/cudf/cudf-coverage.xml --cov-report term +py.test -n 6 --cache-clear --basetemp=${WORKSPACE}/cudf-cuda-tmp --junitxml=${WORKSPACE}/junit-cudf.xml -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:${WORKSPACE}/python/cudf/cudf-coverage.xml --cov-report term cd $WORKSPACE/python/dask_cudf gpuci_logger "Python py.test for dask-cudf" -py.test --cache-clear --basetemp=${WORKSPACE}/dask-cudf-cuda-tmp --junitxml=${WORKSPACE}/junit-dask-cudf.xml -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:${WORKSPACE}/python/dask_cudf/dask-cudf-coverage.xml --cov-report term +py.test -n 6 --cache-clear --basetemp=${WORKSPACE}/dask-cudf-cuda-tmp --junitxml=${WORKSPACE}/junit-dask-cudf.xml -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:${WORKSPACE}/python/dask_cudf/dask-cudf-coverage.xml --cov-report term cd $WORKSPACE/python/custreamz gpuci_logger "Python py.test for cuStreamz" -py.test --cache-clear --basetemp=${WORKSPACE}/custreamz-cuda-tmp --junitxml=${WORKSPACE}/junit-custreamz.xml -v --cov-config=.coveragerc 
--cov=custreamz --cov-report=xml:${WORKSPACE}/python/custreamz/custreamz-coverage.xml --cov-report term +py.test -n 6 --cache-clear --basetemp=${WORKSPACE}/custreamz-cuda-tmp --junitxml=${WORKSPACE}/junit-custreamz.xml -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:${WORKSPACE}/python/custreamz/custreamz-coverage.xml --cov-report term gpuci_logger "Test notebooks" ${WORKSPACE}/ci/gpu/test-notebooks.sh 2>&1 | tee nbtest.log diff --git a/conda/environments/cudf_dev_cuda10.1.yml b/conda/environments/cudf_dev_cuda10.1.yml index 5acc953c03e..3de28fe30b0 100644 --- a/conda/environments/cudf_dev_cuda10.1.yml +++ b/conda/environments/cudf_dev_cuda10.1.yml @@ -23,6 +23,7 @@ dependencies: - fsspec>=0.6.0 - pytest - pytest-benchmark + - pytest-xdist - sphinx - sphinx_rtd_theme - sphinxcontrib-websupport diff --git a/conda/environments/cudf_dev_cuda10.2.yml b/conda/environments/cudf_dev_cuda10.2.yml index 6ae0c6a8703..7055228943a 100644 --- a/conda/environments/cudf_dev_cuda10.2.yml +++ b/conda/environments/cudf_dev_cuda10.2.yml @@ -23,6 +23,7 @@ dependencies: - fsspec>=0.6.0 - pytest - pytest-benchmark + - pytest-xdist - sphinx - sphinx_rtd_theme - sphinxcontrib-websupport diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index c313352d731..497d8feefea 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -23,6 +23,7 @@ dependencies: - fsspec>=0.6.0 - pytest - pytest-benchmark + - pytest-xdist - sphinx - sphinx_rtd_theme - sphinxcontrib-websupport diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index f22929b7649..5348ec471e9 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -26,7 +26,7 @@ requirements: - cmake >=3.17.0 host: - libcudf {{ version }} - - librdkafka 1.5 + - librdkafka >=1.5.0,<1.5.3 run: - {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should 
be automatically included here by run_exports but is not diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 615960bece6..3f435a4368d 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -164,7 +164,8 @@ ConfigureBench(SEARCH_BENCH "${SEARCH_BENCH_SRC}") # - sort benchmark -------------------------------------------------------------------------------- set(SORT_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_benchmark.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_strings_benchmark.cu") ConfigureBench(SORT_BENCH "${SORT_BENCH_SRC}") diff --git a/cpp/benchmarks/common/generate_benchmark_input.cpp b/cpp/benchmarks/common/generate_benchmark_input.cpp index a064270d5a5..2419114ab20 100644 --- a/cpp/benchmarks/common/generate_benchmark_input.cpp +++ b/cpp/benchmarks/common/generate_benchmark_input.cpp @@ -307,7 +307,7 @@ std::unique_ptr create_random_column(data_profile const& profile, */ struct string_column_data { std::vector chars; - std::vector offsets; + std::vector offsets; std::vector null_mask; explicit string_column_data(cudf::size_type rows, cudf::size_type size) { diff --git a/cpp/benchmarks/join/join_benchmark.cu b/cpp/benchmarks/join/join_benchmark.cu index b18ceafdae6..bd013afc451 100644 --- a/cpp/benchmarks/join/join_benchmark.cu +++ b/cpp/benchmarks/join/join_benchmark.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -36,7 +37,7 @@ template class Join : public cudf::benchmark { }; -template +template static void BM_join(benchmark::State &state) { const cudf::size_type build_table_size{(cudf::size_type)state.range(0)}; @@ -46,11 +47,33 @@ static void BM_join(benchmark::State &state) const bool is_build_table_key_unique = true; // Generate build and probe tables - - auto build_key_column = - cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), build_table_size); - auto probe_key_column = - 
cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), probe_table_size); + cudf::test::UniformRandomGenerator rand_gen(0, build_table_size); + auto build_random_null_mask = [&rand_gen](int size) { + if (Nullable) { + // roughly 25% nulls + auto validity = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [&rand_gen](auto i) { return (rand_gen.generate() & 3) == 0; }); + return cudf::test::detail::make_null_mask(validity, validity + size); + } else { + return cudf::create_null_mask(size, cudf::mask_state::UNINITIALIZED); + } + }; + + std::unique_ptr build_key_column = [&]() { + return Nullable ? cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), + build_table_size, + build_random_null_mask(build_table_size)) + : cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), + build_table_size); + }(); + std::unique_ptr probe_key_column = [&]() { + return Nullable ? cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), + probe_table_size, + build_random_null_mask(probe_table_size)) + : cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), + probe_table_size); + }(); generate_input_tables( build_key_column->mutable_view().data(), @@ -82,17 +105,23 @@ static void BM_join(benchmark::State &state) for (auto _ : state) { cuda_event_timer raii(state, true, 0); - auto result = - cudf::inner_join(probe_table, build_table, columns_to_join, columns_to_join, {{0, 0}}); + auto result = cudf::inner_join(probe_table, + build_table, + columns_to_join, + columns_to_join, + {{0, 0}}, + cudf::null_equality::UNEQUAL); } } -#define JOIN_BENCHMARK_DEFINE(name, key_type, payload_type) \ - BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \ - (::benchmark::State & st) { BM_join(st); } +#define JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \ + BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \ + (::benchmark::State & st) { BM_join(st); } -JOIN_BENCHMARK_DEFINE(join_32bit, int32_t, 
int32_t); -JOIN_BENCHMARK_DEFINE(join_64bit, int64_t, int64_t); +JOIN_BENCHMARK_DEFINE(join_32bit, int32_t, int32_t, false); +JOIN_BENCHMARK_DEFINE(join_64bit, int64_t, int64_t, false); +JOIN_BENCHMARK_DEFINE(join_32bit_nulls, int32_t, int32_t, true); +JOIN_BENCHMARK_DEFINE(join_64bit_nulls, int64_t, int64_t, true); BENCHMARK_REGISTER_F(Join, join_32bit) ->Unit(benchmark::kMillisecond) @@ -111,3 +140,21 @@ BENCHMARK_REGISTER_F(Join, join_64bit) ->Args({50'000'000, 50'000'000}) ->Args({40'000'000, 120'000'000}) ->UseManualTime(); + +BENCHMARK_REGISTER_F(Join, join_32bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({100'000, 100'000}) + ->Args({100'000, 400'000}) + ->Args({100'000, 1'000'000}) + ->Args({10'000'000, 10'000'000}) + ->Args({10'000'000, 40'000'000}) + ->Args({10'000'000, 100'000'000}) + ->Args({100'000'000, 100'000'000}) + ->Args({80'000'000, 240'000'000}) + ->UseManualTime(); + +BENCHMARK_REGISTER_F(Join, join_64bit_nulls) + ->Unit(benchmark::kMillisecond) + ->Args({50'000'000, 50'000'000}) + ->Args({40'000'000, 120'000'000}) + ->UseManualTime(); diff --git a/cpp/benchmarks/sort/sort_strings_benchmark.cu b/cpp/benchmarks/sort/sort_strings_benchmark.cu new file mode 100644 index 00000000000..0566ac2ed75 --- /dev/null +++ b/cpp/benchmarks/sort/sort_strings_benchmark.cu @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include + +class SortStrings : public cudf::benchmark { +}; + +static void BM_sort(benchmark::State& state) +{ + cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; + + auto const table = create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}); + + for (auto _ : state) { + cuda_event_timer raii(state, true, 0); + cudf::sort(table->view()); + } +} + +#define SORT_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(SortStrings, name) \ + (::benchmark::State & st) { BM_sort(st); } \ + BENCHMARK_REGISTER_F(SortStrings, name) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 10, 1 << 24}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +SORT_BENCHMARK_DEFINE(stringssort) diff --git a/cpp/benchmarks/synchronization/synchronization.hpp b/cpp/benchmarks/synchronization/synchronization.hpp index 5e84e9fb9ae..d972247c86d 100644 --- a/cpp/benchmarks/synchronization/synchronization.hpp +++ b/cpp/benchmarks/synchronization/synchronization.hpp @@ -17,7 +17,7 @@ /** * @file synchronization.hpp * @brief This is the header file for `cuda_event_timer`. - **/ + */ /** * @brief This class serves as a wrapper for using `cudaEvent_t` as the user @@ -54,7 +54,7 @@ BENCHMARK(sample_cuda_benchmark)->UseManualTime(); - **/ + */ #ifndef CUDF_BENCH_SYNCHRONIZATION_H #define CUDF_BENCH_SYNCHRONIZATION_H @@ -79,7 +79,7 @@ class cuda_event_timer { * @param[in] flush_l2_cache_ whether or not to flush the L2 cache before * every iteration. * @param[in] stream_ The CUDA stream we are measuring time on. 
- **/ + */ cuda_event_timer(benchmark::State& state, bool flush_l2_cache, rmm::cuda_stream_view stream = rmm::cuda_stream_default); diff --git a/cpp/docs/TRANSITIONGUIDE.md b/cpp/docs/TRANSITIONGUIDE.md index e1ffd2b6525..8a8a8dc26c4 100644 --- a/cpp/docs/TRANSITIONGUIDE.md +++ b/cpp/docs/TRANSITIONGUIDE.md @@ -777,7 +777,7 @@ namespace experimental{ * @param mr Memory resource used to allocate device memory for the returned * output column * @return std::unique_ptr Newly allocated output column - **/ + */ std::unique_ptr new_function(cudf::column_view input, cudf::mutable_column_view in_out, cudf::table_view input_table, diff --git a/cpp/include/cudf/ast/detail/linearizer.hpp b/cpp/include/cudf/ast/detail/linearizer.hpp index c9f61490c5d..44910bcaad1 100644 --- a/cpp/include/cudf/ast/detail/linearizer.hpp +++ b/cpp/include/cudf/ast/detail/linearizer.hpp @@ -39,7 +39,6 @@ namespace detail { * * This enum is device-specific. For instance, intermediate data references are generated by the * linearization process but cannot be explicitly created by the user. - * */ enum class device_data_reference_type { COLUMN, // A value in a table column @@ -52,7 +51,6 @@ enum class device_data_reference_type { * * This is a POD class used to create references describing data type and locations for consumption * by the `row_evaluator`. - * */ struct alignas(8) device_data_reference { device_data_reference(device_data_reference_type reference_type, @@ -85,7 +83,6 @@ class linearizer; * * This class is a part of a "visitor" pattern with the `linearizer` class. * Nodes inheriting from this class can accept visitors. - * */ class node { friend class detail::linearizer; @@ -104,7 +101,6 @@ class node { * the nodes and constructing vectors of information that are later used by the device for * evaluating the abstract syntax tree as a "linear" list of operators whose input dependencies are * resolved into intermediate data storage in shared memory. 
- * */ class linearizer { friend class literal; diff --git a/cpp/include/cudf/ast/detail/operators.hpp b/cpp/include/cudf/ast/detail/operators.hpp index 536dbb94a52..8ec26cf5eb7 100644 --- a/cpp/include/cudf/ast/detail/operators.hpp +++ b/cpp/include/cudf/ast/detail/operators.hpp @@ -931,7 +931,6 @@ struct dispatch_unary_operator_types { /** * @brief Functor performing a type dispatch for a unary operator. - * */ struct type_dispatch_unary_op { template @@ -968,7 +967,6 @@ CUDA_HOST_DEVICE_CALLABLE constexpr void unary_operator_dispatcher(ast_operator /** * @brief Functor to determine the return type of an operator from its input types. - * */ struct return_type_functor { /** @@ -1057,7 +1055,6 @@ inline cudf::data_type ast_operator_return_type(ast_operator op, /** * @brief Functor to determine the arity (number of operands) of an operator. - * */ struct arity_functor { template diff --git a/cpp/include/cudf/ast/detail/transform.cuh b/cpp/include/cudf/ast/detail/transform.cuh index 61aedab2f04..ee08742d871 100644 --- a/cpp/include/cudf/ast/detail/transform.cuh +++ b/cpp/include/cudf/ast/detail/transform.cuh @@ -126,7 +126,6 @@ struct binary_row_output : public row_output { * This class is designed for n-ary transform evaluation. Currently this class assumes that there's * only one relevant "row index" in its methods, which corresponds to a row in a single input table * and the same row index in an output column. - * */ struct row_evaluator { friend struct row_output; diff --git a/cpp/include/cudf/ast/linearizer.hpp b/cpp/include/cudf/ast/linearizer.hpp index 541e16c992a..594dd0a73ce 100644 --- a/cpp/include/cudf/ast/linearizer.hpp +++ b/cpp/include/cudf/ast/linearizer.hpp @@ -32,7 +32,6 @@ namespace ast { * @brief Enum of table references. * * This determines which table to use in cases with two tables (e.g. joins). 
- * */ enum class table_reference { LEFT, // Column index in the left table @@ -47,7 +46,6 @@ class expression; /** * @brief A literal value used in an abstract syntax tree. - * */ class literal : public detail::node { friend class detail::linearizer; @@ -114,7 +112,6 @@ class literal : public detail::node { /** * @brief A node referring to data from a column in a table. - * */ class column_reference : public detail::node { friend class detail::linearizer; @@ -194,7 +191,6 @@ class column_reference : public detail::node { /** * @brief An expression node holds an operator and zero or more operands. - * */ class expression : public detail::node { friend class detail::linearizer; diff --git a/cpp/include/cudf/ast/operators.hpp b/cpp/include/cudf/ast/operators.hpp index 75c2eac9d8a..78e56340246 100644 --- a/cpp/include/cudf/ast/operators.hpp +++ b/cpp/include/cudf/ast/operators.hpp @@ -21,7 +21,6 @@ namespace ast { /** * @brief Enum of supported operators. - * */ enum class ast_operator { // Binary operators diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp index 7966b6a1472..a08b10df6f4 100644 --- a/cpp/include/cudf/column/column.hpp +++ b/cpp/include/cudf/column/column.hpp @@ -53,7 +53,7 @@ class column { * @brief Construct a new column by deep copying the contents of `other`. * * @param other The column to copy - **/ + */ column(column const& other); /** @@ -77,7 +77,7 @@ class column { * After the move, `other.size() == 0` and `other.type() = {EMPTY}` * * @param other The column whose contents will be moved into the new column - **/ + */ column(column&& other) noexcept; /** @@ -95,7 +95,7 @@ class column { * `UNKNOWN_NULL_COUNT` to indicate that the null count should be computed on * the first invocation of `null_count()`. 
* @param children Optional, vector of child columns - **/ + */ template column(data_type dtype, size_type size, diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 0f9bcfd5cd9..1672f0d69aa 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -50,7 +50,6 @@ namespace detail { * not-obvious computation of null count, which could lead to undesirable performance issues. * This information is also generally not needed in device code, and on the host-side * is easily accessible from the associated column_view. - * */ class alignas(16) column_device_view_base { public: @@ -795,6 +794,29 @@ __device__ inline numeric::decimal64 const column_device_view::element + word_index(source_begin_bit + + destination_word_index * detail::size_in_bits())) { + next_word = source[source_word_index + 1]; + } + return __funnelshift_r(curr_word, next_word, source_begin_bit); +} + /** * @brief value accessor of column without null bitmask * A unary functor returns scalar value at `id`. diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index 081ec0b84cb..7ccc5879f5f 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -583,7 +583,6 @@ std::unique_ptr make_lists_column( * @param[in] null_mask The bits specifying the null struct values in the column. * @param[in] stream Optional stream for use with all memory allocation and device kernels. * @param[in] mr Optional resource to use for device memory allocation. - * */ std::unique_ptr make_structs_column( size_type num_rows, diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index e491cc30c90..d3d64eb21df 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. 
+ * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,8 +42,7 @@ namespace detail { * To enable zero-copy slicing, a `column_view_base` has an `offset` that *indicates the index of the first element in the column relative to the base *device memory allocation. By default, `offset()` is zero. - * - **/ + */ class column_view_base { public: /** @@ -58,7 +57,7 @@ class column_view_base { * * @tparam The type to cast to * @return T const* Typed pointer to underlying data - **/ + */ template T const* head() const noexcept { @@ -75,7 +74,7 @@ class column_view_base { * * @tparam T The type to cast to * @return T const* Typed pointer to underlying data, including the offset - **/ + */ template T const* data() const noexcept { @@ -88,7 +87,7 @@ class column_view_base { * * @tparam T The desired type * @return T const* Pointer to the first element after casting - **/ + */ template T const* begin() const noexcept { @@ -101,7 +100,7 @@ class column_view_base { * * @tparam T The desired type * @return T const* Pointer to one past the last element after casting - **/ + */ template T const* end() const noexcept { @@ -110,17 +109,17 @@ class column_view_base { /** * @brief Returns the number of elements in the column - **/ + */ size_type size() const noexcept { return _size; } /** * @brief Returns true if `size()` returns zero, or false otherwise - **/ + */ size_type is_empty() const noexcept { return size() == 0; } /** * @brief Returns the element `data_type` - **/ + */ data_type type() const noexcept { return _type; } /** @@ -131,7 +130,7 @@ class column_view_base { * * @return true The bitmask is allocated * @return false The bitmask is not allocated - **/ + */ bool nullable() const noexcept { return nullptr != _null_mask; } /** @@ -141,7 +140,7 @@ class column_view_base { * point `set_null_count(UNKNOWN_NULL_COUNT)` was invoked, then the * 
first invocation of `null_count()` will compute and store the count of null * elements indicated by the `null_mask` (if it exists). - **/ + */ size_type null_count() const; /** @@ -156,7 +155,7 @@ class column_view_base { * * @param[in] begin The starting index of the range (inclusive). * @param[in] end The index of the last element in the range (exclusive). - **/ + */ size_type null_count(size_type begin, size_type end) const; /** @@ -165,7 +164,7 @@ class column_view_base { * * @return true One or more elements are null * @return false All elements are valid - **/ + */ bool has_nulls() const { return null_count() > 0; } /** @@ -188,13 +187,13 @@ class column_view_base { * @note This function does *not* account for the `offset()`. * * @note If `null_count() == 0`, this may return `nullptr`. - **/ + */ bitmask_type const* null_mask() const noexcept { return _null_mask; } /** * @brief Returns the index of the first element relative to the base memory * allocation, i.e., what is returned from `head()`. - **/ + */ size_type offset() const noexcept { return _offset; } protected: @@ -278,8 +277,7 @@ class mutable_column_view_base : public column_view_base { * To enable zero-copy slicing, a `column_view` has an `offset` that indicates * the index of the first element in the column relative to the base device * memory allocation. By default, `offset()` is zero. - * - **/ + */ class column_view : public detail::column_view_base { public: column_view() = default; @@ -347,7 +345,7 @@ class column_view : public detail::column_view_base { /** * @brief Returns the number of child columns. - **/ + */ size_type num_children() const noexcept { return _children.size(); } /** @@ -386,8 +384,7 @@ class column_view : public detail::column_view_base { * To enable zero-copy slicing, a `mutable_column_view` has an `offset` that * indicates the index of the first element in the column relative to the base * device memory allocation. By default, `offset()` is zero. 
- * - **/ + */ class mutable_column_view : public detail::column_view_base { public: mutable_column_view() = default; @@ -448,7 +445,7 @@ class mutable_column_view : public detail::column_view_base { * * @tparam The type to cast to * @return T* Typed pointer to underlying data - **/ + */ template T* head() const noexcept { @@ -465,7 +462,7 @@ class mutable_column_view : public detail::column_view_base { * * @tparam T The type to cast to * @return T* Typed pointer to underlying data, including the offset - **/ + */ template T* data() const noexcept { @@ -478,7 +475,7 @@ class mutable_column_view : public detail::column_view_base { * * @tparam T The desired type * @return T* Pointer to the first element after casting - **/ + */ template T* begin() const noexcept { @@ -491,7 +488,7 @@ class mutable_column_view : public detail::column_view_base { * * @tparam T The desired type * @return T* Pointer to one past the last element after casting - **/ + */ template T* end() const noexcept { @@ -516,7 +513,7 @@ class mutable_column_view : public detail::column_view_base { * @throws cudf::logic_error if `new_null_count > 0` and `nullable() == false` * * @param new_null_count The new null count - **/ + */ void set_null_count(size_type new_null_count); /** @@ -532,7 +529,7 @@ class mutable_column_view : public detail::column_view_base { /** * @brief Returns the number of child columns. 
- **/ + */ size_type num_children() const noexcept { return mutable_children.size(); } /** @@ -549,7 +546,7 @@ class mutable_column_view : public detail::column_view_base { * @brief Converts a mutable view into an immutable view * * @return column_view An immutable view of the mutable view's elements - **/ + */ operator column_view() const; private: @@ -563,7 +560,7 @@ class mutable_column_view : public detail::column_view_base { * * @param parent The parent whose descendants will be counted * @return size_type The number of descendants of the parent - **/ + */ size_type count_descendants(column_view parent); /** diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 5e3b3673053..c63fa62679f 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -735,7 +735,7 @@ std::unique_ptr get_element( /** * @brief Indicates whether a row can be sampled more than once. - **/ + */ enum class sample_with_replacement : bool { FALSE, // A row can be sampled only once TRUE // A row can be sampled more than once diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 5959ee34e04..1f70e68fce8 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -337,7 +337,7 @@ constexpr size_type ARGMIN_SENTINEL{-1}; * * @tparam Source The type on which the aggregation is computed * @tparam k The aggregation performed - **/ + */ template struct target_type_impl { using type = void; diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp index 0af8dd6a500..19d9d81a948 100644 --- a/cpp/include/cudf/detail/copy.hpp +++ b/cpp/include/cudf/detail/copy.hpp @@ -40,7 +40,7 @@ namespace detail { * @param[in] end Index of the last desired element in the slice (exclusive). * * @return ColumnView View of the elements `[begin,end)` from `input`. 
- **/ + */ template ColumnView slice(ColumnView const& input, cudf::size_type begin, cudf::size_type end) { @@ -92,7 +92,7 @@ std::unique_ptr shift( * @copydoc cudf::contiguous_split * * @param stream CUDA stream used for device memory operations and kernel launches. - **/ + */ std::vector contiguous_split( cudf::table_view const& input, std::vector const& splits, diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index a1be386006b..adae9b76c5b 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -331,7 +331,6 @@ struct column_gatherer_impl { /** * @brief Function object for gathering a type-erased * column. To be used with the cudf::type_dispatcher. - * */ struct column_gatherer { /** diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp index c6d2014f80e..cdc221dcdbe 100644 --- a/cpp/include/cudf/detail/interop.hpp +++ b/cpp/include/cudf/detail/interop.hpp @@ -100,7 +100,7 @@ data_type arrow_to_cudf_type(arrow::DataType const& arrow_type); * @copydoc cudf::to_arrow * * @param stream CUDA stream used for device memory operations and kernel launches. - **/ + */ std::shared_ptr to_arrow(table_view input, std::vector const& metadata = {}, rmm::cuda_stream_view stream = rmm::cuda_stream_default, @@ -110,7 +110,7 @@ std::shared_ptr to_arrow(table_view input, * @copydoc cudf::arrow_to_cudf * * @param stream CUDA stream used for device memory operations and kernel launches. - **/ + */ std::unique_ptr from_arrow( arrow::Table const& input_table, rmm::cuda_stream_view stream = rmm::cuda_stream_default, diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 3d504b142da..75a710d1d5c 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -/** --------------------------------------------------------------------------* +/** * @brief provides column input iterator with nulls replaced with a specified value * @file iterator.cuh * @@ -29,7 +29,7 @@ * For non-null column, use * auto iter = column.begin(); * - * -------------------------------------------------------------------------**/ + */ #pragma once @@ -39,7 +39,7 @@ namespace cudf { namespace detail { -/** -------------------------------------------------------------------------* +/** * @brief value accessor of column with null bitmask * A unary functor returns scalar value at `id`. * `operator() (cudf::size_type id)` computes `element` and valid flag at `id` @@ -52,17 +52,17 @@ namespace detail { * @throws cudf::logic_error if column datatype and Element type mismatch. * * @tparam Element The type of elements in the column - * -------------------------------------------------------------------------**/ + */ template struct null_replaced_value_accessor { column_device_view const col; ///< column view of column in device Element const null_replacement{}; ///< value returned when element is null - /** -------------------------------------------------------------------------* + /** * @brief constructor * @param[in] _col column device view of cudf column * @param[in] null_replacement The value to return for null elements - * -------------------------------------------------------------------------**/ + */ null_replaced_value_accessor(column_device_view const& _col, Element null_val) : col{_col}, null_replacement{null_val} { @@ -78,21 +78,21 @@ struct null_replaced_value_accessor { } }; -/** -------------------------------------------------------------------------* +/** * @brief validity accessor of column with null bitmask * A unary functor returns validity at `id`. 
* `operator() (cudf::size_type id)` computes validity flag at `id` * This functor is only allowed for nullable columns. * * @throws cudf::logic_error if the column is not nullable. - * -------------------------------------------------------------------------**/ + */ struct validity_accessor { column_device_view const col; - /** -------------------------------------------------------------------------* + /** * @brief constructor * @param[in] _col column device view of cudf column - * -------------------------------------------------------------------------**/ + */ validity_accessor(column_device_view const& _col) : col{_col} { // verify valid is non-null, otherwise, is_valid() will crash diff --git a/cpp/include/cudf/detail/merge.cuh b/cpp/include/cudf/detail/merge.cuh index cbf56d19380..06f9bfc5034 100644 --- a/cpp/include/cudf/detail/merge.cuh +++ b/cpp/include/cudf/detail/merge.cuh @@ -67,7 +67,6 @@ using index_vector = rmm::device_vector; * As a result, a special comparison logic is necessary whereby the index is "tagged" with side * information and consequently comparator functors (predicates) must operate on these tagged * indices rather than on raw indices. - * */ template struct tagged_element_relational_comparator { diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index 9a5e000f265..2f2bc91cb74 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -28,7 +28,7 @@ namespace detail { * @copydoc cudf::create_null_mask(size_type, mask_state, rmm::mr::device_memory_resource*) * * @param stream CUDA stream used for device memory operations and kernel launches. - **/ + */ rmm::device_buffer create_null_mask( size_type size, mask_state state, @@ -39,7 +39,7 @@ rmm::device_buffer create_null_mask( * @copydoc cudf::set_null_mask(bitmask_type*, size_type, size_type, bool) * * @param stream CUDA stream used for device memory operations and kernel launches. 
- **/ + */ void set_null_mask(bitmask_type *bitmask, size_type begin_bit, size_type end_bit, @@ -69,7 +69,7 @@ std::vector segmented_count_unset_bits(bitmask_type const *bitmask, *rmm::mr::device_memory_resource*) * * @param stream CUDA stream used for device memory operations and kernel launches. - **/ + */ rmm::device_buffer copy_bitmask( bitmask_type const *mask, size_type begin_bit, @@ -81,7 +81,7 @@ rmm::device_buffer copy_bitmask( * @copydoc cudf::copy_bitmask(column_view const& view, rmm::mr::device_memory_resource*) * * @param stream CUDA stream used for device memory operations and kernel launches. - **/ + */ rmm::device_buffer copy_bitmask( column_view const &view, rmm::cuda_stream_view stream, diff --git a/cpp/include/cudf/detail/nvtx/nvtx3.hpp b/cpp/include/cudf/detail/nvtx/nvtx3.hpp index 4ef665a8f2e..add5699e34a 100644 --- a/cpp/include/cudf/detail/nvtx/nvtx3.hpp +++ b/cpp/include/cudf/detail/nvtx/nvtx3.hpp @@ -27,7 +27,6 @@ * * If this value is incremented, the above version include guard needs to be * updated. - * */ #define NVTX3_MINOR_VERSION 0 @@ -496,7 +495,6 @@ * NVTX3_FUNC_RANGE(); * } * \endcode - * */ /** @@ -506,7 +504,6 @@ * Initializing a legacy-C (i.e., no constructor) union member requires * initializing in the constructor body. Non-empty constexpr constructors * require C++14 relaxed constexpr. - * */ #if __cpp_constexpr >= 201304L #define NVTX3_RELAXED_CONSTEXPR constexpr @@ -741,7 +738,6 @@ class domain { * will be grouped together. * * @return Reference to the `domain` corresponding to the global NVTX domain. - * */ template <> inline domain const& domain::get() @@ -753,7 +749,6 @@ inline domain const& domain::get() /** * @brief Indicates the values of the red, green, blue color channels for * a rgb color code. - * */ struct rgb { /// Type used for component values @@ -782,7 +777,6 @@ struct rgb { /** * @brief Indicates the value of the alpha, red, green, and blue color * channels for an argb color code. 
- * */ struct argb final : rgb { /** @@ -815,7 +809,6 @@ struct argb final : rgb { * Specifying colors for NVTX events is a convenient way to visually * differentiate among different events in a visualization tool such as Nsight * Systems. - * */ class color { public: @@ -921,7 +914,6 @@ class color { * \endcode * * To associate a name string with a category id, see `named_category`. - * */ class category { public: @@ -1537,7 +1529,6 @@ class payload { * // they will be forwarded to the `EventAttribute`s constructor * nvtx3::thread_range r{nvtx3::payload{42}, nvtx3::category{1}, "message"}; * \endcode - * */ class event_attributes { public: @@ -1763,7 +1754,6 @@ class domain_thread_range { /** * @brief Alias for a `domain_thread_range` in the global NVTX domain. - * */ using thread_range = domain_thread_range<>; @@ -1854,7 +1844,6 @@ class domain_process_range { /** * @brief Alias for a `domain_process_range` in the global NVTX domain. - * */ using process_range = domain_process_range<>; diff --git a/cpp/include/cudf/detail/nvtx/ranges.hpp b/cpp/include/cudf/detail/nvtx/ranges.hpp index 10f5916cde1..de5f9901506 100644 --- a/cpp/include/cudf/detail/nvtx/ranges.hpp +++ b/cpp/include/cudf/detail/nvtx/ranges.hpp @@ -21,7 +21,6 @@ namespace cudf { /** * @brief Tag type for libcudf's NVTX domain. - * */ struct libcudf_domain { static constexpr char const* name{"libcudf"}; ///< Name of the libcudf domain @@ -29,7 +28,6 @@ struct libcudf_domain { /** * @brief Alias for an NVTX range in the libcudf domain. - * */ using thread_range = ::nvtx3::domain_thread_range; @@ -49,6 +47,5 @@ using thread_range = ::nvtx3::domain_thread_range; * ... 
* } * ``` - * */ #define CUDF_FUNC_RANGE() NVTX3_FUNC_RANGE_IN(cudf::libcudf_domain) diff --git a/cpp/include/cudf/detail/reduction.cuh b/cpp/include/cudf/detail/reduction.cuh index 9d20375e8b1..2c2b259f1fe 100644 --- a/cpp/include/cudf/detail/reduction.cuh +++ b/cpp/include/cudf/detail/reduction.cuh @@ -33,7 +33,7 @@ namespace cudf { namespace reduction { namespace detail { -/** --------------------------------------------------------------------------* +/** * @brief Compute the specified simple reduction over the input range of elements. * * @param[in] d_in the begin iterator @@ -45,7 +45,7 @@ namespace detail { * @tparam Op the reduction operator with device binary operator * @tparam InputIterator the input column iterator * @tparam OutputType the output type of reduction - * ----------------------------------------------------------------------------**/ + */ template ::type, @@ -148,7 +148,7 @@ std::unique_ptr reduce(InputIterator d_in, return std::unique_ptr(s); } -/** --------------------------------------------------------------------------* +/** * @brief compute reduction by the compound operator (reduce and transform) * * @param[in] d_in the begin iterator @@ -166,7 +166,7 @@ std::unique_ptr reduce(InputIterator d_in, * @tparam Op the reduction operator with device binary operator * @tparam InputIterator the input column iterator * @tparam OutputType the output type of reduction - * ----------------------------------------------------------------------------**/ + */ template { * @param[in] mr Device memory resource used to allocate the returned table's device memory * * @return Result of scattering values from source to target - **/ + */ template std::unique_ptr
scatter( table_view const& source, diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp index a5676c86f49..a3b1f95ca0a 100644 --- a/cpp/include/cudf/detail/scatter.hpp +++ b/cpp/include/cudf/detail/scatter.hpp @@ -61,7 +61,7 @@ namespace detail { * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned table's device memory * @return Result of scattering values from source to target - **/ + */ std::unique_ptr
scatter( table_view const& source, column_view const& scatter_map, @@ -101,7 +101,7 @@ std::unique_ptr
scatter( * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned table's device memory * @return Result of scattering values from source to target - **/ + */ std::unique_ptr
scatter( std::vector> const& source, column_view const& indices, diff --git a/cpp/include/cudf/detail/sequence.hpp b/cpp/include/cudf/detail/sequence.hpp index c3bbb734476..6ba46219166 100644 --- a/cpp/include/cudf/detail/sequence.hpp +++ b/cpp/include/cudf/detail/sequence.hpp @@ -30,7 +30,7 @@ namespace detail { *rmm::mr::get_current_device_resource()) * * @param stream CUDA stream used for device memory operations and kernel launches. - **/ + */ std::unique_ptr sequence( size_type size, scalar const& init, @@ -44,7 +44,7 @@ std::unique_ptr sequence( rmm::mr::get_current_device_resource()) * * @param stream CUDA stream used for device memory operations and kernel launches. - **/ + */ std::unique_ptr sequence( size_type size, scalar const& init, diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp index 0309542d01f..bea480d85cd 100644 --- a/cpp/include/cudf/detail/transform.hpp +++ b/cpp/include/cudf/detail/transform.hpp @@ -26,7 +26,7 @@ namespace detail { * @copydoc cudf::transform * * @param stream CUDA stream used for device memory operations and kernel launches. - **/ + */ std::unique_ptr transform( column_view const& input, std::string const& unary_udf, @@ -39,7 +39,7 @@ std::unique_ptr transform( * @copydoc cudf::nans_to_nulls * * @param stream CUDA stream used for device memory operations and kernel launches. - **/ + */ std::pair, size_type> nans_to_nulls( column_view const& input, rmm::cuda_stream_view stream = rmm::cuda_stream_default, @@ -49,7 +49,7 @@ std::pair, size_type> nans_to_nulls( * @copydoc cudf::bools_to_mask * * @param stream CUDA stream used for device memory operations and kernel launches. - **/ + */ std::pair, cudf::size_type> bools_to_mask( column_view const& input, rmm::cuda_stream_view stream = rmm::cuda_stream_default, @@ -59,7 +59,7 @@ std::pair, cudf::size_type> bools_to_mask( * @copydoc cudf::encode * * @param stream CUDA stream used for device memory operations and kernel launches. 
- **/ + */ std::pair, std::unique_ptr> encode( cudf::table_view const& input, rmm::cuda_stream_view stream = rmm::cuda_stream_default, @@ -69,7 +69,7 @@ std::pair, std::unique_ptr> encode( * @copydoc cudf::mask_to_bools * * @param stream CUDA stream used for device memory operations and kernel launches. - **/ + */ std::unique_ptr mask_to_bools( bitmask_type const* null_mask, size_type begin_bit, diff --git a/cpp/include/cudf/detail/utilities/device_atomics.cuh b/cpp/include/cudf/detail/utilities/device_atomics.cuh index a2f73d9a0e9..246817a5cb5 100644 --- a/cpp/include/cudf/detail/utilities/device_atomics.cuh +++ b/cpp/include/cudf/detail/utilities/device_atomics.cuh @@ -16,7 +16,7 @@ #pragma once -/** ---------------------------------------------------------------------------* +/** * @brief overloads for CUDA atomic operations * @file device_atomics.cuh * @@ -30,7 +30,7 @@ * `atomicAnd`, `atomicOr`, `atomicXor` are also supported for integer data types. * Also provides `cudf::genericAtomicOperation` which performs atomic operation * with the given binary operator. 
- * ---------------------------------------------------------------------------**/ + */ #include #include @@ -412,7 +412,7 @@ struct typesAtomicCASImpl { } // namespace detail -/** -------------------------------------------------------------------------* +/** * @brief compute atomic binary operation * reads the `old` located at the `address` in global or shared memory, * computes 'BinaryOp'('old', 'update_value'), @@ -427,7 +427,7 @@ struct typesAtomicCASImpl { * @param[in] op The binary operator used for compute * * @returns The old value at `address` - * -------------------------------------------------------------------------**/ + */ template typename std::enable_if_t(), T> __forceinline__ __device__ genericAtomicOperation(T* address, T const& update_value, BinaryOp op) @@ -476,7 +476,7 @@ __forceinline__ __device__ bool genericAtomicOperation(bool* address, } // namespace cudf -/** -------------------------------------------------------------------------* +/** * @brief Overloads for `atomicAdd` * reads the `old` located at the `address` in global or shared memory, * computes (old + val), and stores the result back to memory at the same @@ -496,14 +496,14 @@ __forceinline__ __device__ bool genericAtomicOperation(bool* address, * @param[in] val The value to be added * * @returns The old value at `address` - * -------------------------------------------------------------------------**/ + */ template __forceinline__ __device__ T atomicAdd(T* address, T val) { return cudf::genericAtomicOperation(address, val, cudf::DeviceSum{}); } -/** -------------------------------------------------------------------------* +/** * @brief Overloads for `atomicMin` * reads the `old` located at the `address` in global or shared memory, * computes the minimum of old and val, and stores the result back to memory @@ -522,14 +522,14 @@ __forceinline__ __device__ T atomicAdd(T* address, T val) * @param[in] val The value to be computed * * @returns The old value at `address` - * 
-------------------------------------------------------------------------**/ + */ template __forceinline__ __device__ T atomicMin(T* address, T val) { return cudf::genericAtomicOperation(address, val, cudf::DeviceMin{}); } -/** -------------------------------------------------------------------------* +/** * @brief Overloads for `atomicMax` * reads the `old` located at the `address` in global or shared memory, * computes the maximum of old and val, and stores the result back to memory @@ -548,14 +548,14 @@ __forceinline__ __device__ T atomicMin(T* address, T val) * @param[in] val The value to be computed * * @returns The old value at `address` - * -------------------------------------------------------------------------**/ + */ template __forceinline__ __device__ T atomicMax(T* address, T val) { return cudf::genericAtomicOperation(address, val, cudf::DeviceMax{}); } -/** --------------------------------------------------------------------------* +/** * @brief Overloads for `atomicCAS` * reads the `old` located at the `address` in global or shared memory, * computes (`old` == `compare` ? 
`val` : `old`), @@ -575,14 +575,14 @@ __forceinline__ __device__ T atomicMax(T* address, T val) * @param[in] val The value to be computed * * @returns The old value at `address` - * -------------------------------------------------------------------------**/ + */ template __forceinline__ __device__ T atomicCAS(T* address, T compare, T val) { return cudf::detail::typesAtomicCASImpl()(address, compare, val); } -/** -------------------------------------------------------------------------* +/** * @brief Overloads for `atomicAnd` * reads the `old` located at the `address` in global or shared memory, * computes (old & val), and stores the result back to memory at the same @@ -596,14 +596,14 @@ __forceinline__ __device__ T atomicCAS(T* address, T compare, T val) * @param[in] val The value to be computed * * @returns The old value at `address` - * -------------------------------------------------------------------------**/ + */ template ::value, T>* = nullptr> __forceinline__ __device__ T atomicAnd(T* address, T val) { return cudf::genericAtomicOperation(address, val, cudf::DeviceAnd{}); } -/** -------------------------------------------------------------------------* +/** * @brief Overloads for `atomicOr` * reads the `old` located at the `address` in global or shared memory, * computes (old | val), and stores the result back to memory at the same @@ -617,14 +617,14 @@ __forceinline__ __device__ T atomicAnd(T* address, T val) * @param[in] val The value to be computed * * @returns The old value at `address` - * -------------------------------------------------------------------------**/ + */ template ::value, T>* = nullptr> __forceinline__ __device__ T atomicOr(T* address, T val) { return cudf::genericAtomicOperation(address, val, cudf::DeviceOr{}); } -/** -------------------------------------------------------------------------* +/** * @brief Overloads for `atomicXor` * reads the `old` located at the `address` in global or shared memory, * computes (old ^ val), and stores 
the result back to memory at the same @@ -638,7 +638,7 @@ __forceinline__ __device__ T atomicOr(T* address, T val) * @param[in] val The value to be computed * * @returns The old value at `address` - * -------------------------------------------------------------------------**/ + */ template ::value, T>* = nullptr> __forceinline__ __device__ T atomicXor(T* address, T val) { diff --git a/cpp/include/cudf/detail/utilities/device_operators.cuh b/cpp/include/cudf/detail/utilities/device_operators.cuh index 7245bee1aa3..659f0d00d6f 100644 --- a/cpp/include/cudf/detail/utilities/device_operators.cuh +++ b/cpp/include/cudf/detail/utilities/device_operators.cuh @@ -17,11 +17,10 @@ #ifndef DEVICE_OPERATORS_CUH #define DEVICE_OPERATORS_CUH -/** ---------------------------------------------------------------------------* +/** * @brief definition of the device operators * @file device_operators.cuh - * - * ---------------------------------------------------------------------------**/ + */ #include #include @@ -94,7 +93,6 @@ struct DeviceCount { * character. This serves as identity value for maximum operator on string * values. Also, this char pointer serves as valid device pointer of identity * value for minimum operator on string values. - * */ __constant__ char max_string_sentinel[5]{"\xF7\xBF\xBF\xBF"}; diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 5f7728b8c1e..e9d66d125dd 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -388,7 +388,6 @@ struct MurmurHash3_32 { return h; } - /* --------------------------------------------------------------------------*/ /** * @brief Combines two hash values into a new single hash value. Called * repeatedly to create a hash value from several variables. 
@@ -400,7 +399,6 @@ struct MurmurHash3_32 { * * @returns A hash value that intelligently combines the lhs and rhs hash values */ - /* ----------------------------------------------------------------------------*/ CUDA_DEVICE_CALLABLE result_type hash_combine(result_type lhs, result_type rhs) { result_type combined{lhs}; @@ -694,17 +692,14 @@ SparkMurmurHash3_32::operator()(double const& key) const return this->compute_floating_point(key); } -/* --------------------------------------------------------------------------*/ /** * @brief This hash function simply returns the value that is asked to be hash - reinterpreted as the result_type of the functor. + * reinterpreted as the result_type of the functor. */ -/* ----------------------------------------------------------------------------*/ template struct IdentityHash { using result_type = hash_value_type; - /* --------------------------------------------------------------------------*/ /** * @brief Combines two hash values into a new single hash value. Called * repeatedly to create a hash value from several variables. 
@@ -716,7 +711,6 @@ struct IdentityHash { * * @returns A hash value that intelligently combines the lhs and rhs hash values */ - /* ----------------------------------------------------------------------------*/ CUDA_HOST_DEVICE_CALLABLE result_type hash_combine(result_type lhs, result_type rhs) const { result_type combined{lhs}; diff --git a/cpp/include/cudf/detail/utilities/integer_utils.hpp b/cpp/include/cudf/detail/utilities/integer_utils.hpp index ca40d7516e8..dc919433da7 100644 --- a/cpp/include/cudf/detail/utilities/integer_utils.hpp +++ b/cpp/include/cudf/detail/utilities/integer_utils.hpp @@ -20,7 +20,6 @@ /** * @file Utility code involving integer arithmetic - * */ #include diff --git a/cpp/include/cudf/detail/utilities/release_assert.cuh b/cpp/include/cudf/detail/utilities/release_assert.cuh index 2ca32fdcb8b..e0db88d8fcb 100644 --- a/cpp/include/cudf/detail/utilities/release_assert.cuh +++ b/cpp/include/cudf/detail/utilities/release_assert.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,8 +26,7 @@ * regardless of the state of `NDEBUG`. * * Relies on the `__PRETTY_FUNCTION__` macro which is specific to GCC and Clang. - * - **/ + */ #if defined(__CUDA_ARCH__) && (defined(__clang__) || defined(__GNUC__)) #define __ASSERT_STR_HELPER(x) #x #define release_assert(e) \ diff --git a/cpp/include/cudf/detail/utilities/transform_unary_functions.cuh b/cpp/include/cudf/detail/utilities/transform_unary_functions.cuh index a3da7a36b90..8c0abbad49f 100644 --- a/cpp/include/cudf/detail/utilities/transform_unary_functions.cuh +++ b/cpp/include/cudf/detail/utilities/transform_unary_functions.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -/** --------------------------------------------------------------------------* +/** * @brief unary functions for thrust::transform_iterator * @file transform_unary_functions.cuh * @@ -22,7 +22,7 @@ * for thrust::transform_iterator. * For the detail of example cases, * @see iterator.cuh iterator_test.cu - * -------------------------------------------------------------------------**/ + */ #pragma once @@ -59,7 +59,7 @@ struct null_replacing_transformer { } }; -/** -------------------------------------------------------------------------* +/** * @brief intermediate struct to calculate mean and variance * This is an example case to output a struct from column input. * @@ -69,7 +69,7 @@ struct null_replacing_transformer { * and `variance` (= sum of squares / count - mean^2). * * @tparam ElementType element data type of value and value_squared. - * -------------------------------------------------------------------------**/ + */ template struct meanvar { ElementType value; /// the value @@ -101,7 +101,7 @@ struct meanvar { // -------------------------------------------------------------------------- // transformers -/** -------------------------------------------------------------------------* +/** * @brief Transforms a scalar by first casting to another type, and then squaring the result. * * This struct transforms the output value as @@ -110,14 +110,14 @@ struct meanvar { * This will be used to compute "sum of squares". 
* * @tparam ResultType scalar data type of output - * -------------------------------------------------------------------------**/ + */ template struct transformer_squared { CUDA_HOST_DEVICE_CALLABLE ElementType operator()(ElementType const &value) { return (value * value); }; }; -/** -------------------------------------------------------------------------* +/** * @brief Uses a scalar value to construct a `meanvar` object. * This transforms `thrust::pair` into * `ResultType = meanvar` form. @@ -125,7 +125,7 @@ struct transformer_squared { * This struct transforms the value and the squared value and the count at once. * * @tparam ElementType scalar data type of input - * -------------------------------------------------------------------------**/ + */ template struct transformer_meanvar { using ResultType = meanvar; diff --git a/cpp/include/cudf/detail/utilities/trie.cuh b/cpp/include/cudf/detail/utilities/trie.cuh index ab43366aa3c..5370c8678cf 100644 --- a/cpp/include/cudf/detail/utilities/trie.cuh +++ b/cpp/include/cudf/detail/utilities/trie.cuh @@ -17,7 +17,6 @@ /** * @brief Serialized trie implementation for C++/CUDA * @file trie.cuh - * */ #pragma once @@ -89,8 +88,12 @@ inline thrust::host_vector createSerializedTrie( // Serialize the tree trie std::deque to_visit; thrust::host_vector nodes; - // suport for matching empty input + + // If the Tree trie matches empty strings, the root node is marked as 'end of word'. + // The first node in the serialized trie is also used to match empty strings, so we're + // initializing it using the `is_end_of_word` value from the root node. nodes.push_back(SerialTrieNode(trie_terminating_character, tree_trie.is_end_of_word)); + // Add root node to queue. 
This node is not included in the serialized trie to_visit.emplace_back(&tree_trie, -1); while (!to_visit.empty()) { diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh index f8f3ba51468..c685837ae2b 100644 --- a/cpp/include/cudf/detail/valid_if.cuh +++ b/cpp/include/cudf/detail/valid_if.cuh @@ -113,7 +113,7 @@ std::pair valid_if( return std::make_pair(std::move(null_mask), null_count); } -/**----------------------------------------------------------------------------* +/** * @brief Populates a set of bitmasks by applying a binary predicate to two * input ranges. @@ -146,7 +146,7 @@ std::pair valid_if( * remaining bits may not be initialized. * @param valid_counts Used to obtain the total number of valid bits for each * mask. - **/ + */ template repeat( * @param step Increment value * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr The result table containing the sequence - **/ + */ std::unique_ptr sequence( size_type size, scalar const& init, @@ -197,7 +197,7 @@ std::unique_ptr sequence( * @param init First value in the sequence * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr The result table containing the sequence - **/ + */ std::unique_ptr sequence( size_type size, scalar const& init, diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp index fc809b03dfa..f7f7f51479d 100644 --- a/cpp/include/cudf/groupby.hpp +++ b/cpp/include/cudf/groupby.hpp @@ -62,7 +62,6 @@ struct aggregation_request { * For every `aggregation_request` given to `groupby::aggregate` an * `aggregation_result` will be returned. The `aggregation_result` holds the * resulting column(s) for each requested aggregation on the `request`s values.
- * */ struct aggregation_result { /// Columns of results from an `aggregation_request` diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 042a85cfd6e..9dbde1432aa 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -109,7 +109,7 @@ struct column_metadata { * @param metadata Contains hierarchy of names of columns and children * @param ar_mr arrow memory pool to allocate memory for arrow Table * @return arrow Table generated from `input` - **/ + */ std::shared_ptr to_arrow(table_view input, std::vector const& metadata = {}, arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); @@ -120,7 +120,7 @@ std::shared_ptr to_arrow(table_view input, * @param input arrow:Table that needs to be converted to `cudf::table` * @param mr Device memory resource used to allocate `cudf::table` * @return cudf table generated from given arrow Table. - **/ + */ std::unique_ptr
from_arrow( arrow::Table const& input, diff --git a/cpp/include/cudf/io/data_sink.hpp b/cpp/include/cudf/io/data_sink.hpp index 6c830e31a56..0ae403458a0 100644 --- a/cpp/include/cudf/io/data_sink.hpp +++ b/cpp/include/cudf/io/data_sink.hpp @@ -30,21 +30,21 @@ namespace cudf { namespace io { /** * @brief Interface class for storing the output data from the writers - **/ + */ class data_sink { public: /** * @brief Create a sink from a file path * * @param[in] filepath Path to the file to use - **/ + */ static std::unique_ptr create(const std::string& filepath); /** * @brief Create a sink from a std::vector * * @param[in,out] buffer Pointer to the output vector - **/ + */ static std::unique_ptr create(std::vector* buffer); /** @@ -53,7 +53,7 @@ class data_sink { * A useful code path for benchmarking, to eliminate physical * hardware randomness from profiling. * - **/ + */ static std::unique_ptr create(); /** @@ -65,12 +65,12 @@ class data_sink { * class that wraps the user pointer. The principle is to allow the user to declare * a custom sink instance and use it across multiple write() calls. * - **/ + */ static std::unique_ptr create(cudf::io::data_sink* const user_sink); /** * @brief Base class destructor - **/ + */ virtual ~data_sink(){}; /** @@ -80,7 +80,7 @@ class data_sink { * @param[in] size Number of bytes to write * * @return void - **/ + */ virtual void host_write(void const* data, size_t size) = 0; /** @@ -104,7 +104,7 @@ class data_sink { * write() calls as well. * * @return bool If this writer supports device_write() calls. 
- **/ + */ virtual bool supports_device_write() const { return false; } /** @@ -114,7 +114,7 @@ class data_sink { * @param[in] size Number of bytes to write * * @return void - **/ + */ virtual void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) { CUDF_FAIL("data_sink classes that support device_write must override this function."); @@ -131,7 +131,7 @@ class data_sink { * @brief Returns the total number of bytes written into this sink * * @return size_t Total number of bytes written into this sink - **/ + */ virtual size_t bytes_written() = 0; }; diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index 262d79b64c2..7d56c1c0fc6 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -61,7 +61,6 @@ class json_reader_options_builder; * | `date_unit` | only millisecond units are supported | * | `encoding` | only ASCII-encoded data is supported | * | `chunksize` | use `byte_range_xxx` for chunking instead | - * */ class json_reader_options { source_info _source; diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp index 8d1e192cee0..661b36f10c8 100644 --- a/cpp/include/cudf/io/types.hpp +++ b/cpp/include/cudf/io/types.hpp @@ -148,7 +148,6 @@ struct table_metadata { * * In the case where column nullability is known, pass `true` if the corresponding column could * contain nulls in one or more subtables to be written, otherwise `false`. - * */ struct table_metadata_with_nullability : public table_metadata { std::vector column_nullable; //!< Per-column nullability information. diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index 37847c41339..b2c1296ccef 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -394,10 +394,12 @@ class hash_join { * * @param build The build table, from which the hash table is built. * @param build_on The column indices from `build` to join on. 
+ * @param compare_nulls Controls whether null join-key values should match or not. * @param stream CUDA stream used for device memory operations and kernel launches */ hash_join(cudf::table_view const& build, std::vector const& build_on, + null_equality compare_nulls, rmm::cuda_stream_view stream = rmm::cuda_stream_default); /** diff --git a/cpp/include/cudf/lists/detail/gather.cuh b/cpp/include/cudf/lists/detail/gather.cuh index cde07b9427d..0dcc4e2b37c 100644 --- a/cpp/include/cudf/lists/detail/gather.cuh +++ b/cpp/include/cudf/lists/detail/gather.cuh @@ -232,7 +232,6 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column, * * @returns The gather_data struct needed to construct the gather map for the * next level of recursion. - * */ template gather_data make_gather_data(cudf::lists_column_view const& source_column, @@ -261,7 +260,6 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column, * @param mr Memory resource to use for all allocations * * @returns column with elements gathered based on `gather_data` - * */ std::unique_ptr gather_list_nested( lists_column_view const& list, @@ -280,7 +278,6 @@ std::unique_ptr gather_list_nested( * @param mr Memory resource to use for all allocations * * @returns column with elements gathered based on `gather_data` - * */ std::unique_ptr gather_list_leaf( column_view const& column, diff --git a/cpp/include/cudf/lists/list_device_view.cuh b/cpp/include/cudf/lists/list_device_view.cuh index be6bf88da30..38708d4878e 100644 --- a/cpp/include/cudf/lists/list_device_view.cuh +++ b/cpp/include/cudf/lists/list_device_view.cuh @@ -24,7 +24,6 @@ namespace cudf { /** * @brief A non-owning, immutable view of device data that represents * a list of elements of arbitrary type (including further nested lists). 
- * */ class list_device_view { using lists_column_device_view = cudf::detail::lists_column_device_view; diff --git a/cpp/include/cudf/lists/list_view.cuh b/cpp/include/cudf/lists/list_view.cuh index 898b274781a..9af722e444b 100644 --- a/cpp/include/cudf/lists/list_view.cuh +++ b/cpp/include/cudf/lists/list_view.cuh @@ -25,7 +25,6 @@ namespace cudf { /** * @brief A non-owning, immutable view of device data that represents * a list of elements of arbitrary type (including further nested lists). - * */ class list_view { }; diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp index 690f4cdbbb0..5e1f0f0802e 100644 --- a/cpp/include/cudf/null_mask.hpp +++ b/cpp/include/cudf/null_mask.hpp @@ -37,7 +37,7 @@ namespace cudf { * @param state The state of the null mask * @param size The number of elements represented by the mask * @return size_type The count of null elements - **/ + */ size_type state_null_count(mask_state state, size_type size); /** @@ -51,7 +51,7 @@ size_type state_null_count(mask_state state, size_type size); * @param padding_boundary The value returned will be rounded up to a multiple * of this value * @return std::size_t The necessary number of bytes - **/ + */ std::size_t bitmask_allocation_size_bytes(size_type number_of_bits, std::size_t padding_boundary = 64); @@ -79,7 +79,7 @@ size_type num_bitmask_words(size_type number_of_bits); * @param mr Device memory resource used to allocate the returned device_buffer. * @return rmm::device_buffer A `device_buffer` for use as a null bitmask * satisfying the desired size and state - **/ + */ rmm::device_buffer create_null_mask( size_type size, mask_state state, @@ -96,7 +96,7 @@ rmm::device_buffer create_null_mask( * @param begin_bit Index of the first bit to set (inclusive) * @param end_bit Index of the last bit to set (exclusive) * @param valid If true set all entries to valid; otherwise, set all to null. 
- **/ + */ void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit, bool valid); /** @@ -112,7 +112,7 @@ void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit * @param start_bit Index of the first bit to count (inclusive) * @param stop_bit Index of the last bit to count (exclusive) * @return The number of non-zero bits in the specified range - **/ + */ cudf::size_type count_set_bits(bitmask_type const* bitmask, size_type start, size_type stop); /** @@ -128,7 +128,7 @@ cudf::size_type count_set_bits(bitmask_type const* bitmask, size_type start, siz * @param start_bit Index of the first bit to count (inclusive) * @param stop_bit Index of the last bit to count (exclusive) * @return The number of zero bits in the specified range - **/ + */ cudf::size_type count_unset_bits(bitmask_type const* bitmask, size_type start, size_type stop); /** @@ -184,7 +184,7 @@ std::vector segmented_count_unset_bits(bitmask_type const* bitmask, * @param mr Device memory resource used to allocate the returned device_buffer * @return rmm::device_buffer A `device_buffer` containing the bits * `[begin_bit, end_bit)` from `mask`. - **/ + */ rmm::device_buffer copy_bitmask( bitmask_type const* mask, size_type begin_bit, @@ -201,7 +201,7 @@ rmm::device_buffer copy_bitmask( * @param mr Device memory resource used to allocate the returned device_buffer * @return rmm::device_buffer A `device_buffer` containing the bits * `[view.offset(), view.offset() + view.size())` from `view`'s bitmask. - **/ + */ rmm::device_buffer copy_bitmask( column_view const& view, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/rolling.hpp b/cpp/include/cudf/rolling.hpp index ecbf999aa92..44a64a01c5e 100644 --- a/cpp/include/cudf/rolling.hpp +++ b/cpp/include/cudf/rolling.hpp @@ -52,7 +52,7 @@ namespace cudf { * @param[in] agg The rolling window aggregation type (SUM, MAX, MIN, etc.) 
* * @returns A nullable output column containing the rolling window results - **/ + */ std::unique_ptr rolling_window( column_view const& input, size_type preceding_window, @@ -188,7 +188,7 @@ struct window_bounds { * @param[in] aggr The rolling window aggregation type (SUM, MAX, MIN, etc.) * * @returns A nullable output column containing the rolling window results - **/ + */ std::unique_ptr grouped_rolling_window( table_view const& group_keys, column_view const& input, diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index c9002e5a9a4..de01f4b860f 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -330,26 +330,26 @@ class fixed_point_scalar : public scalar { bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : scalar{data_type{type_to_id(), 0}, is_valid, stream, mr}, + : scalar{data_type{type_to_id(), _type.scale()}, is_valid, stream, mr}, _data{numeric::scaled_integer{value}.value} { - CUDF_EXPECTS(value == (T{_data.value(), numeric::scale_type{0}}), - "scale of fixed_point value should be zero"); } /** * @brief Construct a new fixed_point scalar object from existing device memory. * - * @param[in] data The scalar's data in device memory - * @param[in] is_valid Whether the value held by the scalar is valid - * @param[in] stream CUDA stream used for device memory operations. - * @param[in] mr Device memory resource to use for device memory allocation + * @param[in] data The scalar's data in device memory + * @param[in] scale The scale of the fixed_point scalar + * @param[in] is_valid Whether the value held by the scalar is valid + * @param[in] stream CUDA stream used for device memory operations. 
+ * @param[in] mr Device memory resource to use for device memory allocation */ fixed_point_scalar(rmm::device_scalar&& data, + numeric::scale_type scale, bool is_valid = true, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) - : scalar{data_type{type_to_id()}, is_valid, stream, mr}, + : scalar{data_type{type_to_id(), scale}, is_valid, stream, mr}, _data{std::forward>(data)} { } diff --git a/cpp/include/cudf/scalar/scalar_device_view.cuh b/cpp/include/cudf/scalar/scalar_device_view.cuh index a4f404b5d19..aa3cd932f4f 100644 --- a/cpp/include/cudf/scalar/scalar_device_view.cuh +++ b/cpp/include/cudf/scalar/scalar_device_view.cuh @@ -214,9 +214,14 @@ class fixed_point_scalar_device_view : public detail::scalar_device_view_base { using rep_type = typename T::rep; fixed_point_scalar_device_view(data_type type, rep_type* data, bool* is_valid) - : detail::scalar_device_view_base(type, is_valid) + : detail::scalar_device_view_base(type, is_valid), _data(data) { } + + __device__ void set_value(rep_type value) { *_data = value; } + + private: + rep_type* _data{}; }; /** @@ -310,4 +315,13 @@ auto get_scalar_device_view(duration_scalar& s) return duration_scalar_device_view(s.type(), s.data(), s.validity_data()); } +/** + * @brief Get the device view of a fixed_point_scalar + */ +template +auto get_scalar_device_view(fixed_point_scalar& s) +{ + return fixed_point_scalar_device_view(s.type(), s.data(), s.validity_data()); +} + } // namespace cudf diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh index 8fbecc2e815..b9c35912a8f 100644 --- a/cpp/include/cudf/strings/detail/gather.cuh +++ b/cpp/include/cudf/strings/detail/gather.cuh @@ -17,9 +17,7 @@ #include #include -#include -#include -#include +#include #include #include @@ -75,22 +73,38 @@ std::unique_ptr gather( auto d_strings = *strings_column; // build offsets column - auto 
offsets_transformer = [d_strings, strings_count] __device__(size_type idx) { - if (NullifyOutOfBounds && ((idx < 0) || (idx >= strings_count))) return 0; - if (d_strings.is_null(idx)) return 0; - return d_strings.element(idx).size_bytes(); - }; - auto offsets_transformer_itr = thrust::make_transform_iterator(begin, offsets_transformer); - auto offsets_column = make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + output_count, stream, mr); - auto offsets_view = offsets_column->view(); - auto d_offsets = offsets_view.template data(); + auto offsets_column = make_numeric_column( + data_type{type_id::INT32}, output_count + 1, mask_state::UNALLOCATED, stream, mr); + auto d_offsets = offsets_column->mutable_view().template data(); + thrust::transform(rmm::exec_policy(stream), + begin, + end, + d_offsets, + [d_strings, strings_count] __device__(size_type idx) { + if (NullifyOutOfBounds && ((idx < 0) || (idx >= strings_count))) return 0; + if (d_strings.is_null(idx)) return 0; + return d_strings.element(idx).size_bytes(); + }); + + // check total size is not too large + size_t total_bytes = thrust::transform_reduce( + rmm::exec_policy(stream), + d_offsets, + d_offsets + output_count, + [] __device__(auto size) { return static_cast(size); }, + size_t{0}, + thrust::plus{}); + CUDF_EXPECTS(total_bytes < std::numeric_limits::max(), + "total size of output strings is too large for a cudf column"); + + // create offsets from sizes + thrust::exclusive_scan( + rmm::exec_policy(stream), d_offsets, d_offsets + output_count + 1, d_offsets); // build chars column - size_type bytes = thrust::device_pointer_cast(d_offsets)[output_count]; + size_type bytes = static_cast(total_bytes); auto chars_column = create_chars_child_column(output_count, 0, bytes, stream, mr); - auto chars_view = chars_column->mutable_view(); - auto d_chars = chars_view.template data(); + auto d_chars = chars_column->mutable_view().template data(); // fill in chars auto gather_chars = 
[d_strings, begin, strings_count, d_offsets, d_chars] __device__(size_type idx) { diff --git a/cpp/include/cudf/strings/sorting.hpp b/cpp/include/cudf/strings/sorting.hpp index 84ce2e4ec2b..399625e3265 100644 --- a/cpp/include/cudf/strings/sorting.hpp +++ b/cpp/include/cudf/strings/sorting.hpp @@ -26,7 +26,7 @@ namespace detail { /** * @brief Sort types for the sort method. - **/ + */ enum sort_type { none = 0, ///< no sorting length = 1, ///< sort by string length diff --git a/cpp/include/cudf/structs/struct_view.hpp b/cpp/include/cudf/structs/struct_view.hpp index 778ffccf7b4..18f0384118b 100644 --- a/cpp/include/cudf/structs/struct_view.hpp +++ b/cpp/include/cudf/structs/struct_view.hpp @@ -26,7 +26,6 @@ namespace cudf { * @brief A non-owning, immutable view of device data that represents * a struct with fields of arbitrary types (including primitives, lists, * and other structs) - * */ class struct_view { }; diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index 75c2340e51b..d9840e78be2 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,8 +37,7 @@ namespace cudf { * * Equivalence is defined as `not (a __device__ weak_ordering compare_elements(Element lhs, Element rhs) { @@ -140,7 +139,7 @@ __device__ bool equality_compare(Element const lhs, Element const rhs) * @brief Performs an equality comparison between two elements in two columns. * * @tparam has_nulls Indicates the potential for null values in either column. 
- **/ + */ template class element_equality_comparator { public: @@ -153,7 +152,7 @@ class element_equality_comparator { * @param lhs The column containing the first element * @param rhs The column containing the second element (may be the same as lhs) * @param nulls_are_equal Indicates if two null elements are treated as equivalent - **/ + */ __host__ __device__ element_equality_comparator(column_device_view lhs, column_device_view rhs, bool nulls_are_equal = true) @@ -232,7 +231,7 @@ class row_equality_comparator { * @brief Performs a relational comparison between two elements in two columns. * * @tparam has_nulls Indicates the potential for null values in either column. - **/ + */ template class element_relational_comparator { public: @@ -246,7 +245,7 @@ class element_relational_comparator { * @param rhs The column containing the second element (may be the same as lhs) * @param null_precedence Indicates how null values are ordered with other * values - **/ + */ __host__ __device__ element_relational_comparator(column_device_view lhs, column_device_view rhs, null_order null_precedence) @@ -316,7 +315,7 @@ class element_relational_comparator { * `aac < abb`. * * @tparam has_nulls Indicates the potential for null values in either row. - **/ + */ template class row_lexicographic_comparator { public: @@ -388,7 +387,7 @@ class row_lexicographic_comparator { * * @tparam hash_function Hash functor to use for hashing elements. * @tparam has_nulls Indicates the potential for null values in the column. - **/ + */ template