diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 835c47a6e7e..d3c2359d1d4 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -818,6 +818,12 @@ INPUT = main_page.md \ regex.md \ unicode.md \ ../include \ + ../include/cudf_test/column_wrapper.hpp \ + ../include/cudf_test/column_utilities.hpp \ + ../include/cudf_test/iterator_utilities.hpp \ + ../include/cudf_test/table_utilities.hpp \ + ../include/cudf_test/type_lists.hpp \ + ../include/cudf_test/type_list_utilities.hpp \ ../libcudf_kafka/include # This tag can be used to specify the character encoding of the source files @@ -881,7 +887,7 @@ EXCLUDE_SYMLINKS = NO # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* -EXCLUDE_PATTERNS = */nvtx/* */detail/* +EXCLUDE_PATTERNS = */nvtx/* */detail/* */cudf_test/* # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 8d8c83bd2a4..1f31c21bff9 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -1232,6 +1232,7 @@ __device__ inline bitmask_type get_mask_offset_word(bitmask_type const* __restri /** * @brief value accessor of column without null bitmask + * * A unary functor returns scalar value at `id`. * `operator() (cudf::size_type id)` computes `element` * This functor is only allowed for non-nullable columns. 
@@ -1249,6 +1250,7 @@ struct value_accessor { /** * @brief constructor + * * @param[in] _col column device view of cudf column */ value_accessor(column_device_view const& _col) : col{_col} @@ -1328,6 +1330,7 @@ struct optional_accessor { /** * @brief pair accessor of column with/without null bitmask + * * A unary functor returns pair with scalar value at `id` and boolean validity * `operator() (cudf::size_type id)` computes `element` and * returns a `pair(element, validity)` @@ -1350,6 +1353,7 @@ struct pair_accessor { /** * @brief constructor + * * @param[in] _col column device view of cudf column */ pair_accessor(column_device_view const& _col) : col{_col} @@ -1372,6 +1376,7 @@ struct pair_accessor { /** * @brief pair accessor of column with/without null bitmask + * * A unary functor returns pair with representative scalar value at `id` and boolean validity * `operator() (cudf::size_type id)` computes `element` and * returns a `pair(element, validity)` @@ -1396,6 +1401,7 @@ struct pair_rep_accessor { /** * @brief constructor + * * @param[in] _col column device view of cudf column */ pair_rep_accessor(column_device_view const& _col) : col{_col} @@ -1446,6 +1452,7 @@ struct mutable_value_accessor { /** * @brief Constructor + * * @param[in] _col mutable column device view of cudf column */ mutable_value_accessor(mutable_column_device_view& _col) : col{_col} diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index bc64dbe6cd4..0535150cf03 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -86,6 +86,7 @@ std::unique_ptr gather( /** * @brief Reverses the rows within a table. + * * Creates a new table that is the reverse of @p source_table. * Example: * ``` @@ -103,6 +104,7 @@ std::unique_ptr
reverse( /** * @brief Reverses the elements of a column + * * Creates a new column that is the reverse of @p source_column. * Example: * ``` @@ -229,6 +231,7 @@ std::unique_ptr empty_like(scalar const& input); /** * @brief Creates an uninitialized new column of the same size and type as the `input`. + * * Supports only fixed-width types. * * @param[in] input Immutable view of input column to emulate @@ -244,6 +247,7 @@ std::unique_ptr allocate_like( /** * @brief Creates an uninitialized new column of the specified size and same type as the `input`. + * * Supports only fixed-width types. * * @param[in] input Immutable view of input column to emulate diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 27821fe5526..f794719d2d6 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -526,8 +526,11 @@ class parquet_writer_options { auto get_row_group_size_rows() const { return _row_group_size_rows; } /** - * @brief Returns the maximum uncompressed page size, in bytes. If set larger than the row group - * size, then this will return the row group size. + * @brief Returns the maximum uncompressed page size, in bytes. + * + * If set larger than the row group size, then this will return the row group size. + * + * @return Maximum uncompressed page size, in bytes */ auto get_max_page_size_bytes() const { @@ -535,8 +538,11 @@ class parquet_writer_options { } /** - * @brief Returns maximum page size, in rows. If set larger than the row group size, then this - * will return the row group size. + * @brief Returns maximum page size, in rows. + * + * If set larger than the row group size, then this will return the row group size. + * + * @return Maximum page size, in rows */ auto get_max_page_size_rows() const { @@ -638,6 +644,8 @@ class parquet_writer_options { /** * @brief Sets the maximum uncompressed page size, in bytes. 
+ * + * @param size_bytes Maximum uncompressed page size, in bytes to set */ void set_max_page_size_bytes(size_t size_bytes) { @@ -647,6 +655,8 @@ class parquet_writer_options { /** * @brief Sets the maximum page size, in rows. + * + * @param size_rows Maximum page size, in rows to set */ void set_max_page_size_rows(size_type size_rows) { @@ -788,9 +798,11 @@ class parquet_writer_options_builder { } /** - * @brief Sets the maximum uncompressed page size, in bytes. Serves as a hint to the writer, - * and can be exceeded under certain circumstances. Cannot be larger than the row group size in - * bytes, and will be adjusted to match if it is. + * @brief Sets the maximum uncompressed page size, in bytes. + * + * Serves as a hint to the writer, and can be exceeded under certain circumstances. + * Cannot be larger than the row group size in bytes, and will be adjusted to + * match if it is. * * @param val maximum page size * @return this for chaining @@ -979,8 +991,12 @@ class chunked_parquet_writer_options { auto get_row_group_size_rows() const { return _row_group_size_rows; } /** - * @brief Returns maximum uncompressed page size, in bytes. If set larger than the row group size, - * then this will return the row group size. + * @brief Returns maximum uncompressed page size, in bytes. + * + * If set larger than the row group size, then this will return the + * row group size. + * + * @return Maximum uncompressed page size, in bytes */ auto get_max_page_size_bytes() const { @@ -988,8 +1004,11 @@ class chunked_parquet_writer_options { } /** - * @brief Returns maximum page size, in rows. If set larger than the row group size, then this - * will return the row group size. + * @brief Returns maximum page size, in rows. + * + * If set larger than the row group size, then this will return the row group size. 
+ * + * @return Maximum page size, in rows */ auto get_max_page_size_rows() const { @@ -1030,8 +1049,9 @@ class chunked_parquet_writer_options { void set_compression(compression_type compression) { _compression = compression; } /** - * @brief Sets timestamp writing preferences. INT96 timestamps will be written - * if `true` and TIMESTAMP_MICROS will be written if `false`. + * @brief Sets timestamp writing preferences. + * + * INT96 timestamps will be written if `true` and TIMESTAMP_MICROS will be written if `false`. * * @param req Boolean value to enable/disable writing of INT96 timestamps */ @@ -1065,6 +1085,8 @@ class chunked_parquet_writer_options { /** * @brief Sets the maximum uncompressed page size, in bytes. + * + * @param size_bytes Maximum uncompressed page size, in bytes to set */ void set_max_page_size_bytes(size_t size_bytes) { @@ -1074,6 +1096,8 @@ class chunked_parquet_writer_options { /** * @brief Sets the maximum page size, in rows. + * + * @param size_rows The maximum page size, in rows to set */ void set_max_page_size_rows(size_type size_rows) { @@ -1205,9 +1229,10 @@ class chunked_parquet_writer_options_builder { } /** - * @brief Sets the maximum uncompressed page size, in bytes. Serves as a hint to the writer, - * and can be exceeded under certain circumstances. Cannot be larger than the row group size in - * bytes, and will be adjusted to match if it is. + * @brief Sets the maximum uncompressed page size, in bytes. + * + * Serves as a hint to the writer, and can be exceeded under certain circumstances. Cannot be + * larger than the row group size in bytes, and will be adjusted to match if it is. 
* * @param val maximum page size * @return this for chaining diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp index 96f169bcb7c..56833502fdf 100644 --- a/cpp/include/cudf/io/types.hpp +++ b/cpp/include/cudf/io/types.hpp @@ -116,6 +116,7 @@ struct column_name_info { /** * @brief Table metadata for io readers/writers (primarily column names) + * * For nested types (structs, maps, unions), the ordering of names in the column_names vector * corresponds to a pre-order traversal of the column tree. * In the example below (2 top-level columns: struct column "col1" and string column "col2"), @@ -519,6 +520,7 @@ class column_in_metadata { /** * @brief Gets the explicitly set nullability for this column. + * * @throws If nullability is not explicitly defined for this column. * Check using `is_nullability_defined()` first. * @return Boolean indicating whether this column is nullable @@ -549,6 +551,7 @@ class column_in_metadata { /** * @brief Get the decimal precision that was set for this column. + * * @throws If decimal precision was not set for this column. * Check using `is_decimal_precision_set()` first. * @return The decimal precision that was set for this column @@ -564,6 +567,7 @@ class column_in_metadata { /** * @brief Get the parquet field id that was set for this column. + * * @throws If parquet field id was not set for this column. * Check using `is_parquet_field_id_set()` first. * @return The parquet field id that was set for this column diff --git a/cpp/include/cudf/ipc.hpp b/cpp/include/cudf/ipc.hpp index 1168aa63d82..7bce6e1bf5a 100644 --- a/cpp/include/cudf/ipc.hpp +++ b/cpp/include/cudf/ipc.hpp @@ -1,15 +1,55 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + #include #include #include #include +/** + * @brief Reads Message objects from cuda buffer source + * + */ class CudaMessageReader : arrow::ipc::MessageReader { public: + /** + * @brief Construct a new Cuda Message Reader object from a cuda buffer stream + * + * @param stream The cuda buffer reader stream + * @param schema The schema of the stream + */ CudaMessageReader(arrow::cuda::CudaBufferReader* stream, arrow::io::BufferReader* schema); + /** + * @brief Open stream from source. + * + * @param stream The cuda buffer reader stream + * @param schema The schema of the stream + * @return arrow::ipc::MessageReader object + */ static std::unique_ptr Open(arrow::cuda::CudaBufferReader* stream, arrow::io::BufferReader* schema); + /** + * @brief Read next Message from the stream. + * + * @return arrow::ipc::Message object + */ arrow::Result> ReadNextMessage() override; private: diff --git a/cpp/include/cudf/quantiles.hpp b/cpp/include/cudf/quantiles.hpp index 6292c4ce932..10520442510 100644 --- a/cpp/include/cudf/quantiles.hpp +++ b/cpp/include/cudf/quantiles.hpp @@ -30,7 +30,7 @@ namespace cudf { /** * @brief Computes quantiles with interpolation. - + * * Computes the specified quantiles by interpolating values between which they * lie, using the interpolation strategy specified in `interp`. 
* diff --git a/cpp/include/cudf/scalar/scalar_device_view.cuh b/cpp/include/cudf/scalar/scalar_device_view.cuh index b298b462a4f..18bcd89a00b 100644 --- a/cpp/include/cudf/scalar/scalar_device_view.cuh +++ b/cpp/include/cudf/scalar/scalar_device_view.cuh @@ -36,6 +36,7 @@ class scalar_device_view_base { /** * @brief Returns the value type + * * @returns The value type */ [[nodiscard]] __host__ __device__ data_type type() const noexcept { return _type; } diff --git a/cpp/include/cudf_test/base_fixture.hpp b/cpp/include/cudf_test/base_fixture.hpp index 2307f18994b..e529785a758 100644 --- a/cpp/include/cudf_test/base_fixture.hpp +++ b/cpp/include/cudf_test/base_fixture.hpp @@ -123,7 +123,7 @@ uint64_t random_generator_incrementing_seed(); template class UniformRandomGenerator { public: - using uniform_distribution = uniform_distribution_t; + using uniform_distribution = uniform_distribution_t; ///< The uniform distribution type for T. UniformRandomGenerator() : rng{std::mt19937_64{detail::random_generator_incrementing_seed()}()} {} @@ -144,6 +144,13 @@ class UniformRandomGenerator { { } + /** + * @brief Construct a new Uniform Random Generator to generate uniformly random booleans + * + * @param lower ignored + * @param upper ignored + * @param seed seed to initialize generator with + */ template ()>* = nullptr> UniformRandomGenerator(T lower, T upper, @@ -171,6 +178,7 @@ class UniformRandomGenerator { /** * @brief Returns the next random number. + * * @return generated random number */ template ()>* = nullptr> @@ -179,6 +187,10 @@ class UniformRandomGenerator { return T{dist(rng)}; } + /** + * @brief Returns the next random number. 
+ * @return generated random number + */ template ()>* = nullptr> T generate() { diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index 8068373ca65..b67891f6f01 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -81,6 +81,7 @@ class column_wrapper { /** * @brief Releases internal unique_ptr to wrapped column + * * @return unique_ptr to wrapped column */ std::unique_ptr release() { return std::move(wrapped); } @@ -94,7 +95,14 @@ class column_wrapper { */ template struct fixed_width_type_converter { - // Are the types same - simply copy elements from [begin, end) to out + /** + * @brief No conversion necessary: Same type, simply copy element to output. + * + * @tparam FromT Source type + * @tparam ToT Target type + * @param element Source value + * @return The converted target value, same as source value + */ template , void>* = nullptr> @@ -103,7 +111,14 @@ struct fixed_width_type_converter { return element; } - // Are the types convertible or can target be constructed from source? + /** + * @brief Convert types if possible, otherwise construct target from source. + * + * @tparam FromT Source type + * @tparam ToT Target type + * @param element Source value + * @return The converted target value + */ template < typename FromT = From, typename ToT = To, @@ -115,7 +130,14 @@ struct fixed_width_type_converter { return static_cast(element); } - // Convert integral values to timestamps + /** + * @brief Convert integral values to timestamps + * + * @tparam FromT Source type + * @tparam ToT Target type + * @param element Source value + * @return The converted target `timestamp` value + */ template < typename FromT = From, typename ToT = To, @@ -484,6 +506,11 @@ class fixed_width_column_wrapper : public detail::column_wrapper { } }; +/** + * @brief A wrapper for a column of fixed-point elements. 
+ * + * @tparam Rep The type of the column + */ template class fixed_point_column_wrapper : public detail::column_wrapper { public: @@ -1033,17 +1060,20 @@ class dictionary_column_wrapper : public detail::column_wrapper { public: /** * @brief Cast to dictionary_column_view + * */ operator dictionary_column_view() const { return cudf::dictionary_column_view{wrapped->view()}; } /** * @brief Access keys column view + * * @return column_view to keys column */ column_view keys() const { return cudf::dictionary_column_view{wrapped->view()}.keys(); } /** * @brief Access indices column view + * * @return column_view to indices column */ column_view indices() const { return cudf::dictionary_column_view{wrapped->view()}.indices(); } @@ -1445,6 +1475,7 @@ class lists_column_wrapper : public detail::column_wrapper { * @brief Construct a list column containing a single empty, optionally null row. * * @param valid Whether or not the empty row is also null + * @return A list column containing a single empty row */ static lists_column_wrapper make_one_empty_row_column(bool valid = true) { diff --git a/cpp/include/cudf_test/cudf_gtest.hpp b/cpp/include/cudf_test/cudf_gtest.hpp index 7bd704a288d..6c62b0159ca 100644 --- a/cpp/include/cudf_test/cudf_gtest.hpp +++ b/cpp/include/cudf_test/cudf_gtest.hpp @@ -34,6 +34,7 @@ * redefines them properly. */ +// @cond #define Types Types_NOT_USED #define Types0 Types0_NOT_USED #define TypeList TypeList_NOT_USED @@ -89,15 +90,35 @@ struct TypeList> { } // namespace internal } // namespace testing +// @endcond #include #include +/** + * @brief Test macro that expects `expr` to return cudaSuccess + * + * This will stop the test process on failure. 
+ * + * @param expr expression to be tested + */ #define ASSERT_CUDA_SUCCEEDED(expr) ASSERT_EQ(cudaSuccess, expr) +/** + * @brief Test macro that expects `expr` to return cudaSuccess + * + * @param expr expression to be tested + */ #define EXPECT_CUDA_SUCCEEDED(expr) EXPECT_EQ(cudaSuccess, expr) -// Utility for testing the expectation that an expression x throws the specified -// exception whose what() message ends with the msg +/** + * @brief Utility for testing the expectation that an expression x throws the specified + * exception whose what() message starts with `startswith` and ends with `endswith` + * + * @param x The expression to test + * @param exception The exception type to test for + * @param startswith The start of the expected message + * @param endswith The end of the expected message + */ #define EXPECT_THROW_MESSAGE(x, exception, startswith, endswith) \ do { \ EXPECT_THROW( \ @@ -114,17 +135,36 @@ struct TypeList> { exception); \ } while (0) +/** + * @brief Test macro that expects `x` to throw cudf::logic_error with a message + * + * @param x The statement to be tested + * @param msg The message associated with the exception + */ #define CUDF_EXPECT_THROW_MESSAGE(x, msg) \ EXPECT_THROW_MESSAGE(x, cudf::logic_error, "cuDF failure at:", msg) +/** + * @brief Test macro that expects `x` to throw cudf::cuda_error with a message + * + * @param x The statement to be tested + * @param msg The message associated with the exception + */ #define CUDA_EXPECT_THROW_MESSAGE(x, msg) \ EXPECT_THROW_MESSAGE(x, cudf::cuda_error, "CUDA error encountered at:", msg) +/** + * @brief Test macro that expects `x` to throw cudf::fatal_cuda_error with a message + * + * @param x The statement to be tested + * @param msg The message associated with the exception + */ #define FATAL_CUDA_EXPECT_THROW_MESSAGE(x, msg) \ EXPECT_THROW_MESSAGE(x, cudf::fatal_cuda_error, "Fatal CUDA error encountered at:", msg) /** * @brief test macro to be expected as no exception. 
+ * * The testing is same with EXPECT_NO_THROW() in gtest. * It also outputs captured error message, useful for debugging. * diff --git a/cpp/include/cudf_test/file_utilities.hpp b/cpp/include/cudf_test/file_utilities.hpp index d722b836674..e15b0ccb371 100644 --- a/cpp/include/cudf_test/file_utilities.hpp +++ b/cpp/include/cudf_test/file_utilities.hpp @@ -33,6 +33,11 @@ class temp_directory { std::string _path; public: + /** + * @brief Construct a new temp directory object + * + * @param base_name The base name of the temporary directory + */ temp_directory(const std::string& base_name) { std::string dir_template{std::filesystem::temp_directory_path().string()}; @@ -47,8 +52,13 @@ class temp_directory { temp_directory& operator=(temp_directory const&) = delete; temp_directory(temp_directory const&) = delete; + /** + * @brief Move assignment operator + * + * @return Reference to this object + */ temp_directory& operator=(temp_directory&&) = default; - temp_directory(temp_directory&&) = default; + temp_directory(temp_directory&&) = default; ///< Move constructor ~temp_directory() { std::filesystem::remove_all(std::filesystem::path{_path}); } diff --git a/cpp/include/cudf_test/tdigest_utilities.cuh b/cpp/include/cudf_test/tdigest_utilities.cuh index 81965bd1e8a..aaa858ee778 100644 --- a/cpp/include/cudf_test/tdigest_utilities.cuh +++ b/cpp/include/cudf_test/tdigest_utilities.cuh @@ -42,16 +42,34 @@ namespace test { using expected_value = thrust::tuple; +/** + * @brief Device functor to compute min of a sequence of values serially. + */ template struct column_min { + /** + * @brief Computes the min of a sequence of values serially. + * + * @param vals The sequence of values to compute the min of + * @return The min value + */ __device__ double operator()(device_span vals) { return static_cast(*thrust::min_element(thrust::seq, vals.begin(), vals.end())); } }; +/** + * @brief Device functor to compute max of a sequence of values serially. 
+ */ template struct column_max { + /** + * @brief Computes the max of a sequence of values serially. + * + * @param vals The sequence of values to compute the max of + * @return The max value + */ __device__ double operator()(device_span vals) { return static_cast(*thrust::max_element(thrust::seq, vals.begin(), vals.end())); @@ -62,6 +80,7 @@ struct column_max { * @brief Functor to generate a tdigest. */ struct tdigest_gen { + // @cond template < typename T, typename Func, @@ -79,6 +98,7 @@ struct tdigest_gen { { CUDF_FAIL("Invalid tdigest test type"); } + // @endcond }; /** @@ -120,10 +140,13 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv, CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_max, *expected_max); } +/// Expected values for tdigest tests struct expected_tdigest { + // @cond column_view mean; column_view weight; double min, max; + // @endcond }; /** diff --git a/cpp/include/cudf_test/type_list_utilities.hpp b/cpp/include/cudf_test/type_list_utilities.hpp index 42e9af47ece..0cd9f39e29d 100644 --- a/cpp/include/cudf_test/type_list_utilities.hpp +++ b/cpp/include/cudf_test/type_list_utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,10 +42,10 @@ * The test `mytest` will be invoked 3 times, once for each of the types `int, * char, float`. * - * Instead of using `::testing::Types` directly, we provide - * `cudf::testing::Types`. This is a drop in replacement for GTest's - * `::testing::Types`. In lieu of including `gtest/gtest.h`, include - * `cudf_gtest.hpp` to ensure `cudf::testing::Types` is used. + * Instead of using \::testing::Types directly, we provide + * `cudf::test::Types`. This is a drop in replacement for GTest's + * \::testing::Types. 
In lieu of including `gtest/gtest.h`, include + * `cudf_gtest.hpp` to ensure `cudf::test::Types` is used. * * Using the utilities in this file, you can compose complex type lists. * @@ -79,6 +79,7 @@ namespace test { // Types ----------------------------------------- using ::testing::Types; +// @cond template struct GetTypeImpl { static_assert(D == 0, "Out of bounds"); @@ -98,6 +99,7 @@ struct GetTypeImpl, 0> { using type = typename Types::Head; }; +// @endcond /** * @brief Gives the specified type from a type list @@ -115,6 +117,7 @@ template using GetType = typename GetTypeImpl::type; // GetSize ------------------------------- +// @cond template struct GetSizeImpl; @@ -122,6 +125,7 @@ template struct GetSizeImpl> { static constexpr auto value = sizeof...(TYPES); }; +// @endcond /** * @brief Returns the size (number of elements) in a type list @@ -135,6 +139,7 @@ template constexpr auto GetSize = GetSizeImpl::value; // Concat ----------------------------------------- +// @cond namespace detail { template struct Concat2; @@ -167,6 +172,7 @@ template <> struct ConcatImpl<> { using type = Types<>; }; +// @endcond /** * @brief Concatenates compile-time lists of types into a single type list. 
@@ -181,6 +187,7 @@ template using Concat = typename ConcatImpl::type; // Flatten ----------------------------------------- +// @cond template struct FlattenImpl; @@ -198,6 +205,7 @@ template struct FlattenImpl, TAIL...>> { using type = typename FlattenImpl>::type; }; +// @endcond /** * @brief Flattens nested compile-time lists of types into a single list of @@ -214,6 +222,7 @@ template using Flatten = typename FlattenImpl::type; // CrossProduct ----------------------------------------- +// @cond namespace detail { // prepend T in TUPLE template @@ -262,6 +271,7 @@ struct CrossProductImpl, TAIL...> { template struct CrossProductImpl : CrossProductImpl, TAIL...> { }; +// @endcond /** * @brief Creates a new type list from the cross product (cartesian product) of @@ -282,6 +292,7 @@ template using CrossProduct = typename CrossProductImpl::type; // AllSame ----------------------------------------- +// @cond namespace detail { template struct AllSame : std::false_type { @@ -305,11 +316,12 @@ struct AllSame> : AllSame { }; } // namespace detail +// @endcond /** * @brief Indicates if all types in a list are identical. * - * This is useful as a predicate for for `RemoveIf`. + * This is useful as a predicate for `RemoveIf`. * * Example: * ``` @@ -323,11 +335,17 @@ struct AllSame> : AllSame { * ``` */ struct AllSame { + /** + * @brief Invoked as predicate for RemoveIf + * + * @tparam ITEMS The types to check for being all the same + */ template using Call = detail::AllSame; }; // Exists --------------------------------- +// @cond // Do a linear search to find NEEDLE in HAYSACK template struct ExistsImpl; @@ -346,6 +364,7 @@ struct ExistsImpl> : std::true_type { template struct ExistsImpl> : ExistsImpl> { }; +// @endcond /** * @brief Indicates if a type exists within a type list. 
@@ -394,6 +413,7 @@ struct ContainedIn { }; // RemoveIf ----------------------------------------- +// @cond template struct RemoveIfImpl; @@ -408,6 +428,7 @@ struct RemoveIfImpl> { Concat::value, Types<>, Types>::type, typename RemoveIfImpl>::type>; }; +// @endcond /** * @brief Removes types from a type list that satisfy a predicate @@ -432,7 +453,7 @@ template using RemoveIf = typename RemoveIfImpl::type; // Transform -------------------------------- - +// @cond template struct TransformImpl; @@ -440,6 +461,7 @@ template struct TransformImpl> { using type = Types...>; }; +// @endcond /** * @brief Applies a transformation to every type in a type list @@ -460,7 +482,7 @@ template using Transform = typename TransformImpl::type; // Repeat -------------------------------- - +// @cond namespace detail { template struct Repeat; @@ -475,6 +497,7 @@ struct Repeat> { using type = Types; }; } // namespace detail +// @endcond /** * @brief Transformation that repeats a type for a specified count. @@ -492,12 +515,17 @@ struct Repeat> { */ template struct Repeat { + /** + * @brief Invoked as the transformation for Transform + * + * @tparam T The type to repeat + */ template using Call = typename detail::Repeat>::type; }; // Append -------------------------------- - +// @cond template struct AppendImpl; @@ -505,6 +533,7 @@ template struct AppendImpl, TAIL...> { using type = Types; }; +// @endcond /** * @brief Appends types to a type list @@ -523,7 +552,7 @@ using Append = typename AppendImpl::type; // Remove ------------------------------------------- // remove items from tuple given by their indices - +// @cond namespace detail { template struct Remove; @@ -556,6 +585,7 @@ template struct RemoveImpl { using type = typename detail::Remove::type; }; +// @endcond /** * @brief Removes types at specified indices from a type list. 
@@ -567,7 +597,7 @@ template using Remove = typename RemoveImpl::type; // Unique -------------------------------- - +// @cond namespace detail { template struct Unique; @@ -591,6 +621,7 @@ template struct UniqueImpl> { using type = typename detail::Unique::type; }; +// @endcond /** * @brief Removes duplicate types from a type list diff --git a/cpp/include/cudf_test/type_lists.hpp b/cpp/include/cudf_test/type_lists.hpp index ac2892a0f34..6ea4311c8cb 100644 --- a/cpp/include/cudf_test/type_lists.hpp +++ b/cpp/include/cudf_test/type_lists.hpp @@ -146,6 +146,17 @@ make_type_param_scalar(T const init_value) return static_cast(init_value); } +/** + * @brief Convert the numeric value of type T to a fixed width type of type TypeParam. + * + * This function is necessary because some types (such as timestamp types) are not directly + * constructible from numeric types. This function is offered as a convenience to allow + * implicitly constructing such objects from numeric values. + * + * @param init_value Value used to initialize the fixed width type + * @return A fixed width type - TimeStamp of type TypeParam with the + * value specified + */ template std::enable_if_t::value, TypeParam> make_type_param_scalar( T const init_value) @@ -153,6 +164,14 @@ std::enable_if_t::value, TypeParam> make_type_pa return TypeParam{typename TypeParam::duration(init_value)}; } +/** + * @brief Convert the numeric value of type T to a string type. + * + * This function converts the numeric value of type T to its string representation. + * + * @param init_value Value to convert to a string + * @return string representation of the value + */ template std::enable_if_t, TypeParam> make_type_param_scalar( T const init_value) @@ -283,9 +302,11 @@ using FixedWidthTypes = Concat; /** * @brief Provides a list of all fixed-width element types except for the - * fixed-point types for use in GTest typed tests.
Certain tests written for - * fixed-width types don't work for fixed-point as fixed-point types aren't - * constructible from other fixed-width types (a scale needs to be specified) + * fixed-point types for use in GTest typed tests. + * + * Certain tests written for fixed-width types don't work for fixed-point as + * fixed-point types aren't constructible from other fixed-width types + * because a scale needs to be specified. * * Example: * ``` diff --git a/cpp/include/nvtext/bpe_tokenize.hpp b/cpp/include/nvtext/bpe_tokenize.hpp index dcd24674029..8658fc29333 100644 --- a/cpp/include/nvtext/bpe_tokenize.hpp +++ b/cpp/include/nvtext/bpe_tokenize.hpp @@ -35,19 +35,43 @@ namespace nvtext { */ struct bpe_merge_pairs { struct bpe_merge_pairs_impl; - std::unique_ptr impl{}; + std::unique_ptr impl{}; ///< Implementation of the BPE merge pairs table. + /** + * @brief Construct a new bpe merge pairs object + * + * @param input The input file containing the BPE merge pairs + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the device memory + */ bpe_merge_pairs(std::unique_ptr&& input, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** + * @brief Construct a new bpe merge pairs object + * + * @param input The input column of strings + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the device memory + */ bpe_merge_pairs(cudf::strings_column_view const& input, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); ~bpe_merge_pairs(); + /** + * @brief Returns the number of merge pairs in the table. 
+ * + * @return The number of merge pairs in the table + */ cudf::size_type get_size(); + /** + * @brief Returns the number of unique merge pairs in the table. + * + * @return The number of unique merge pairs in the table + */ std::size_t get_map_size(); }; @@ -80,6 +104,7 @@ struct bpe_merge_pairs { * * @param filename_merges Local file path of pairs encoded in UTF-8. * @param mr Memory resource to allocate any returned objects. + * @return A nvtext::bpe_merge_pairs object */ std::unique_ptr load_merge_pairs_file( std::string const& filename_merges, @@ -111,6 +136,7 @@ std::unique_ptr load_merge_pairs_file( * @param separator String used to build the output after encoding. * Default is a space. * @param mr Memory resource to allocate any returned objects. + * @return An encoded column of strings. */ std::unique_ptr byte_pair_encoding( cudf::strings_column_view const& input, diff --git a/cpp/include/nvtext/subword_tokenize.hpp b/cpp/include/nvtext/subword_tokenize.hpp index 9d75295cd39..df2b82102d1 100644 --- a/cpp/include/nvtext/subword_tokenize.hpp +++ b/cpp/include/nvtext/subword_tokenize.hpp @@ -31,17 +31,22 @@ namespace nvtext { * @brief The vocabulary data for use with the subword_tokenize function. 
*/ struct hashed_vocabulary { - uint16_t first_token_id{}; - uint16_t separator_token_id{}; - uint16_t unknown_token_id{}; - uint32_t outer_hash_a{}; - uint32_t outer_hash_b{}; - uint16_t num_bins{}; - std::unique_ptr table; // uint64 - std::unique_ptr bin_coefficients; // uint64 - std::unique_ptr bin_offsets; // uint16 - std::unique_ptr cp_metadata; // uint32 - std::unique_ptr aux_cp_table; // uint64 + uint16_t first_token_id{}; ///< The first token id in the vocabulary + uint16_t separator_token_id{}; ///< The separator token id in the vocabulary + uint16_t unknown_token_id{}; ///< The unknown token id in the vocabulary + uint32_t outer_hash_a{}; ///< The a parameter for the outer hash + uint32_t outer_hash_b{}; ///< The b parameter for the outer hash + uint16_t num_bins{}; ///< Number of bins + std::unique_ptr table; ///< uint64 column, the flattened hash table with key, value + ///< pairs packed in 64-bits + std::unique_ptr bin_coefficients; ///< uint64 column, containing the hashing + ///< parameters for each hash bin on the GPU + std::unique_ptr bin_offsets; ///< uint16 column, containing the start index of each + ///< bin in the flattened hash table + std::unique_ptr + cp_metadata; ///< uint32 column, The code point metadata table to use for normalization + std::unique_ptr + aux_cp_table; ///< uint64 column, The auxiliary code point table to use for normalization }; /** diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp index a4ff18054b1..471f0e4fa46 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp @@ -55,9 +55,24 @@ using python_callable_type = void*; */ class python_oauth_refresh_callback : public RdKafka::OAuthBearerTokenRefreshCb { public: + /** + * @brief Construct a new python oauth refresh callback object + * + * @param callback_wrapper Cython wrapper that will + * be used to invoke the 
`python_callable`. This wrapper serves the purpose + * of preventing us from having to link against the Python development library + * in libcudf_kafka. + * @param python_callable pointer to a Python `functools.partial` object + */ python_oauth_refresh_callback(kafka_oauth_callback_wrapper_type callback_wrapper, python_callable_type python_callable); + /** + * @brief Invoke the Python callback function to get the OAuth token and its expiration time + * + * @param handle The RdKafka handle that requires a refreshed token + * @param oauthbearer_config The value of the `sasl.oauthbearer.config` configuration property for `handle` + */ void oauthbearer_token_refresh_cb(RdKafka::Handle* handle, const std::string& oauthbearer_config); private: diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp index c65774d2e1a..dc2c62d03c0 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp @@ -141,6 +141,7 @@ class kafka_consumer : public cudf::io::datasource { * @param[in] cached If True uses the last retrieved value from the Kafka broker, if False * the latest value will be retrieved from the Kafka broker by making a network * request. + * @return The watermark offset value for the specified topic/partition */ std::map get_watermark_offset(std::string const& topic, int partition, @@ -179,6 +180,7 @@ class kafka_consumer : public cudf::io::datasource { * @brief Close the underlying socket connection to Kafka and clean up system resources * * @throws cudf::logic_error on failure to close the connection + * @param timeout Max milliseconds to wait on a response */ void close(int timeout);