diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 5c7513a6c99..a26a0c7947b 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -120,18 +120,56 @@ class aggregation { }; aggregation() = delete; + + /** + * @brief Construct a new aggregation object + * + * @param a aggregation::Kind enum value + */ aggregation(aggregation::Kind a) : kind{a} {} Kind kind; ///< The aggregation to perform virtual ~aggregation() = default; + /** + * @brief Compares two aggregation objects for equality + * + * @param other The other aggregation to compare with + * @return True if the two aggregations are equal + */ [[nodiscard]] virtual bool is_equal(aggregation const& other) const { return kind == other.kind; } + + /** + * @brief Computes the hash value of the aggregation + * + * @return The hash value of the aggregation + */ [[nodiscard]] virtual size_t do_hash() const { return std::hash{}(kind); } + + /** + * @pure @brief Clones the aggregation object + * + * @return A copy of the aggregation object + */ [[nodiscard]] virtual std::unique_ptr clone() const = 0; // override functions for compound aggregations + /** + * @pure @brief Get the simple aggregations that this aggregation requires to compute. + * + * @param col_type The type of the column to aggregate + * @param collector The collector visitor pattern to use to collect the simple aggregations + * @return Vector of pre-requisite simple aggregations + */ virtual std::vector> get_simple_aggregations( data_type col_type, cudf::detail::simple_aggregations_collector& collector) const = 0; - virtual void finalize(cudf::detail::aggregation_finalizer& finalizer) const = 0; + + /** + * @pure @brief Compute the aggregation after pre-requisite simple aggregations have been + * computed. 
+ * + * @param finalizer The finalizer visitor pattern to use to compute the aggregation + */ + virtual void finalize(cudf::detail::aggregation_finalizer& finalizer) const = 0; }; /** @@ -147,7 +185,8 @@ class rolling_aggregation : public virtual aggregation { protected: rolling_aggregation() {} - rolling_aggregation(aggregation::Kind a) : aggregation{a} {} + /// constructor inherited from cudf::aggregation + using aggregation::aggregation; }; /** @@ -205,46 +244,57 @@ class segmented_reduce_aggregation : public virtual aggregation { segmented_reduce_aggregation() {} }; +/// Type of code in the user defined function string. enum class udf_type : bool { CUDA, PTX }; +/// Type of correlation method. enum class correlation_type : int32_t { PEARSON, KENDALL, SPEARMAN }; /// Factory to create a SUM aggregation +/// @return A SUM aggregation object template std::unique_ptr make_sum_aggregation(); /// Factory to create a PRODUCT aggregation +/// @return A PRODUCT aggregation object template std::unique_ptr make_product_aggregation(); /// Factory to create a MIN aggregation +/// @return A MIN aggregation object template std::unique_ptr make_min_aggregation(); /// Factory to create a MAX aggregation +/// @return A MAX aggregation object template std::unique_ptr make_max_aggregation(); /** * @brief Factory to create a COUNT aggregation * - * @param null_handling Indicates if null values will be counted. 
+ * @param null_handling Indicates if null values will be counted + * @return A COUNT aggregation object */ template std::unique_ptr make_count_aggregation(null_policy null_handling = null_policy::EXCLUDE); /// Factory to create an ANY aggregation +/// @return An ANY aggregation object template std::unique_ptr make_any_aggregation(); /// Factory to create a ALL aggregation +/// @return An ALL aggregation object template std::unique_ptr make_all_aggregation(); /// Factory to create a SUM_OF_SQUARES aggregation +/// @return A SUM_OF_SQUARES aggregation object template std::unique_ptr make_sum_of_squares_aggregation(); /// Factory to create a MEAN aggregation +/// @return A MEAN aggregation object template std::unique_ptr make_mean_aggregation(); @@ -258,6 +308,7 @@ std::unique_ptr make_mean_aggregation(); * deviation across multiple discrete sets. See * `https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm` for more * detail. + * @return An M2 aggregation object */ template std::unique_ptr make_m2_aggregation(); @@ -269,6 +320,7 @@ std::unique_ptr make_m2_aggregation(); * `variance` is `N - ddof`, where `N` is the population size. * * @throw cudf::logic_error if input type is chrono or compound types. + * @return A VARIANCE aggregation object */ template std::unique_ptr make_variance_aggregation(size_type ddof = 1); @@ -280,11 +332,13 @@ std::unique_ptr make_variance_aggregation(size_type ddof = 1); * `std` is `N - ddof`, where `N` is the population size. * * @throw cudf::logic_error if input type is chrono or compound types.
+ * @return A STD aggregation object */ template std::unique_ptr make_std_aggregation(size_type ddof = 1); /// Factory to create a MEDIAN aggregation +/// @return A MEDIAN aggregation object template std::unique_ptr make_median_aggregation(); @@ -293,54 +347,60 @@ std::unique_ptr make_median_aggregation(); * * @param quantiles The desired quantiles * @param interp The desired interpolation + * @return A QUANTILE aggregation object */ template std::unique_ptr make_quantile_aggregation(std::vector const& quantiles, interpolation interp = interpolation::LINEAR); /** - * @brief Factory to create an `argmax` aggregation + * @brief Factory to create an ARGMAX aggregation * - * `argmax` returns the index of the maximum element. + * ARGMAX returns the index of the maximum element. + * @return An ARGMAX aggregation object */ template std::unique_ptr make_argmax_aggregation(); /** - * @brief Factory to create an `argmin` aggregation + * @brief Factory to create an ARGMIN aggregation * - * `argmin` returns the index of the minimum element. + * ARGMIN returns the index of the minimum element. + * @return An ARGMIN aggregation object */ template std::unique_ptr make_argmin_aggregation(); /** - * @brief Factory to create a `nunique` aggregation + * @brief Factory to create a NUNIQUE aggregation * - * `nunique` returns the number of unique elements. - * @param null_handling Indicates if null values will be counted. + * NUNIQUE returns the number of unique elements. + * @param null_handling Indicates if null values will be counted + * @return A NUNIQUE aggregation object */ template std::unique_ptr make_nunique_aggregation(null_policy null_handling = null_policy::EXCLUDE); /** - * @brief Factory to create a `nth_element` aggregation + * @brief Factory to create an NTH_ELEMENT aggregation * - * `nth_element` returns the n'th element of the group/series. + * NTH_ELEMENT returns the n'th element of the group/series. * * If @p n is not within the range `[-group_size, group_size)`, the result of * the respective group will be null.
Negative indices `[-group_size, -1]` * corresponds to `[0, group_size-1]` indices respectively where `group_size` is * the size of each group. * - * @param n index of nth element in each group. - * @param null_handling Indicates to include/exclude nulls during indexing. + * @param n index of nth element in each group + * @param null_handling Indicates to include/exclude nulls during indexing + * @return An NTH_ELEMENT aggregation object */ template std::unique_ptr make_nth_element_aggregation( size_type n, null_policy null_handling = null_policy::INCLUDE); /// Factory to create a ROW_NUMBER aggregation +/// @return A ROW_NUMBER aggregation object template std::unique_ptr make_row_number_aggregation(); @@ -408,12 +468,13 @@ std::unique_ptr make_row_number_aggregation(); * * @endcode * - * @param method The ranking method used for tie breaking (same values). + * @param method The ranking method used for tie breaking (same values) * @param column_order The desired sort order for ranking * @param null_handling flag to include nulls during ranking. If nulls are not included, * the corresponding rank will be null. * @param null_precedence The desired order of null compared to other elements for column * @param percentage enum to denote the type of conversion of ranks to percentage in range (0,1] + * @return A RANK aggregation object */ template std::unique_ptr make_rank_aggregation(rank_method method, @@ -430,7 +491,8 @@ std::unique_ptr make_rank_aggregation(rank_method method, * If `null_handling` is set to `EXCLUDE`, null elements are dropped from each * of the list rows. * - * @param null_handling Indicates whether to include/exclude nulls in list elements.
+ * @param null_handling Indicates whether to include/exclude nulls in list elements + * @return A COLLECT_LIST aggregation object */ template std::unique_ptr make_collect_list_aggregation( @@ -450,17 +512,28 @@ std::unique_ptr make_collect_list_aggregation( * equal. * @param nans_equal Flag to specify whether NaN values in floating point column should be * considered equal. + * @return A COLLECT_SET aggregation object */ template std::unique_ptr make_collect_set_aggregation(null_policy null_handling = null_policy::INCLUDE, null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::UNEQUAL); -/// Factory to create a LAG aggregation +/** + * @brief Factory to create a LAG aggregation + * + * @param offset The number of rows to lag the input + * @return A LAG aggregation object + */ template std::unique_ptr make_lag_aggregation(size_type offset); -/// Factory to create a LEAD aggregation +/** + * @brief Factory to create a LEAD aggregation + * + * @param offset The number of rows to lead the input + * @return A LEAD aggregation object + */ template std::unique_ptr make_lead_aggregation(size_type offset); @@ -471,7 +544,7 @@ std::unique_ptr make_lead_aggregation(size_type offset); * @param[in] user_defined_aggregator A string containing the aggregator code * @param[in] output_type expected output type * - * @return aggregation unique pointer housing user_defined_aggregator string. + * @return An aggregation containing a user-defined aggregator string */ template std::unique_ptr make_udf_aggregation(udf_type type, @@ -486,6 +559,8 @@ std::unique_ptr make_udf_aggregation(udf_type type, * groupby `COLLECT_LIST` aggregations into a final `COLLECT_LIST` result. As such, it requires the * input lists column to be non-nullable (the child column containing list entries is not subjected * to this requirement). 
+ * + * @return A MERGE_LISTS aggregation object */ template std::unique_ptr make_merge_lists_aggregation(); @@ -510,6 +585,7 @@ std::unique_ptr make_merge_lists_aggregation(); * during dropping duplicate list entries. * @param nans_equal Flag to specify whether NaN values in floating point column should be * considered equal during dropping duplicate list entries. + * @return A MERGE_SETS aggregation object */ template std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal = null_equality::EQUAL, @@ -526,6 +602,8 @@ std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal = nu * * The input `M2` aggregation values are expected to be all non-negative numbers, since they * were output from `M2` aggregation. + * + * @return A MERGE_M2 aggregation object */ template std::unique_ptr make_merge_m2_aggregation(); @@ -535,9 +613,10 @@ std::unique_ptr make_merge_m2_aggregation(); * * Compute covariance between two columns. * The input columns are child columns of a non-nullable struct columns. - * @param min_periods Minimum number of non-null observations required to produce a result. + * @param min_periods Minimum number of non-null observations required to produce a result * @param ddof Delta Degrees of Freedom. The divisor used in calculations is N - ddof, where N is * the number of non-null observations. + * @return A COVARIANCE aggregation object */ template std::unique_ptr make_covariance_aggregation(size_type min_periods = 1, size_type ddof = 1); @@ -549,7 +628,8 @@ std::unique_ptr make_covariance_aggregation(size_type min_periods = 1, siz * The input columns are child columns of a non-nullable struct columns. * * @param type correlation_type - * @param min_periods Minimum number of non-null observations required to produce a result. 
+ * @param min_periods Minimum number of non-null observations required to produce a result + * @return A CORRELATION aggregation object */ template std::unique_ptr make_correlation_aggregation(correlation_type type, @@ -587,7 +667,7 @@ std::unique_ptr make_correlation_aggregation(correlation_type type, * the computed tdigests: A value of 1000 will result in a tdigest containing no * more than 1000 centroids (32 bytes each). Higher result in more accurate tdigest information. * - * @returns A TDIGEST aggregation object. + * @return A TDIGEST aggregation object */ template std::unique_ptr make_tdigest_aggregation(int max_centroids = 1000); @@ -625,7 +705,7 @@ std::unique_ptr make_tdigest_aggregation(int max_centroids = 1000); * the computed tdigests: A value of 1000 will result in a tdigest containing no * more than 1000 centroids (32 bytes each). Higher result in more accurate tdigest information. * - * @returns A MERGE_TDIGEST aggregation object. + * @return A MERGE_TDIGEST aggregation object */ template std::unique_ptr make_merge_tdigest_aggregation(int max_centroids = 1000); diff --git a/cpp/include/cudf/concatenate.hpp b/cpp/include/cudf/concatenate.hpp index 182cbbdc3ec..62c75fffc5c 100644 --- a/cpp/include/cudf/concatenate.hpp +++ b/cpp/include/cudf/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -52,9 +52,9 @@ rmm::device_buffer concatenate_masks( * If types of the input columns mismatch * * @param columns_to_concat host_span of column views to be concatenated into a single column - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return Unique pointer to a single table having all the rows from the - * elements of `columns_to_concat` respectively in the same order. 
+ * @param mr Device memory resource used to allocate the returned column's device memory + * @return A single column having all the rows from the elements of `columns_to_concat` respectively + * in the same order. */ std::unique_ptr concatenate( host_span columns_to_concat, @@ -82,9 +82,9 @@ std::unique_ptr concatenate( * If number of columns mismatch * * @param tables_to_concat host_span of table views to be concatenated into a single table - * @param mr Device memory resource used to allocate the returned table's device memory. - * @return Unique pointer to a single table having all the rows from the - * elements of `tables_to_concat` respectively in the same order. + * @param mr Device memory resource used to allocate the returned table's device memory + * @return A single table having all the rows from the elements of `tables_to_concat` respectively + * in the same order. */ std::unique_ptr concatenate( host_span tables_to_concat, diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 880edaedbd2..bc64dbe6cd4 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -95,6 +95,7 @@ std::unique_ptr
gather( * * @param source_table Table that will be reversed * @param mr Device memory resource used to allocate the returned table's device memory + * @return Reversed table */ std::unique_ptr
reverse( table_view const& source_table, @@ -111,6 +112,7 @@ std::unique_ptr
reverse( * * @param source_column Column that will be reversed - * @param mr Device memory resource used to allocate the returned table's device memory + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Reversed column */ std::unique_ptr reverse( column_view const& source_column, @@ -150,7 +152,7 @@ std::unique_ptr reverse( * are to be scattered * @param check_bounds Optionally perform bounds checking on the values of * `scatter_map` and throw an error if any of its values are out of bounds. - * @param mr Device memory resource used to allocate the returned table's device memory. + * @param mr Device memory resource used to allocate the returned table's device memory * @return Result of scattering values from source to target */ std::unique_ptr
scatter( @@ -190,7 +192,7 @@ std::unique_ptr
scatter( * are to be scattered * @param check_bounds Optionally perform bounds checking on the values of * `scatter_map` and throw an error if any of its values are out of bounds. - * @param mr Device memory resource used to allocate the returned table's device memory. + * @param mr Device memory resource used to allocate the returned table's device memory * @return Result of scattering values from source to target */ std::unique_ptr
scatter( @@ -230,7 +232,7 @@ std::unique_ptr empty_like(scalar const& input); * Supports only fixed-width types. * * @param[in] input Immutable view of input column to emulate - * @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN. + * @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN * @param[in] mr Device memory resource used to allocate the returned column's device memory * @return A column with sufficient uninitialized capacity to hold the same * number of elements as `input` of the same type as `input.type()` @@ -246,7 +248,7 @@ std::unique_ptr allocate_like( * * @param[in] input Immutable view of input column to emulate * @param[in] size The desired number of elements that the new column should have capacity for - * @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN. + * @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN * @param[in] mr Device memory resource used to allocate the returned column's device memory * @return A column with sufficient uninitialized capacity to hold the specified number of elements * as `input` of the same type as `input.type()` @@ -326,13 +328,13 @@ void copy_range_in_place(column_view const& source, * @p target_begin + (@p source_end - @p source_begin) > @p target.size()). * @throws cudf::logic_error if @p target and @p source have different types. * - * @param source The column to copy from inside the range. - * @param target The column to copy from outside the range. + * @param source The column to copy from inside the range + * @param target The column to copy from outside the range * @param source_begin The starting index of the source range (inclusive) * @param source_end The index of the last element in the source range * (exclusive) * @param target_begin The starting index of the target range (inclusive) - * @param mr Device memory resource used to allocate the returned column's device memory. 
+ * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr The result target column */ std::unique_ptr copy_range( @@ -369,13 +371,15 @@ std::unique_ptr copy_range( * @note if the input is nullable, the output will be nullable. * @note if the fill value is null, the output will be nullable. * - * @param input Column to be shifted. - * @param offset The offset by which to shift the input. - * @param fill_value Fill value for indeterminable outputs. + * @param input Column to be shifted + * @param offset The offset by which to shift the input + * @param fill_value Fill value for indeterminable outputs * @param mr Device memory resource used to allocate the returned result's device memory * * @throw cudf::logic_error if @p input dtype is neither fixed-width nor string type * @throw cudf::logic_error if @p fill_value dtype does not match @p input dtype. + * + * @return The shifted column */ std::unique_ptr shift( column_view const& input, @@ -410,8 +414,8 @@ std::unique_ptr shift( * the range [0, input.size()). * * @param input View of column to slice - * @param indices Indices used to take slices of `input`. - * @return Vector of views of `input` indicated by the ranges in `indices`. 
+ * @param indices Indices used to take slices of `input` + * @return Vector of views of `input` indicated by the ranges in `indices` */ std::vector slice(column_view const& input, host_span indices); /** @@ -449,8 +453,8 @@ std::vector slice(column_view const& input, std::initializer_list slice(table_view const& input, host_span indices); /** @@ -489,7 +493,7 @@ std::vector slice(table_view const& input, std::initializer_list split(column_view const& input, host_span splits); /** @@ -530,7 +534,7 @@ std::vector split(column_view const& input, std::initializer_list split(table_view const& input, host_span splits); /** @@ -555,8 +559,26 @@ struct packed_columns { */ struct metadata { metadata() = default; + + /** + * @brief Construct a new metadata object + * + * @param v Host-side buffer containing metadata + */ metadata(std::vector&& v) : data_(std::move(v)) {} + + /** + * @brief Returns pointer to the host-side metadata buffer data + * + * @return Pointer to the host-side metadata buffer + */ [[nodiscard]] uint8_t const* data() const { return data_.data(); } + + /** + * @brief Returns size of the metadata buffer + * + * @return Size of the metadata buffer + */ [[nodiscard]] size_t size() const { return data_.size(); } private: @@ -567,17 +589,24 @@ struct packed_columns { : metadata_(std::make_unique()), gpu_data(std::make_unique()) { } + + /** + * @brief Construct a new packed columns object + * + * @param md Host-side metadata buffer + * @param gd Device-side data buffer + */ packed_columns(std::unique_ptr&& md, std::unique_ptr&& gd) : metadata_(std::move(md)), gpu_data(std::move(gd)) { } - std::unique_ptr metadata_; - std::unique_ptr gpu_data; + std::unique_ptr metadata_; ///< Host-side metadata buffer + std::unique_ptr gpu_data; ///< Device-side data buffer }; /** - * @brief The result(s) of a `contiguous_split` + * @brief The result(s) of a cudf::contiguous_split * * @ingroup copy_split * @@ -591,8 +620,8 @@ struct packed_columns { * not outlive the memory 
owned by `data` */ struct packed_table { - cudf::table_view table; - packed_columns data; + cudf::table_view table; ///< Result table_view of a cudf::contiguous_split + packed_columns data; ///< Column data owned }; /** @@ -666,8 +695,8 @@ packed_columns pack(cudf::table_view const& input, * @param table View of the table to pack * @param contiguous_buffer A contiguous buffer of device memory which contains the data referenced * by the columns in `table` - * @param buffer_size The size of `contiguous_buffer`. - * @return Vector of bytes representing the metadata used to `unpack` a packed_columns struct. + * @param buffer_size The size of `contiguous_buffer` + * @return Vector of bytes representing the metadata used to `unpack` a packed_columns struct */ packed_columns::metadata pack_metadata(table_view const& table, uint8_t const* contiguous_buffer, @@ -834,10 +863,10 @@ std::unique_ptr copy_if_else( * * @param[in] input table_view (set of dense columns) to scatter * @param[in] target table_view to modify with scattered values from `input` - * @param[in] boolean_mask column_view which acts as boolean mask. - * @param[in] mr Device memory resource used to allocate device memory of the returned table. + * @param[in] boolean_mask column_view which acts as boolean mask + * @param[in] mr Device memory resource used to allocate device memory of the returned table * - * @returns Returns a table by scattering `input` into `target` as per `boolean_mask`. + * @returns Returns a table by scattering `input` into `target` as per `boolean_mask` */ std::unique_ptr
boolean_mask_scatter( table_view const& input, @@ -871,10 +900,10 @@ std::unique_ptr
boolean_mask_scatter( * * @param[in] input scalars to scatter * @param[in] target table_view to modify with scattered values from `input` - * @param[in] boolean_mask column_view which acts as boolean mask. - * @param[in] mr Device memory resource used to allocate device memory of the returned table. + * @param[in] boolean_mask column_view which acts as boolean mask + * @param[in] mr Device memory resource used to allocate device memory of the returned table * - * @returns Returns a table by scattering `input` into `target` as per `boolean_mask`. + * @returns Returns a table by scattering `input` into `target` as per `boolean_mask` */ std::unique_ptr
boolean_mask_scatter( std::vector> const& input, @@ -892,7 +921,7 @@ std::unique_ptr
boolean_mask_scatter( * * @param input Column view to get the element from * @param index Index into `input` to get the element at - * @param mr Device memory resource used to allocate the returned scalar's device memory. + * @param mr Device memory resource used to allocate the returned scalar's device memory * @return std::unique_ptr Scalar containing the single value */ std::unique_ptr get_element( @@ -927,10 +956,10 @@ enum class sample_with_replacement : bool { * @throws cudf::logic_error if `n` > `input.num_rows()` and `replacement` == FALSE. * @throws cudf::logic_error if `n` < 0. * - * @param input View of a table to sample. - * @param n non-negative number of samples expected from `input`. - * @param replacement Allow or disallow sampling of the same row more than once. - * @param seed Seed value to initiate random number generator. + * @param input View of a table to sample + * @param n non-negative number of samples expected from `input` + * @param replacement Allow or disallow sampling of the same row more than once + * @param seed Seed value to initiate random number generator * @param mr Device memory resource used to allocate the returned table's device memory * * @return std::unique_ptr
Table containing samples from `input` @@ -955,8 +984,8 @@ std::unique_ptr
sample( * * @param input The column which is (and whose descendants are) to be checked for * non-empty null rows. - * @return true If either the column or its descendants have non-empty null rows. - * @return false If neither the column or its descendants have non-empty null rows. + * @return true If either the column or its descendants have non-empty null rows + * @return false If neither the column nor its descendants have non-empty null rows */ bool has_nonempty_nulls(column_view const& input); diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 894eb44e8b1..ac8a4138074 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -38,7 +38,7 @@ namespace datetime { * cudf::column. * * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. + * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t years * @throw cudf::logic_error if input column datatype is not TIMESTAMP @@ -52,7 +52,7 @@ std::unique_ptr extract_year( * cudf::column. * * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. + * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t months * @throw cudf::logic_error if input column datatype is not TIMESTAMP @@ -66,7 +66,7 @@ std::unique_ptr extract_month( * cudf::column. * * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column.
+ * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t days * @throw cudf::logic_error if input column datatype is not TIMESTAMP @@ -80,7 +80,7 @@ std::unique_ptr extract_day( * cudf::column. * * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. + * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t days * @throw cudf::logic_error if input column datatype is not TIMESTAMP @@ -94,7 +94,7 @@ std::unique_ptr extract_weekday( * cudf::column. * * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. + * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t hours * @throw cudf::logic_error if input column datatype is not TIMESTAMP @@ -108,7 +108,7 @@ std::unique_ptr extract_hour( * cudf::column. * * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. + * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t minutes * @throw cudf::logic_error if input column datatype is not TIMESTAMP @@ -122,7 +122,7 @@ std::unique_ptr extract_minute( * cudf::column. * * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. 
+ * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t seconds * @throw cudf::logic_error if input column datatype is not TIMESTAMP @@ -143,7 +143,7 @@ std::unique_ptr extract_second( * cudf::column. * * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. + * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column containing last day of the month as TIMESTAMP_DAYS * @throw cudf::logic_error if input column datatype is not TIMESTAMP @@ -157,9 +157,9 @@ std::unique_ptr last_day_of_month( * returns an int16_t cudf::column. The value is between [1, {365-366}] * * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. + * @param mr Device memory resource used to allocate device memory of the returned column * - * @returns cudf::column of datatype INT16 containing the day number since the start of the year. + * @returns cudf::column of datatype INT16 containing the day number since the start of the year * @throw cudf::logic_error if input column datatype is not a TIMESTAMP */ std::unique_ptr day_of_year( @@ -190,11 +190,11 @@ std::unique_ptr day_of_year( * is not INT16 or INT32. * @throw cudf::logic_error if `timestamps` column size is not equal to `months` column size. * - * @param timestamps cudf::column_view of timestamp type. - * @param months cudf::column_view of integer type containing the number of months to add. - * @param mr Device memory resource used to allocate device memory of the returned column. 
+ * @param timestamps cudf::column_view of timestamp type + * @param months cudf::column_view of integer type containing the number of months to add + * @param mr Device memory resource used to allocate device memory of the returned column * - * @returns cudf::column of timestamp type containing the computed timestamps. + * @returns cudf::column of timestamp type containing the computed timestamps */ std::unique_ptr add_calendrical_months( cudf::column_view const& timestamps, @@ -225,11 +225,11 @@ std::unique_ptr add_calendrical_months( * is not INT16 or INT32. * @throw cudf::logic_error if `timestamps` column size is not equal to `months` column size. * - * @param timestamps cudf::column_view of timestamp type. - * @param months cudf::scalar of integer type containing the number of months to add. - * @param mr Device memory resource used to allocate device memory of the returned column. + * @param timestamps cudf::column_view of timestamp type + * @param months cudf::scalar of integer type containing the number of months to add + * @param mr Device memory resource used to allocate device memory of the returned column * - * @return cudf::column of timestamp type containing the computed timestamps. + * @return cudf::column of timestamp type containing the computed timestamps */ std::unique_ptr add_calendrical_months( cudf::column_view const& timestamps, @@ -244,7 +244,7 @@ std::unique_ptr add_calendrical_months( * `output[i] is null` if `column[i]` is null * * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. 
+ * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of datatype BOOL8 truth value of the corresponding date * @throw cudf::logic_error if input column datatype is not a TIMESTAMP @@ -262,7 +262,7 @@ std::unique_ptr is_leap_year( * @throw cudf::logic_error if input column datatype is not a TIMESTAMP * * @param column cudf::column_view of the input datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. + * @param mr Device memory resource used to allocate device memory of the returned column * @return cudf::column of datatype INT16 of days in month of the corresponding date */ std::unique_ptr days_in_month( @@ -278,7 +278,7 @@ std::unique_ptr days_in_month( * @throw cudf::logic_error if input column datatype is not a TIMESTAMP * * @param column The input column containing datetime values - * @param mr Device memory resource used to allocate device memory of the returned column. + * @param mr Device memory resource used to allocate device memory of the returned column * @return A column of INT16 type indicating which quarter the date is in */ std::unique_ptr extract_quarter( @@ -302,12 +302,12 @@ enum class rounding_frequency : int32_t { /** * @brief Round datetimes up to the nearest multiple of the given frequency. * - * @param column cudf::column_view of the input datetime values. - * @param freq rounding_frequency indicating the frequency to round up to. - * @param mr Device memory resource used to allocate device memory of the returned column. + * @param column cudf::column_view of the input datetime values + * @param freq rounding_frequency indicating the frequency to round up to + * @param mr Device memory resource used to allocate device memory of the returned column * * @throw cudf::logic_error if input column datatype is not TIMESTAMP. - * @return cudf::column of the same datetime resolution as the input column. 
+ * @return cudf::column of the same datetime resolution as the input column */ std::unique_ptr ceil_datetimes( cudf::column_view const& column, @@ -317,12 +317,12 @@ std::unique_ptr ceil_datetimes( /** * @brief Round datetimes down to the nearest multiple of the given frequency. * - * @param column cudf::column_view of the input datetime values. - * @param freq rounding_frequency indicating the frequency to round down to. - * @param mr Device memory resource used to allocate device memory of the returned column. + * @param column cudf::column_view of the input datetime values + * @param freq rounding_frequency indicating the frequency to round down to + * @param mr Device memory resource used to allocate device memory of the returned column * * @throw cudf::logic_error if input column datatype is not TIMESTAMP. - * @return cudf::column of the same datetime resolution as the input column. + * @return cudf::column of the same datetime resolution as the input column */ std::unique_ptr floor_datetimes( cudf::column_view const& column, @@ -332,12 +332,12 @@ std::unique_ptr floor_datetimes( /** * @brief Round datetimes to the nearest multiple of the given frequency. * - * @param column cudf::column_view of the input datetime values. - * @param freq rounding_frequency indicating the frequency to round to. - * @param mr Device memory resource used to allocate device memory of the returned column. + * @param column cudf::column_view of the input datetime values + * @param freq rounding_frequency indicating the frequency to round to + * @param mr Device memory resource used to allocate device memory of the returned column * * @throw cudf::logic_error if input column datatype is not TIMESTAMP. - * @return cudf::column of the same datetime resolution as the input column. 
+ * @return cudf::column of the same datetime resolution as the input column */ std::unique_ptr round_datetimes( cudf::column_view const& column, diff --git a/cpp/include/cudf/filling.hpp b/cpp/include/cudf/filling.hpp index 905a897eb40..bd68bac5fab 100644 --- a/cpp/include/cudf/filling.hpp +++ b/cpp/include/cudf/filling.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -140,7 +140,7 @@ std::unique_ptr
repeat( * * @param input_table Input table * @param count Number of repetitions - * @param mr Device memory resource used to allocate the returned table's device memory. + * @param mr Device memory resource used to allocate the returned table's device memory * @return The result table containing the repetitions */ std::unique_ptr
repeat( @@ -223,7 +223,7 @@ std::unique_ptr sequence( * @param months Months to increment * @param mr Device memory resource used to allocate the returned column's device memory * - * @return Timestamps column with sequences of months. + * @return Timestamps column with sequences of months */ std::unique_ptr calendrical_month_sequence( size_type size, diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp index 3b8354ebc9f..fde0937965a 100644 --- a/cpp/include/cudf/groupby.hpp +++ b/cpp/include/cudf/groupby.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -305,9 +305,9 @@ class groupby { * `offsets[i+1] - offsets[i]` gives the size of group `i`. */ struct groups { - std::unique_ptr
keys; - std::vector offsets; - std::unique_ptr
values; + std::unique_ptr
keys; ///< Table of grouped keys + std::vector offsets; ///< Group Offsets + std::unique_ptr
values; ///< Table of grouped values }; /** @@ -353,10 +353,10 @@ class groupby { * {"x" "x" "x" @ "tt" "tt" @} * @endcode * - * @param[in] values A table whose column null values will be replaced. + * @param[in] values A table whose column null values will be replaced * @param[in] replace_policies Specify the position of replacement values relative to null values, * one for each column - * @param[in] mr Device memory resource used to allocate device memory of the returned column. + * @param[in] mr Device memory resource used to allocate device memory of the returned column * * @return Pair that contains a table with the sorted keys and the result column */ diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index bbff304e547..b3013fdb3cb 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -20,7 +20,7 @@ namespace cudf { -using hash_value_type = uint32_t; +using hash_value_type = uint32_t; ///< Type of hash value /** * @addtogroup column_hash @@ -47,12 +47,12 @@ static constexpr uint32_t DEFAULT_HASH_SEED = 0; /** * @brief Computes the hash value of each row in the input set of columns. * - * @param input The table of columns to hash. - * @param hash_function The hash function enum to use. - * @param seed Optional seed value to use for the hash function. - * @param mr Device memory resource used to allocate the returned column's device memory. + * @param input The table of columns to hash + * @param hash_function The hash function enum to use + * @param seed Optional seed value to use for the hash function + * @param mr Device memory resource used to allocate the returned column's device memory * - * @returns A column where each row is the hash of a column from the input. 
+ * @returns A column where each row is the hash of a column from the input */ std::unique_ptr hash( table_view const& input, diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index bbe0eb0eaac..2e1679a17f3 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,7 +45,7 @@ namespace cudf { * @throw cudf::logic_error if the any of the DLTensor fields are unsupported * * @param managed_tensor a 1D or 2D column-major (Fortran order) tensor - * @param mr Device memory resource used to allocate the returned table's device memory. + * @param mr Device memory resource used to allocate the returned table's device memory * * @return Table with a copy of the tensor data */ @@ -67,7 +67,7 @@ std::unique_ptr
from_dlpack( * or if any of columns have non-zero null count * * @param input Table to convert to DLPack - * @param mr Device memory resource used to allocate the returned DLPack tensor's device memory. + * @param mr Device memory resource used to allocate the returned DLPack tensor's device memory * * @return 1D or 2D DLPack tensor with a copy of the table data, or nullptr */ @@ -84,15 +84,20 @@ DLManagedTensor* to_dlpack( */ /** - * @brief Detailed meta data information for arrow array. + * @brief Detailed metadata information for arrow array. * * As of now this contains only name in the hierarchy of children of cudf column, * but in future this can be updated as per requirement. */ struct column_metadata { - std::string name; - std::vector children_meta; + std::string name; ///< Name of the column + std::vector children_meta; ///< Metadata of children of the column + /** + * @brief Construct a new column metadata object + * + * @param _name Name of the column + */ column_metadata(std::string const& _name) : name(_name) {} column_metadata() = default; }; @@ -119,7 +124,7 @@ std::shared_ptr to_arrow(table_view input, * * @param input arrow:Table that needs to be converted to `cudf::table` * @param mr Device memory resource used to allocate `cudf::table` - * @return cudf table generated from given arrow Table. + * @return cudf table generated from given arrow Table */ std::unique_ptr
from_arrow( diff --git a/cpp/include/cudf/io/avro.hpp b/cpp/include/cudf/io/avro.hpp index 0e00d14291d..17c168f38d4 100644 --- a/cpp/include/cudf/io/avro.hpp +++ b/cpp/include/cudf/io/avro.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,9 +35,6 @@ namespace io { * @file */ -/** - * @brief Builder to build options for `read_avro()`. - */ class avro_reader_options_builder; /** @@ -57,7 +54,7 @@ class avro_reader_options { /** * @brief Constructor from source info. * - * @param src source information used to read avro file. + * @param src source information used to read avro file */ explicit avro_reader_options(source_info const& src) : _source(src) {} @@ -73,54 +70,65 @@ class avro_reader_options { /** * @brief Returns source info. + * + * @return Source info */ [[nodiscard]] source_info const& get_source() const { return _source; } /** * @brief Returns names of the columns to be read. + * + * @return Names of the columns to be read */ [[nodiscard]] std::vector get_columns() const { return _columns; } /** * @brief Returns number of rows to skip from the start. + * + * @return Number of rows to skip from the start */ [[nodiscard]] size_type get_skip_rows() const { return _skip_rows; } /** * @brief Returns number of rows to read. + * + * @return Number of rows to read */ [[nodiscard]] size_type get_num_rows() const { return _num_rows; } /** * @brief Set names of the column to be read. * - * @param col_names Vector of column names. + * @param col_names Vector of column names */ void set_columns(std::vector col_names) { _columns = std::move(col_names); } /** * @brief Sets number of rows to skip. * - * @param val Number of rows to skip from start. 
+ * @param val Number of rows to skip from start */ void set_skip_rows(size_type val) { _skip_rows = val; } /** * @brief Sets number of rows to read. * - * @param val Number of rows to read after skip. + * @param val Number of rows to read after skip */ void set_num_rows(size_type val) { _num_rows = val; } /** * @brief create avro_reader_options_builder which will build avro_reader_options. * - * @param src source information used to read avro file. - * @returns builder to build reader options. + * @param src source information used to read avro file + * @returns builder to build reader options */ static avro_reader_options_builder builder(source_info const& src); }; +/** + * @brief Builder to build options for `read_avro()`. + */ class avro_reader_options_builder { avro_reader_options options; @@ -135,15 +143,15 @@ class avro_reader_options_builder { /** * @brief Constructor from source info. * - * @param src The source information used to read avro file. + * @param src The source information used to read avro file */ explicit avro_reader_options_builder(source_info const& src) : options(src) {} /** * @brief Set names of the column to be read. * - * @param col_names Vector of column names. - * @return this for chaining. + * @param col_names Vector of column names + * @return this for chaining */ avro_reader_options_builder& columns(std::vector col_names) { @@ -154,8 +162,8 @@ class avro_reader_options_builder { /** * @brief Sets number of rows to skip. * - * @param val Number of rows to skip from start. - * @return this for chaining. + * @param val Number of rows to skip from start + * @return this for chaining */ avro_reader_options_builder& skip_rows(size_type val) { @@ -166,8 +174,8 @@ class avro_reader_options_builder { /** * @brief Sets number of rows to read. * - * @param val Number of rows to read after skip. - * @return this for chaining. 
+ * @param val Number of rows to read after skip + * @return this for chaining */ avro_reader_options_builder& num_rows(size_type val) { @@ -184,6 +192,8 @@ class avro_reader_options_builder { * @brief move avro_reader_options member once it's built. * * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `avro_reader_options` object's r-value reference */ avro_reader_options&& build() { return std::move(options); } }; @@ -198,11 +208,11 @@ class avro_reader_options_builder { * auto result = cudf::io::read_avro(options); * @endcode * - * @param options Settings for controlling reading behavior. - * @param mr Device memory resource used to allocate device memory of the table in the returned. + * @param options Settings for controlling reading behavior + * @param mr Device memory resource used to allocate device memory of the table in the returned * table_with_metadata * - * @return The set of columns along with metadata. + * @return The set of columns along with metadata */ table_with_metadata read_avro( avro_reader_options const& options, diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp index 44ede9b0d63..f43952c7153 100644 --- a/cpp/include/cudf/io/csv.hpp +++ b/cpp/include/cudf/io/csv.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -134,7 +134,7 @@ class csv_reader_options { /** * @brief Constructor from source info. * - * @param src source information used to read csv file. + * @param src source information used to read csv file */ explicit csv_reader_options(source_info const& src) : _source(src) {} @@ -151,33 +151,43 @@ class csv_reader_options { /** * @brief Creates a `csv_reader_options_builder` which will build `csv_reader_options`. 
* - * @param src Source information to read csv file. - * @return Builder to build reader options. + * @param src Source information to read csv file + * @return Builder to build reader options */ static csv_reader_options_builder builder(source_info const& src); /** * @brief Returns source info. + * + * @return Source info */ [[nodiscard]] source_info const& get_source() const { return _source; } /** * @brief Returns compression format of the source. + * + * @return Compression format of the source */ [[nodiscard]] compression_type get_compression() const { return _compression; } /** * @brief Returns number of bytes to skip from source start. + * + * @return Number of bytes to skip from source start */ [[nodiscard]] std::size_t get_byte_range_offset() const { return _byte_range_offset; } /** * @brief Returns number of bytes to read. + * + * @return Number of bytes to read */ [[nodiscard]] std::size_t get_byte_range_size() const { return _byte_range_size; } /** * @brief Returns number of bytes to read with padding. + * + * @return Number of bytes to read with padding */ [[nodiscard]] std::size_t get_byte_range_size_with_padding() const { @@ -190,6 +200,8 @@ class csv_reader_options { /** * @brief Returns number of bytes to pad when reading. + * + * @return Number of bytes to pad when reading */ [[nodiscard]] std::size_t get_byte_range_padding() const { @@ -212,21 +224,29 @@ class csv_reader_options { /** * @brief Returns names of the columns. + * + * @return Names of the columns */ [[nodiscard]] std::vector const& get_names() const { return _names; } /** * @brief Returns prefix to be used for column ID. + * + * @return Prefix to be used for column ID */ [[nodiscard]] std::string get_prefix() const { return _prefix; } /** * @brief Whether to rename duplicate column names. 
+ * + * @return `true` if duplicate column names are renamed */ [[nodiscard]] bool is_enabled_mangle_dupe_cols() const { return _mangle_dupe_cols; } /** * @brief Returns names of the columns to be read. + * + * @return Names of the columns to be read */ [[nodiscard]] std::vector const& get_use_cols_names() const { @@ -235,91 +255,127 @@ class csv_reader_options { /** * @brief Returns indexes of columns to read. + * + * @return Indexes of columns to read */ [[nodiscard]] std::vector const& get_use_cols_indexes() const { return _use_cols_indexes; } /** * @brief Returns number of rows to read. + * + * @return Number of rows to read */ [[nodiscard]] size_type get_nrows() const { return _nrows; } /** * @brief Returns number of rows to skip from start. + * + * @return Number of rows to skip from start */ [[nodiscard]] size_type get_skiprows() const { return _skiprows; } /** * @brief Returns number of rows to skip from end. + * + * @return Number of rows to skip from end */ [[nodiscard]] size_type get_skipfooter() const { return _skipfooter; } /** * @brief Returns header row index. + * + * @return Header row index */ [[nodiscard]] size_type get_header() const { return _header; } /** * @brief Returns line terminator. + * + * @return Line terminator */ [[nodiscard]] char get_lineterminator() const { return _lineterminator; } /** * @brief Returns field delimiter. + * + * @return Field delimiter */ [[nodiscard]] char get_delimiter() const { return _delimiter; } /** * @brief Returns numeric data thousands separator. + * + * @return Numeric data thousands separator */ [[nodiscard]] char get_thousands() const { return _thousands; } /** * @brief Returns decimal point character. + * + * @return Decimal point character */ [[nodiscard]] char get_decimal() const { return _decimal; } /** * @brief Returns comment line start character. 
+ * + * @return Comment line start character */ [[nodiscard]] char get_comment() const { return _comment; } /** * @brief Whether to treat `\r\n` as line terminator. + * + * @return `true` if `\r\n` is treated as line terminator */ [[nodiscard]] bool is_enabled_windowslinetermination() const { return _windowslinetermination; } /** * @brief Whether to treat whitespace as field delimiter. + * + * @return `true` if whitespace is treated as field delimiter */ [[nodiscard]] bool is_enabled_delim_whitespace() const { return _delim_whitespace; } /** * @brief Whether to skip whitespace after the delimiter. + * + * @return `true` if whitespace is skipped after the delimiter */ [[nodiscard]] bool is_enabled_skipinitialspace() const { return _skipinitialspace; } /** * @brief Whether to ignore empty lines or parse line values as invalid. + * + * @return `true` if empty lines are ignored, `false` if they are parsed as invalid line values */ [[nodiscard]] bool is_enabled_skip_blank_lines() const { return _skip_blank_lines; } /** * @brief Returns quoting style. + * + * @return Quoting style */ [[nodiscard]] quote_style get_quoting() const { return _quoting; } /** * @brief Returns quoting character. + * + * @return Quoting character */ [[nodiscard]] char get_quotechar() const { return _quotechar; } /** * @brief Whether a quote inside a value is double-quoted. + * + * @return `true` if a quote inside a value is double-quoted */ [[nodiscard]] bool is_enabled_doublequote() const { return _doublequote; } /** * @brief Returns names of columns to read as datetime. + * + * @return Names of columns to read as datetime */ [[nodiscard]] std::vector const& get_parse_dates_names() const { @@ -328,6 +384,8 @@ class csv_reader_options { /** * @brief Returns indexes of columns to read as datetime. 
+ * + * @return Indexes of columns to read as datetime */ [[nodiscard]] std::vector const& get_parse_dates_indexes() const { @@ -336,6 +394,8 @@ class csv_reader_options { /** * @brief Returns names of columns to read as hexadecimal. + * + * @return Names of columns to read as hexadecimal */ [[nodiscard]] std::vector const& get_parse_hex_names() const { @@ -344,11 +404,15 @@ class csv_reader_options { /** * @brief Returns indexes of columns to read as hexadecimal. + * + * @return Indexes of columns to read as hexadecimal */ [[nodiscard]] std::vector const& get_parse_hex_indexes() const { return _parse_hex_indexes; } /** * @brief Returns per-column types. + * + * @return Per-column types */ std::variant, std::map> const& get_dtypes() const { @@ -357,50 +421,64 @@ class csv_reader_options { /** * @brief Returns additional values to recognize as boolean true values. + * + * @return Additional values to recognize as boolean true values */ std::vector const& get_true_values() const { return _true_values; } /** * @brief Returns additional values to recognize as boolean false values. + * + * @return Additional values to recognize as boolean false values */ std::vector const& get_false_values() const { return _false_values; } /** * @brief Returns additional values to recognize as null values. + * + * @return Additional values to recognize as null values */ std::vector const& get_na_values() const { return _na_values; } /** * @brief Whether to keep the built-in default NA values. + * + * @return `true` if the built-in default NA values are kept */ bool is_enabled_keep_default_na() const { return _keep_default_na; } /** * @brief Whether to disable null filter. + * + * @return `true` if null filter is enabled */ bool is_enabled_na_filter() const { return _na_filter; } /** * @brief Whether to parse dates as DD/MM versus MM/DD. 
+ * + * @return `true` if dates are parsed as DD/MM, `false` if MM/DD */ bool is_enabled_dayfirst() const { return _dayfirst; } /** * @brief Returns timestamp_type to which all timestamp columns will be cast. + * + * @return timestamp_type to which all timestamp columns will be cast */ data_type get_timestamp_type() const { return _timestamp_type; } /** * @brief Sets compression format of the source. * - * @param comp Compression type. + * @param comp Compression type */ void set_compression(compression_type comp) { _compression = comp; } /** * @brief Sets number of bytes to skip from source start. * - * @param offset Number of bytes of offset. + * @param offset Number of bytes of offset */ void set_byte_range_offset(std::size_t offset) { @@ -415,7 +493,7 @@ class csv_reader_options { /** * @brief Sets number of bytes to read. * - * @param size Number of bytes to read. + * @param size Number of bytes to read */ void set_byte_range_size(std::size_t size) { @@ -430,28 +508,28 @@ class csv_reader_options { /** * @brief Sets names of the column. * - * @param col_names Vector of column names. + * @param col_names Vector of column names */ void set_names(std::vector col_names) { _names = std::move(col_names); } /** * @brief Sets prefix to be used for column ID. * - * @param pfx String used as prefix in for each column name. + * @param pfx String used as prefix for each column name */ void set_prefix(std::string pfx) { _prefix = pfx; } /** * @brief Sets whether to rename duplicate column names. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_mangle_dupe_cols(bool val) { _mangle_dupe_cols = val; } /** * @brief Sets names of the columns to be read. * - * @param col_names Vector of column names that are needed. + * @param col_names Vector of column names that are needed */ void set_use_cols_names(std::vector col_names) { @@ -461,7 +539,7 @@ class csv_reader_options { /** * @brief Sets indexes of columns to read. 
* - * @param col_indices Vector of column indices that are needed. + * @param col_indices Vector of column indices that are needed */ void set_use_cols_indexes(std::vector col_indices) { @@ -471,7 +549,7 @@ class csv_reader_options { /** * @brief Sets number of rows to read. * - * @param nrows Number of rows to read. + * @param nrows Number of rows to read */ void set_nrows(size_type nrows) { @@ -487,7 +565,7 @@ class csv_reader_options { /** * @brief Sets number of rows to skip from start. * - * @param skip Number of rows to skip. + * @param skip Number of rows to skip */ void set_skiprows(size_type skip) { @@ -501,7 +579,7 @@ class csv_reader_options { /** * @brief Sets number of rows to skip from end. * - * @param skip Number of rows to skip. + * @param skip Number of rows to skip */ void set_skipfooter(size_type skip) { @@ -517,98 +595,98 @@ class csv_reader_options { /** * @brief Sets header row index. * - * @param hdr Index where header row is located. + * @param hdr Index where header row is located */ void set_header(size_type hdr) { _header = hdr; } /** * @brief Sets line terminator * - * @param term A character to indicate line termination. + * @param term A character to indicate line termination */ void set_lineterminator(char term) { _lineterminator = term; } /** * @brief Sets field delimiter. * - * @param delim A character to indicate delimiter. + * @param delim A character to indicate delimiter */ void set_delimiter(char delim) { _delimiter = delim; } /** * @brief Sets numeric data thousands separator. * - * @param val A character that separates thousands. + * @param val A character that separates thousands */ void set_thousands(char val) { _thousands = val; } /** * @brief Sets decimal point character. * - * @param val A character that indicates decimal values. + * @param val A character that indicates decimal values */ void set_decimal(char val) { _decimal = val; } /** * @brief Sets comment line start character. 
* - * @param val A character that indicates comment. + * @param val A character that indicates comment */ void set_comment(char val) { _comment = val; } /** * @brief Sets whether to treat `\r\n` as line terminator. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_windowslinetermination(bool val) { _windowslinetermination = val; } /** * @brief Sets whether to treat whitespace as field delimiter. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_delim_whitespace(bool val) { _delim_whitespace = val; } /** * @brief Sets whether to skip whitespace after the delimiter. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_skipinitialspace(bool val) { _skipinitialspace = val; } /** * @brief Sets whether to ignore empty lines or parse line values as invalid. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_skip_blank_lines(bool val) { _skip_blank_lines = val; } /** * @brief Sets quoting style. * - * @param style Quoting style used. + * @param style Quoting style used */ void set_quoting(quote_style style) { _quoting = style; } /** * @brief Sets quoting character. * - * @param ch A character to indicate quoting. + * @param ch A character to indicate quoting */ void set_quotechar(char ch) { _quotechar = ch; } /** * @brief Sets a quote inside a value is double-quoted. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_doublequote(bool val) { _doublequote = val; } /** * @brief Sets names of columns to read as datetime. * - * @param col_names Vector of column names to infer as datetime. 
+ * @param col_names Vector of column names to infer as datetime */ void set_parse_dates(std::vector col_names) { @@ -618,7 +696,7 @@ class csv_reader_options { /** * @brief Sets indexes of columns to read as datetime. * - * @param col_indices Vector of column indices to infer as datetime. + * @param col_indices Vector of column indices to infer as datetime */ void set_parse_dates(std::vector col_indices) { @@ -652,14 +730,14 @@ class csv_reader_options { /** * @brief Sets per-column types * - * @param types Vector specifying the columns' target data types. + * @param types Vector specifying the columns' target data types */ void set_dtypes(std::vector types) { _dtypes = std::move(types); } /** * @brief Sets additional values to recognize as boolean true values. * - * @param vals Vector of values to be considered to be `true`. + * @param vals Vector of values to be considered to be `true` */ void set_true_values(std::vector vals) { @@ -669,7 +747,7 @@ class csv_reader_options { /** * @brief Sets additional values to recognize as boolean false values. * - * @param vals Vector of values to be considered to be `false`. + * @param vals Vector of values to be considered to be `false` */ void set_false_values(std::vector vals) { @@ -679,7 +757,7 @@ class csv_reader_options { /** * @brief Sets additional values to recognize as null values. * - * @param vals Vector of values to be considered to be null. + * @param vals Vector of values to be considered to be null */ void set_na_values(std::vector vals) { @@ -693,14 +771,14 @@ class csv_reader_options { /** * @brief Sets whether to keep the built-in default NA values. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_keep_default_na(bool val) { _keep_default_na = val; } /** * @brief Sets whether to disable null filter. * - * @param val Boolean value to enable/disable. 
+ * @param val Boolean value to enable/disable */ void enable_na_filter(bool val) { @@ -711,20 +789,24 @@ class csv_reader_options { /** * @brief Sets whether to parse dates as DD/MM versus MM/DD. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_dayfirst(bool val) { _dayfirst = val; } /** * @brief Sets timestamp_type to which all timestamp columns will be cast. * - * @param type Dtype to which all timestamp column will be cast. + * @param type Dtype to which all timestamp column will be cast */ void set_timestamp_type(data_type type) { _timestamp_type = type; } }; +/** + * @brief Builder to build options for `read_csv()`. + * + */ class csv_reader_options_builder { - csv_reader_options options; + csv_reader_options options; ///< Options to be built. public: /** @@ -737,15 +819,15 @@ class csv_reader_options_builder { /** * @brief Constructor from source info. * - * @param src The source information used to read csv file. + * @param src The source information used to read csv file */ csv_reader_options_builder(source_info const& src) : options(src) {} /** * @brief Sets compression format of the source. * - * @param comp Compression type. - * @return this for chaining. + * @param comp Compression type + * @return this for chaining */ csv_reader_options_builder& compression(compression_type comp) { @@ -756,8 +838,8 @@ class csv_reader_options_builder { /** * @brief Sets number of bytes to skip from source start. * - * @param offset Number of bytes of offset. - * @return this for chaining. + * @param offset Number of bytes of offset + * @return this for chaining */ csv_reader_options_builder& byte_range_offset(std::size_t offset) { @@ -768,8 +850,8 @@ class csv_reader_options_builder { /** * @brief Sets number of bytes to read. * - * @param size Number of bytes to read. - * @return this for chaining. 
+ * @param size Number of bytes to read + * @return this for chaining */ csv_reader_options_builder& byte_range_size(std::size_t size) { @@ -780,8 +862,8 @@ class csv_reader_options_builder { /** * @brief Sets names of the column. * - * @param col_names Vector of column names. - * @return this for chaining. + * @param col_names Vector of column names + * @return this for chaining */ csv_reader_options_builder& names(std::vector col_names) { @@ -792,8 +874,8 @@ class csv_reader_options_builder { /** * @brief Sets prefix to be used for column ID. * - * @param pfx String used as prefix in for each column name. - * @return this for chaining. + * @param pfx String used as prefix in for each column name + * @return this for chaining */ csv_reader_options_builder& prefix(std::string pfx) { @@ -804,8 +886,8 @@ class csv_reader_options_builder { /** * @brief Sets whether to rename duplicate column names. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& mangle_dupe_cols(bool val) { @@ -816,8 +898,8 @@ class csv_reader_options_builder { /** * @brief Sets names of the columns to be read. * - * @param col_names Vector of column names that are needed. - * @return this for chaining. + * @param col_names Vector of column names that are needed + * @return this for chaining */ csv_reader_options_builder& use_cols_names(std::vector col_names) { @@ -828,8 +910,8 @@ class csv_reader_options_builder { /** * @brief Sets indexes of columns to read. * - * @param col_indices Vector of column indices that are needed. - * @return this for chaining. + * @param col_indices Vector of column indices that are needed + * @return this for chaining */ csv_reader_options_builder& use_cols_indexes(std::vector col_indices) { @@ -840,8 +922,8 @@ class csv_reader_options_builder { /** * @brief Sets number of rows to read. * - * @param rows Number of rows to read. 
- * @return this for chaining. + * @param rows Number of rows to read + * @return this for chaining */ csv_reader_options_builder& nrows(size_type rows) { @@ -852,8 +934,8 @@ class csv_reader_options_builder { /** * @brief Sets number of rows to skip from start. * - * @param skip Number of rows to skip. - * @return this for chaining. + * @param skip Number of rows to skip + * @return this for chaining */ csv_reader_options_builder& skiprows(size_type skip) { @@ -864,8 +946,8 @@ class csv_reader_options_builder { /** * @brief Sets number of rows to skip from end. * - * @param skip Number of rows to skip. - * @return this for chaining. + * @param skip Number of rows to skip + * @return this for chaining */ csv_reader_options_builder& skipfooter(size_type skip) { @@ -876,8 +958,8 @@ class csv_reader_options_builder { /** * @brief Sets header row index. * - * @param hdr Index where header row is located. - * @return this for chaining. + * @param hdr Index where header row is located + * @return this for chaining */ csv_reader_options_builder& header(size_type hdr) { @@ -888,8 +970,8 @@ class csv_reader_options_builder { /** * @brief Sets line terminator. * - * @param term A character to indicate line termination. - * @return this for chaining. + * @param term A character to indicate line termination + * @return this for chaining */ csv_reader_options_builder& lineterminator(char term) { @@ -900,8 +982,8 @@ class csv_reader_options_builder { /** * @brief Sets field delimiter * - * @param delim A character to indicate delimiter. - * @return this for chaining. + * @param delim A character to indicate delimiter + * @return this for chaining */ csv_reader_options_builder& delimiter(char delim) { @@ -912,8 +994,8 @@ class csv_reader_options_builder { /** * @brief Sets numeric data thousands separator. * - * @param val A character that separates thousands. - * @return this for chaining. 
+ * @param val A character that separates thousands + * @return this for chaining */ csv_reader_options_builder& thousands(char val) { @@ -924,8 +1006,8 @@ class csv_reader_options_builder { /** * @brief Sets decimal point character. * - * @param val A character that indicates decimal values. - * @return this for chaining. + * @param val A character that indicates decimal values + * @return this for chaining */ csv_reader_options_builder& decimal(char val) { @@ -936,8 +1018,8 @@ class csv_reader_options_builder { /** * @brief Sets comment line start character. * - * @param val A character that indicates comment. - * @return this for chaining. + * @param val A character that indicates comment + * @return this for chaining */ csv_reader_options_builder& comment(char val) { @@ -948,8 +1030,8 @@ class csv_reader_options_builder { /** * @brief Sets whether to treat `\r\n` as line terminator. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& windowslinetermination(bool val) { @@ -960,8 +1042,8 @@ class csv_reader_options_builder { /** * @brief Sets whether to treat whitespace as field delimiter. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& delim_whitespace(bool val) { @@ -972,8 +1054,8 @@ class csv_reader_options_builder { /** * @brief Sets whether to skip whitespace after the delimiter. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& skipinitialspace(bool val) { @@ -984,8 +1066,8 @@ class csv_reader_options_builder { /** * @brief Sets whether to ignore empty lines or parse line values as invalid. * - * @param val Boolean value to enable/disable. 
- * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& skip_blank_lines(bool val) { @@ -996,8 +1078,8 @@ class csv_reader_options_builder { /** * @brief Sets quoting style. * - * @param style Quoting style used. - * @return this for chaining. + * @param style Quoting style used + * @return this for chaining */ csv_reader_options_builder& quoting(quote_style style) { @@ -1008,8 +1090,8 @@ class csv_reader_options_builder { /** * @brief Sets quoting character. * - * @param ch A character to indicate quoting. - * @return this for chaining. + * @param ch A character to indicate quoting + * @return this for chaining */ csv_reader_options_builder& quotechar(char ch) { @@ -1020,8 +1102,8 @@ class csv_reader_options_builder { /** * @brief Sets a quote inside a value is double-quoted. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& doublequote(bool val) { @@ -1032,8 +1114,8 @@ class csv_reader_options_builder { /** * @brief Sets names of columns to read as datetime. * - * @param col_names Vector of column names to read as datetime. - * @return this for chaining. + * @param col_names Vector of column names to read as datetime + * @return this for chaining */ csv_reader_options_builder& parse_dates(std::vector col_names) { @@ -1045,7 +1127,7 @@ class csv_reader_options_builder { * @brief Sets indexes of columns to read as datetime. * * @param col_indices Vector of column indices to read as datetime - * @return this for chaining. + * @return this for chaining */ csv_reader_options_builder& parse_dates(std::vector col_indices) { @@ -1057,7 +1139,7 @@ class csv_reader_options_builder { * @brief Sets names of columns to parse as hexadecimal. * * @param col_names Vector of column names to parse as hexadecimal - * @return this for chaining. 
+ * @return this for chaining */ csv_reader_options_builder& parse_hex(std::vector col_names) { @@ -1069,7 +1151,7 @@ class csv_reader_options_builder { * @brief Sets indexes of columns to parse as hexadecimal. * * @param col_indices Vector of column indices to parse as hexadecimal - * @return this for chaining. + * @return this for chaining */ csv_reader_options_builder& parse_hex(std::vector col_indices) { @@ -1081,7 +1163,7 @@ class csv_reader_options_builder { * @brief Sets per-column types. * * @param types Column name -> data type map specifying the columns' target data types - * @return this for chaining. + * @return this for chaining */ csv_reader_options_builder& dtypes(std::map types) { @@ -1092,8 +1174,8 @@ class csv_reader_options_builder { /** * @brief Sets per-column types. * - * @param types Vector of data types in which the column needs to be read. - * @return this for chaining. + * @param types Vector of data types in which the column needs to be read + * @return this for chaining */ csv_reader_options_builder& dtypes(std::vector types) { @@ -1104,8 +1186,8 @@ class csv_reader_options_builder { /** * @brief Sets additional values to recognize as boolean true values. * - * @param vals Vector of values to be considered to be `true`. - * @return this for chaining. + * @param vals Vector of values to be considered to be `true` + * @return this for chaining */ csv_reader_options_builder& true_values(std::vector vals) { @@ -1116,8 +1198,8 @@ class csv_reader_options_builder { /** * @brief Sets additional values to recognize as boolean false values. * - * @param vals Vector of values to be considered to be `false`. - * @return this for chaining. + * @param vals Vector of values to be considered to be `false` + * @return this for chaining */ csv_reader_options_builder& false_values(std::vector vals) { @@ -1128,8 +1210,8 @@ class csv_reader_options_builder { /** * @brief Sets additional values to recognize as null values. 
* - * @param vals Vector of values to be considered to be null. - * @return this for chaining. + * @param vals Vector of values to be considered to be null + * @return this for chaining */ csv_reader_options_builder& na_values(std::vector vals) { @@ -1140,8 +1222,8 @@ class csv_reader_options_builder { /** * @brief Sets whether to keep the built-in default NA values. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& keep_default_na(bool val) { @@ -1152,8 +1234,8 @@ class csv_reader_options_builder { /** * @brief Sets whether to disable null filter. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& na_filter(bool val) { @@ -1164,8 +1246,8 @@ class csv_reader_options_builder { /** * @brief Sets whether to parse dates as DD/MM versus MM/DD. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& dayfirst(bool val) { @@ -1176,8 +1258,8 @@ class csv_reader_options_builder { /** * @brief Sets timestamp_type to which all timestamp columns will be cast. * - * @param type Dtype to which all timestamp column will be cast. - * @return this for chaining. + * @param type Dtype to which all timestamp column will be cast + * @return this for chaining */ csv_reader_options_builder& timestamp_type(data_type type) { @@ -1194,6 +1276,8 @@ class csv_reader_options_builder { * @brief move csv_reader_options member once it's built. * * This has been added since Cython does not support overloading of conversion operators. 
+ * + * @return Built `csv_reader_options` object's r-value reference */ csv_reader_options&& build() { return std::move(options); } }; @@ -1208,11 +1292,11 @@ class csv_reader_options_builder { * auto result = cudf::io::read_csv(options); * @endcode * - * @param options Settings for controlling reading behavior. - * @param mr Device memory resource used to allocate device memory of the table in the returned. + * @param options Settings for controlling reading behavior + * @param mr Device memory resource used to allocate device memory of the table in the returned * table_with_metadata * - * @return The set of columns along with metadata. + * @return The set of columns along with metadata */ table_with_metadata read_csv( csv_reader_options options, @@ -1258,8 +1342,8 @@ class csv_writer_options { /** * @brief Constructor from sink and table. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. + * @param sink The sink used for writer output + * @param table Table to be written to output */ explicit csv_writer_options(sink_info const& sink, table_view const& table) : _sink(sink), _table(table), _rows_per_chunk(table.num_rows()) @@ -1279,60 +1363,80 @@ class csv_writer_options { /** * @brief Create builder to create `csv_writer_options`. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. + * @param sink The sink used for writer output + * @param table Table to be written to output * - * @return Builder to build csv_writer_options. + * @return Builder to build csv_writer_options */ static csv_writer_options_builder builder(sink_info const& sink, table_view const& table); /** * @brief Returns sink used for writer output. + * + * @return sink used for writer output */ [[nodiscard]] sink_info const& get_sink() const { return _sink; } /** * @brief Returns table that would be written to output. 
+ * + * @return Table that would be written to output */ [[nodiscard]] table_view const& get_table() const { return _table; } /** * @brief Returns optional associated metadata. + * + * @return Optional associated metadata */ [[nodiscard]] table_metadata const* get_metadata() const { return _metadata; } /** * @brief Returns string to used for null entries. + * + * @return string to use for null entries */ [[nodiscard]] std::string get_na_rep() const { return _na_rep; } /** * @brief Whether to write headers to csv. + * + * @return `true` if writing headers to csv */ [[nodiscard]] bool is_enabled_include_header() const { return _include_header; } /** * @brief Returns maximum number of rows to process for each file write. + * + * @return Maximum number of rows to process for each file write */ [[nodiscard]] size_type get_rows_per_chunk() const { return _rows_per_chunk; } /** * @brief Returns character used for separating lines. + * + * @return Character used for separating lines */ [[nodiscard]] std::string get_line_terminator() const { return _line_terminator; } /** * @brief Returns character used for separating lines. + * + * @return Character used for separating columns */ [[nodiscard]] char get_inter_column_delimiter() const { return _inter_column_delimiter; } /** * @brief Returns string used for values != 0 in INT8 types. + * + * @return string used for values != 0 in INT8 types */ [[nodiscard]] std::string get_true_value() const { return _true_value; } /** * @brief Returns string used for values == 0 in INT8 types. + * + * @return string used for values == 0 in INT8 types */ [[nodiscard]] std::string get_false_value() const { return _false_value; } @@ -1340,62 +1444,65 @@ class csv_writer_options { /** * @brief Sets optional associated metadata. * - @param metadata Associated metadata. + @param metadata Associated metadata */ void set_metadata(table_metadata* metadata) { _metadata = metadata; } /** * @brief Sets string to used for null entries.
* - * @param val String to represent null value. + * @param val String to represent null value */ void set_na_rep(std::string val) { _na_rep = val; } /** * @brief Enables/Disables headers being written to csv. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_include_header(bool val) { _include_header = val; } /** * @brief Sets maximum number of rows to process for each file write. * - * @param val Number of rows per chunk. + * @param val Number of rows per chunk */ void set_rows_per_chunk(size_type val) { _rows_per_chunk = val; } /** * @brief Sets character used for separating lines. * - * @param term Character to represent line termination. + * @param term Character to represent line termination */ void set_line_terminator(std::string term) { _line_terminator = term; } /** * @brief Sets character used for separating lines. * - * @param delim Character to indicate delimiting. + * @param delim Character to indicate delimiting */ void set_inter_column_delimiter(char delim) { _inter_column_delimiter = delim; } /** * @brief Sets string used for values != 0 in INT8 types. * - * @param val String to represent values != 0 in INT8 types. + * @param val String to represent values != 0 in INT8 types */ void set_true_value(std::string val) { _true_value = val; } /** * @brief Sets string used for values == 0 in INT8 types. * - * @param val String to represent values == 0 in INT8 types. + * @param val String to represent values == 0 in INT8 types */ void set_false_value(std::string val) { _false_value = val; } }; +/** + * @brief Builder to build options for `write_csv()` + */ class csv_writer_options_builder { - csv_writer_options options; + csv_writer_options options; ///< Options to be built. public: /** @@ -1408,8 +1515,8 @@ class csv_writer_options_builder { /** * @brief Constructor from sink and table. * - * @param sink The sink used for writer output. - * @param table Table to be written to output.
+ * @param sink The sink used for writer output + * @param table Table to be written to output */ explicit csv_writer_options_builder(sink_info const& sink, table_view const& table) : options{sink, table} @@ -1419,8 +1526,8 @@ class csv_writer_options_builder { /** * @brief Sets optional associated metadata. * - * @param metadata Associated metadata. - * @return this for chaining. + * @param metadata Associated metadata + * @return this for chaining */ csv_writer_options_builder& metadata(table_metadata* metadata) { @@ -1431,8 +1538,8 @@ class csv_writer_options_builder { /** * @brief Sets string to used for null entries. * - * @param val String to represent null value. - * @return this for chaining. + * @param val String to represent null value + * @return this for chaining */ csv_writer_options_builder& na_rep(std::string val) { @@ -1443,8 +1550,8 @@ class csv_writer_options_builder { /** * @brief Enables/Disables headers being written to csv. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_writer_options_builder& include_header(bool val) { @@ -1455,8 +1562,8 @@ class csv_writer_options_builder { /** * @brief Sets maximum number of rows to process for each file write. * - * @param val Number of rows per chunk. - * @return this for chaining. + * @param val Number of rows per chunk + * @return this for chaining */ csv_writer_options_builder& rows_per_chunk(int val) { @@ -1467,8 +1574,8 @@ class csv_writer_options_builder { /** * @brief Sets character used for separating lines. * - * @param term Character to represent line termination. - * @return this for chaining. + * @param term Character to represent line termination + * @return this for chaining */ csv_writer_options_builder& line_terminator(std::string term) { @@ -1479,8 +1586,8 @@ class csv_writer_options_builder { /** * @brief Sets character used for separating lines. 
* - * @param delim Character to indicate delimiting. - * @return this for chaining. + * @param delim Character to indicate delimiting + * @return this for chaining */ csv_writer_options_builder& inter_column_delimiter(char delim) { @@ -1491,8 +1598,8 @@ class csv_writer_options_builder { /** * @brief Sets string used for values != 0 in INT8 types. * - * @param val String to represent values != 0 in INT8 types. - * @return this for chaining. + * @param val String to represent values != 0 in INT8 types + * @return this for chaining */ csv_writer_options_builder& true_value(std::string val) { @@ -1503,8 +1610,8 @@ class csv_writer_options_builder { /** * @brief Sets string used for values == 0 in INT8 types. * - * @param val String to represent values == 0 in INT8 types. - * @return this for chaining. + * @param val String to represent values == 0 in INT8 types + * @return this for chaining */ csv_writer_options_builder& false_value(std::string val) { @@ -1521,6 +1628,8 @@ class csv_writer_options_builder { * @brief move `csv_writer_options` member once it's built. * * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `csv_writer_options` object's r-value reference */ csv_writer_options&& build() { return std::move(options); } }; @@ -1539,8 +1648,8 @@ class csv_writer_options_builder { * cudf::io::write_csv(options); * @endcode * - * @param options Settings for controlling writing behavior. - * @param mr Device memory resource to use for device memory allocation. 
+ * @param options Settings for controlling writing behavior + * @param mr Device memory resource to use for device memory allocation */ void write_csv(csv_writer_options const& options, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index e2d4de83b49..9ccb5ec4d58 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -36,9 +36,6 @@ namespace io { * @file */ -/** - * @brief Builds settings to use for `read_json()`. - */ class json_reader_options_builder; /** @@ -86,7 +83,7 @@ class json_reader_options { /** * @brief Constructor from source info. * - * @param src source information used to read parquet file. + * @param src source information used to read json file */ explicit json_reader_options(const source_info& src) : _source(src) {} @@ -103,18 +100,22 @@ class json_reader_options { /** * @brief create json_reader_options_builder which will build json_reader_options. * - * @param src source information used to read json file. - * @returns builder to build the options. + * @param src source information used to read json file + * @returns builder to build the options */ static json_reader_options_builder builder(source_info const& src); /** * @brief Returns source info. + * + * @returns Source info */ [[nodiscard]] source_info const& get_source() const { return _source; } /** * @brief Returns data types of the columns. + * + * @returns Data types of the columns */ std::variant, std::map> const& get_dtypes() const { @@ -123,21 +124,29 @@ class json_reader_options { /** * @brief Returns compression format of the source. + * + * @return Compression format of the source */ compression_type get_compression() const { return _compression; } /** * @brief Returns number of bytes to skip from source start.
+ * + * @return Number of bytes to skip from source start */ size_t get_byte_range_offset() const { return _byte_range_offset; } /** * @brief Returns number of bytes to read. + * + * @return Number of bytes to read */ size_t get_byte_range_size() const { return _byte_range_size; } /** * @brief Returns number of bytes to read with padding. + * + * @return Number of bytes to read with padding */ size_t get_byte_range_size_with_padding() const { @@ -150,6 +159,8 @@ class json_reader_options { /** * @brief Returns number of bytes to pad when reading. + * + * @return Number of bytes to pad */ size_t get_byte_range_padding() const { @@ -170,11 +181,15 @@ class json_reader_options { /** * @brief Whether to read the file as a json object per line. + * + * @return `true` if reading the file as a json object per line */ bool is_enabled_lines() const { return _lines; } /** * @brief Whether to parse dates as DD/MM versus MM/DD. + * + * @returns true if dates are parsed as DD/MM, false if MM/DD */ bool is_enabled_dayfirst() const { return _dayfirst; } @@ -188,46 +203,49 @@ class json_reader_options { /** * @brief Set data types for columns to be read. * - * @param types Vector dtypes in string format. + * @param types Vector dtypes in string format */ void set_dtypes(std::map types) { _dtypes = std::move(types); } /** * @brief Set the compression type. * - * @param comp_type The compression type used. + * @param comp_type The compression type used */ void set_compression(compression_type comp_type) { _compression = comp_type; } /** * @brief Set number of bytes to skip from source start. * - * @param offset Number of bytes of offset. + * @param offset Number of bytes of offset */ void set_byte_range_offset(size_type offset) { _byte_range_offset = offset; } /** * @brief Set number of bytes to read. * - * @param size Number of bytes to read. 
+ * @param size Number of bytes to read */ void set_byte_range_size(size_type size) { _byte_range_size = size; } /** * @brief Set whether to read the file as a json object per line. * - * @param val Boolean value to enable/disable the option to read each line as a json object. + * @param val Boolean value to enable/disable the option to read each line as a json object */ void enable_lines(bool val) { _lines = val; } /** * @brief Set whether to parse dates as DD/MM versus MM/DD. * - * @param val Boolean value to enable/disable day first parsing format. + * @param val Boolean value to enable/disable day first parsing format */ void enable_dayfirst(bool val) { _dayfirst = val; } }; +/** + * @brief Builds settings to use for `read_json()`. + */ class json_reader_options_builder { json_reader_options options; @@ -242,7 +260,7 @@ class json_reader_options_builder { /** * @brief Constructor from source info. * - * @param src The source information used to read avro file. + * @param src The source information used to read json file */ explicit json_reader_options_builder(source_info const& src) : options(src) {} @@ -261,7 +279,7 @@ class json_reader_options_builder { /** * @brief Set data types for columns to be read. * - * @param types Column name -> dtype map. + * @param types Column name -> dtype map * @return this for chaining */ json_reader_options_builder& dtypes(std::map types) @@ -273,8 +291,8 @@ class json_reader_options_builder { /** * @brief Set the compression type. * - * @param comp_type The compression type used. - * @return this for chaining. + * @param comp_type The compression type used + * @return this for chaining */ json_reader_options_builder& compression(compression_type comp_type) { @@ -285,8 +303,8 @@ class json_reader_options_builder { /** * @brief Set number of bytes to skip from source start. * - * @param offset Number of bytes of offset. - * @return this for chaining.
+ * @param offset Number of bytes of offset + * @return this for chaining */ json_reader_options_builder& byte_range_offset(size_type offset) { @@ -297,7 +315,7 @@ class json_reader_options_builder { /** * @brief Set number of bytes to read. * - * @param size Number of bytes to read. + * @param size Number of bytes to read * @return this for chaining */ json_reader_options_builder& byte_range_size(size_type size) @@ -309,8 +327,8 @@ class json_reader_options_builder { /** * @brief Set whether to read the file as a json object per line. * - * @param val Boolean value to enable/disable the option to read each line as a json object. - * @return this for chaining. + * @param val Boolean value to enable/disable the option to read each line as a json object + * @return this for chaining */ json_reader_options_builder& lines(bool val) { @@ -321,8 +339,8 @@ class json_reader_options_builder { /** * @brief Set whether to parse dates as DD/MM versus MM/DD. * - * @param val Boolean value to enable/disable day first parsing format. - * @return this for chaining. + * @param val Boolean value to enable/disable day first parsing format + * @return this for chaining */ json_reader_options_builder& dayfirst(bool val) { @@ -339,6 +357,8 @@ class json_reader_options_builder { * @brief move json_reader_options member once it's built. * * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `json_reader_options` object r-value reference */ json_reader_options&& build() { return std::move(options); } }; @@ -353,11 +373,11 @@ class json_reader_options_builder { * auto result = cudf::io::read_json(options); * @endcode * - * @param options Settings for controlling reading behavior. + * @param options Settings for controlling reading behavior * @param mr Device memory resource used to allocate device memory of the table in the returned * table_with_metadata. * - * @return The set of columns along with metadata. 
+ * @return The set of columns along with metadata */ table_with_metadata read_json( json_reader_options options, diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 9e8fd1244d0..e9b6818099e 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -34,9 +34,9 @@ namespace io { * @file */ -constexpr size_t default_stripe_size_bytes = 64 * 1024 * 1024; -constexpr size_type default_stripe_size_rows = 1000000; -constexpr size_type default_row_index_stride = 10000; +constexpr size_t default_stripe_size_bytes = 64 * 1024 * 1024; ///< 64MB default orc stripe size +constexpr size_type default_stripe_size_rows = 1000000; ///< 1M rows default orc stripe rows +constexpr size_type default_row_index_stride = 10000; ///< 10K rows default orc row index stride /** * @brief Builds settings to use for `read_orc()`. @@ -75,7 +75,7 @@ class orc_reader_options { /** * @brief Constructor from source info. * - * @param src source information used to read orc file. + * @param src source information used to read orc file */ explicit orc_reader_options(source_info const& src) : _source(src) {} @@ -90,53 +90,71 @@ class orc_reader_options { /** * @brief Creates `orc_reader_options_builder` which will build `orc_reader_options`. * - * @param src Source information to read orc file. - * @return Builder to build reader options. + * @param src Source information to read orc file + * @return Builder to build reader options */ static orc_reader_options_builder builder(source_info const& src); /** * @brief Returns source info. + * + * @return Source info */ [[nodiscard]] source_info const& get_source() const { return _source; } /** * @brief Returns names of the columns to read. 
+ * + * @return Names of the columns to read */ [[nodiscard]] std::vector const& get_columns() const { return _columns; } /** * @brief Returns vector of vectors, stripes to read for each input source + * + * @return Vector of vectors, stripes to read for each input source */ std::vector> const& get_stripes() const { return _stripes; } /** * @brief Returns number of rows to skip from the start. + * + * @return Number of rows to skip from the start */ size_type get_skip_rows() const { return _skip_rows; } /** * @brief Returns number of row to read. + * + * @return Number of rows to read */ size_type get_num_rows() const { return _num_rows; } /** * @brief Whether to use row index to speed-up reading. + * + * @return `true` if row index is used to speed-up reading */ bool is_enabled_use_index() const { return _use_index; } /** * @brief Whether to use numpy-compatible dtypes. + * + * @return `true` if numpy-compatible dtypes are used */ bool is_enabled_use_np_dtypes() const { return _use_np_dtypes; } /** * @brief Returns timestamp type to which timestamp column will be cast. + * + * @return Timestamp type to which timestamp column will be cast */ data_type get_timestamp_type() const { return _timestamp_type; } /** - * @brief Fully qualified names of columns that should be read as 128-bit Decimal. + * @brief Returns fully qualified names of columns that should be read as 128-bit Decimal. + * + * @return Fully qualified names of columns that should be read as 128-bit Decimal */ std::vector const& get_decimal128_columns() const { return _decimal128_columns; } @@ -145,7 +163,7 @@ class orc_reader_options { /** * @brief Sets names of the column to read. * - * @param col_names Vector of column names. + * @param col_names Vector of column names */ void set_columns(std::vector col_names) { _columns = std::move(col_names); } @@ -164,7 +182,7 @@ class orc_reader_options { /** * @brief Sets number of rows to skip from the start. * - * @param rows Number of rows.
+ * @param rows Number of rows */ void set_skip_rows(size_type rows) { @@ -175,7 +193,7 @@ class orc_reader_options { /** * @brief Sets number of row to read. * - * @param nrows Number of rows. + * @param nrows Number of rows */ void set_num_rows(size_type nrows) { @@ -186,28 +204,28 @@ class orc_reader_options { /** * @brief Enable/Disable use of row index to speed-up reading. * - * @param use Boolean value to enable/disable row index use. + * @param use Boolean value to enable/disable row index use */ void enable_use_index(bool use) { _use_index = use; } /** * @brief Enable/Disable use of numpy-compatible dtypes * - * @param use Boolean value to enable/disable. + * @param use Boolean value to enable/disable */ void enable_use_np_dtypes(bool use) { _use_np_dtypes = use; } /** * @brief Sets timestamp type to which timestamp column will be cast. * - * @param type Type of timestamp. + * @param type Type of timestamp */ void set_timestamp_type(data_type type) { _timestamp_type = type; } /** * @brief Set columns that should be read as 128-bit Decimal * - * @param val Vector of fully qualified column names. + * @param val Vector of fully qualified column names */ void set_decimal128_columns(std::vector val) { @@ -215,6 +233,9 @@ class orc_reader_options { } }; +/** + * @brief Builds settings to use for `read_orc()`. + */ class orc_reader_options_builder { orc_reader_options options; @@ -229,15 +250,15 @@ class orc_reader_options_builder { /** * @brief Constructor from source info. * - * @param src The source information used to read orc file. + * @param src The source information used to read orc file */ explicit orc_reader_options_builder(source_info const& src) : options{src} {}; /** * @brief Sets names of the column to read. * - * @param col_names Vector of column names. - * @return this for chaining. 
+ * @param col_names Vector of column names + * @return this for chaining */ orc_reader_options_builder& columns(std::vector col_names) { @@ -249,7 +270,7 @@ class orc_reader_options_builder { * @brief Sets list of individual stripes to read per source * * @param stripes Vector of vectors, mapping stripes to read to input sources - * @return this for chaining. + * @return this for chaining */ orc_reader_options_builder& stripes(std::vector> stripes) { @@ -260,8 +281,8 @@ class orc_reader_options_builder { /** * @brief Sets number of rows to skip from the start. * - * @param rows Number of rows. - * @return this for chaining. + * @param rows Number of rows + * @return this for chaining */ orc_reader_options_builder& skip_rows(size_type rows) { @@ -272,8 +293,8 @@ class orc_reader_options_builder { /** * @brief Sets number of row to read. * - * @param nrows Number of rows. - * @return this for chaining. + * @param nrows Number of rows + * @return this for chaining */ orc_reader_options_builder& num_rows(size_type nrows) { @@ -284,8 +305,8 @@ class orc_reader_options_builder { /** * @brief Enable/Disable use of row index to speed-up reading. * - * @param use Boolean value to enable/disable row index use. - * @return this for chaining. + * @param use Boolean value to enable/disable row index use + * @return this for chaining */ orc_reader_options_builder& use_index(bool use) { @@ -296,8 +317,8 @@ class orc_reader_options_builder { /** * @brief Enable/Disable use of numpy-compatible dtypes. * - * @param use Boolean value to enable/disable. - * @return this for chaining. + * @param use Boolean value to enable/disable + * @return this for chaining */ orc_reader_options_builder& use_np_dtypes(bool use) { @@ -308,8 +329,8 @@ class orc_reader_options_builder { /** * @brief Sets timestamp type to which timestamp column will be cast. * - * @param type Type of timestamp. - * @return this for chaining. 
+ * @param type Type of timestamp + * @return this for chaining */ orc_reader_options_builder& timestamp_type(data_type type) { @@ -320,8 +341,8 @@ class orc_reader_options_builder { /** * @brief Columns that should be read as 128-bit Decimal * - * @param val Vector of column names. - * @return this for chaining. + * @param val Vector of column names + * @return this for chaining */ orc_reader_options_builder& decimal128_columns(std::vector val) { @@ -338,6 +359,8 @@ class orc_reader_options_builder { * @brief move orc_reader_options member once it's built. * * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `orc_reader_options` object's r-value reference */ orc_reader_options&& build() { return std::move(options); } }; @@ -355,11 +378,11 @@ class orc_reader_options_builder { * Note: Support for reading files with struct columns is currently experimental, the output may not * be as reliable as reading for other datatypes. * - * @param options Settings for controlling reading behavior. + * @param options Settings for controlling reading behavior * @param mr Device memory resource used to allocate device memory of the table in the returned * table_with_metadata. * - * @return The set of columns. + * @return The set of columns */ table_with_metadata read_orc( orc_reader_options const& options, @@ -417,8 +440,8 @@ class orc_writer_options { /** * @brief Constructor from sink and table. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. + * @param sink The sink used for writer output + * @param table Table to be written to output */ explicit orc_writer_options(sink_info const& sink, table_view const& table) : _sink(sink), _table(table) @@ -436,25 +459,31 @@ class orc_writer_options { /** * @brief Create builder to create `orc_writer_options`. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. 
+ * @param sink The sink used for writer output + * @param table Table to be written to output * - * @return Builder to build `orc_writer_options`. + * @return Builder to build `orc_writer_options` */ static orc_writer_options_builder builder(sink_info const& sink, table_view const& table); /** * @brief Returns sink info. + * + * @return Sink info */ [[nodiscard]] sink_info const& get_sink() const { return _sink; } /** * @brief Returns compression type. + * + * @return Compression type */ [[nodiscard]] compression_type get_compression() const { return _compression; } /** * @brief Whether writing column statistics is enabled/disabled. + * + * @return `true` if writing column statistics is enabled */ [[nodiscard]] bool is_enabled_statistics() const { @@ -463,21 +492,29 @@ class orc_writer_options { /** * @brief Returns frequency of statistics collection. + * + * @return Frequency of statistics collection */ [[nodiscard]] statistics_freq get_statistics_freq() const { return _stats_freq; } /** * @brief Returns maximum stripe size, in bytes. + * + * @return Maximum stripe size, in bytes */ [[nodiscard]] auto get_stripe_size_bytes() const { return _stripe_size_bytes; } /** * @brief Returns maximum stripe size, in rows. + * + * @return Maximum stripe size, in rows */ [[nodiscard]] auto get_stripe_size_rows() const { return _stripe_size_rows; } /** * @brief Returns the row index stride. + * + * @return Row index stride */ auto get_row_index_stride() const { @@ -487,16 +524,22 @@ class orc_writer_options { /** * @brief Returns table to be written to output. + * + * @return Table to be written to output */ [[nodiscard]] table_view get_table() const { return _table; } /** * @brief Returns associated metadata. + * + * @return Associated metadata */ [[nodiscard]] table_input_metadata const* get_metadata() const { return _metadata; } /** * @brief Returns Key-Value footer metadata information. 
+ * + * @return Key-Value footer metadata information */ [[nodiscard]] std::map const& get_key_value_metadata() const { @@ -508,7 +551,7 @@ class orc_writer_options { /** * @brief Sets compression type. * - * @param comp Compression type. + * @param comp Compression type */ void set_compression(compression_type comp) { _compression = comp; } @@ -520,12 +563,14 @@ class orc_writer_options { * - cudf::io::ORC_STATISTICS_STRIPE: Statistics are collected for each ORC stripe. * - cudf::io::ORC_STATISTICS_ROWGROUP: Statistics are collected for each ORC row group. * - * @param val Frequency of statistics collection. + * @param val Frequency of statistics collection */ void enable_statistics(statistics_freq val) { _stats_freq = val; } /** * @brief Sets the maximum stripe size, in bytes. + * + * @param size_bytes Maximum stripe size, in bytes to be set */ void set_stripe_size_bytes(size_t size_bytes) { @@ -538,6 +583,8 @@ class orc_writer_options { * * If the stripe size is smaller that the row group size, row group size will be reduced to math * the stripe size. + * + * @param size_rows Maximum stripe size, in rows to be set */ void set_stripe_size_rows(size_type size_rows) { @@ -549,6 +596,8 @@ class orc_writer_options { * @brief Sets the row index stride. * * Rounded down to a multiple of 8. + * + * @param stride Row index stride to be set */ void set_row_index_stride(size_type stride) { @@ -559,14 +608,14 @@ class orc_writer_options { /** * @brief Sets table to be written to output. * - * @param tbl Table for the output. + * @param tbl Table for the output */ void set_table(table_view tbl) { _table = tbl; } /** * @brief Sets associated metadata * - * @param meta Associated metadata. + * @param meta Associated metadata */ void set_metadata(table_input_metadata const* meta) { _metadata = meta; } @@ -581,6 +630,9 @@ class orc_writer_options { } }; +/** + * @brief Builds settings to use for `write_orc()`. 
+ */ class orc_writer_options_builder { orc_writer_options options; @@ -595,8 +647,8 @@ class orc_writer_options_builder { /** * @brief Constructor from sink and table. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. + * @param sink The sink used for writer output + * @param table Table to be written to output */ orc_writer_options_builder(sink_info const& sink, table_view const& table) : options{sink, table} { @@ -605,8 +657,8 @@ class orc_writer_options_builder { /** * @brief Sets compression type. * - * @param comp The compression type to use. - * @return this for chaining. + * @param comp The compression type to use + * @return this for chaining */ orc_writer_options_builder& compression(compression_type comp) { @@ -622,8 +674,8 @@ class orc_writer_options_builder { * - cudf::io::ORC_STATISTICS_STRIPE: Statistics are collected for each ORC stripe. * - cudf::io::ORC_STATISTICS_ROWGROUP: Statistics are collected for each ORC row group. * - * @param val Level of statistics collection. - * @return this for chaining. + * @param val Level of statistics collection + * @return this for chaining */ orc_writer_options_builder& enable_statistics(statistics_freq val) { @@ -635,7 +687,7 @@ class orc_writer_options_builder { * @brief Sets the maximum stripe size, in bytes. * * @param val maximum stripe size - * @return this for chaining. + * @return this for chaining */ orc_writer_options_builder& stripe_size_bytes(size_t val) { @@ -647,7 +699,7 @@ class orc_writer_options_builder { * @brief Sets the maximum number of rows in output stripes. * * @param val maximum number or rows - * @return this for chaining. + * @return this for chaining */ orc_writer_options_builder& stripe_size_rows(size_type val) { @@ -659,7 +711,7 @@ class orc_writer_options_builder { * @brief Sets the row index stride. * * @param val new row index stride - * @return this for chaining. 
+ * @return this for chaining */ orc_writer_options_builder& row_index_stride(size_type val) { @@ -670,8 +722,8 @@ class orc_writer_options_builder { /** * @brief Sets table to be written to output. * - * @param tbl Table for the output. - * @return this for chaining. + * @param tbl Table for the output + * @return this for chaining */ orc_writer_options_builder& table(table_view tbl) { @@ -682,8 +734,8 @@ class orc_writer_options_builder { /** * @brief Sets associated metadata. * - * @param meta Associated metadata. - * @return this for chaining. + * @param meta Associated metadata + * @return this for chaining */ orc_writer_options_builder& metadata(table_input_metadata const* meta) { @@ -695,7 +747,7 @@ class orc_writer_options_builder { * @brief Sets Key-Value footer metadata. * * @param metadata Key-Value footer metadata - * @return this for chaining. + * @return this for chaining */ orc_writer_options_builder& key_value_metadata(std::map metadata) { @@ -712,6 +764,8 @@ class orc_writer_options_builder { * @brief move orc_writer_options member once it's built. * * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `orc_writer_options` object's r-value reference */ orc_writer_options&& build() { return std::move(options); } }; @@ -729,8 +783,8 @@ class orc_writer_options_builder { * Note: Support for writing tables with struct columns is currently experimental, the output may * not be as reliable as writing for other datatypes. * - * @param options Settings for controlling reading behavior. - * @param mr Device memory resource to use for device memory allocation. 
+ * @param options Settings for controlling reading behavior + * @param mr Device memory resource to use for device memory allocation */ void write_orc(orc_writer_options const& options, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -766,7 +820,7 @@ class chunked_orc_writer_options { /** * @brief Constructor from sink and table. * - * @param sink The sink used for writer output. + * @param sink The sink used for writer output */ chunked_orc_writer_options(sink_info const& sink) : _sink(sink) {} @@ -781,39 +835,51 @@ class chunked_orc_writer_options { /** * @brief Create builder to create `chunked_orc_writer_options`. * - * @param sink The sink used for writer output. + * @param sink The sink used for writer output * - * @return Builder to build chunked_orc_writer_options. + * @return Builder to build chunked_orc_writer_options */ static chunked_orc_writer_options_builder builder(sink_info const& sink); /** * @brief Returns sink info. + * + * @return Sink info */ [[nodiscard]] sink_info const& get_sink() const { return _sink; } /** * @brief Returns compression type. + * + * @return Compression type */ [[nodiscard]] compression_type get_compression() const { return _compression; } /** * @brief Returns granularity of statistics collection. + * + * @return Granularity of statistics collection */ [[nodiscard]] statistics_freq get_statistics_freq() const { return _stats_freq; } /** * @brief Returns maximum stripe size, in bytes. + * + * @return Maximum stripe size, in bytes */ [[nodiscard]] auto get_stripe_size_bytes() const { return _stripe_size_bytes; } /** * @brief Returns maximum stripe size, in rows. + * + * @return Maximum stripe size, in rows */ [[nodiscard]] auto get_stripe_size_rows() const { return _stripe_size_rows; } /** * @brief Returns the row index stride. 
+ * + * @return Row index stride */ auto get_row_index_stride() const { @@ -823,11 +889,15 @@ class chunked_orc_writer_options { /** * @brief Returns associated metadata. + * + * @return Associated metadata */ [[nodiscard]] table_input_metadata const* get_metadata() const { return _metadata; } /** * @brief Returns Key-Value footer metadata information. + * + * @return Key-Value footer metadata information */ [[nodiscard]] std::map const& get_key_value_metadata() const { @@ -839,7 +909,7 @@ class chunked_orc_writer_options { /** * @brief Sets compression type. * - * @param comp The compression type to use. + * @param comp The compression type to use */ void set_compression(compression_type comp) { _compression = comp; } @@ -851,12 +921,14 @@ class chunked_orc_writer_options { * - cudf::io::ORC_STATISTICS_STRIPE: Statistics are collected for each ORC stripe. * - cudf::io::ORC_STATISTICS_ROWGROUP: Statistics are collected for each ORC row group. * - * @param val Frequency of statistics collection. + * @param val Frequency of statistics collection */ void enable_statistics(statistics_freq val) { _stats_freq = val; } /** * @brief Sets the maximum stripe size, in bytes. + * + * @param size_bytes Maximum stripe size, in bytes to be set */ void set_stripe_size_bytes(size_t size_bytes) { @@ -869,6 +941,8 @@ class chunked_orc_writer_options { * * If the stripe size is smaller that the row group size, row group size will be reduced to math * the stripe size. + * + * @param size_rows Maximum stripe size, in rows to be set */ void set_stripe_size_rows(size_type size_rows) { @@ -880,6 +954,8 @@ class chunked_orc_writer_options { * @brief Sets the row index stride. * * Rounded down to a multiple of 8. + * + * @param stride Row index stride to be set */ void set_row_index_stride(size_type stride) { @@ -890,7 +966,7 @@ class chunked_orc_writer_options { /** * @brief Sets associated metadata. * - * @param meta Associated metadata. 
+ * @param meta Associated metadata */ void metadata(table_input_metadata const* meta) { _metadata = meta; } @@ -905,6 +981,9 @@ class chunked_orc_writer_options { } }; +/** + * @brief Builds settings to use for `write_orc_chunked()`. + */ class chunked_orc_writer_options_builder { chunked_orc_writer_options options; @@ -919,15 +998,15 @@ class chunked_orc_writer_options_builder { /** * @brief Constructor from sink and table. * - * @param sink The sink used for writer output. + * @param sink The sink used for writer output */ explicit chunked_orc_writer_options_builder(sink_info const& sink) : options{sink} {} /** * @brief Sets compression type. * - * @param comp The compression type to use. - * @return this for chaining. + * @param comp The compression type to use + * @return this for chaining */ chunked_orc_writer_options_builder& compression(compression_type comp) { @@ -943,8 +1022,8 @@ class chunked_orc_writer_options_builder { * - cudf::io::ORC_STATISTICS_STRIPE: Statistics are collected for each ORC stripe. * - cudf::io::ORC_STATISTICS_ROWGROUP: Statistics are collected for each ORC row group. * - * @param val Frequency of statistics collection. - * @return this for chaining. + * @param val Frequency of statistics collection + * @return this for chaining */ chunked_orc_writer_options_builder& enable_statistics(statistics_freq val) { @@ -956,7 +1035,7 @@ class chunked_orc_writer_options_builder { * @brief Sets the maximum stripe size, in bytes. * * @param val maximum stripe size - * @return this for chaining. + * @return this for chaining */ chunked_orc_writer_options_builder& stripe_size_bytes(size_t val) { @@ -968,7 +1047,7 @@ class chunked_orc_writer_options_builder { * @brief Sets the maximum number of rows in output stripes. * * @param val maximum number or rows - * @return this for chaining. 
+ * @return this for chaining */ chunked_orc_writer_options_builder& stripe_size_rows(size_type val) { @@ -980,7 +1059,7 @@ class chunked_orc_writer_options_builder { * @brief Sets the row index stride. * * @param val new row index stride - * @return this for chaining. + * @return this for chaining */ chunked_orc_writer_options_builder& row_index_stride(size_type val) { @@ -991,8 +1070,8 @@ class chunked_orc_writer_options_builder { /** * @brief Sets associated metadata. * - * @param meta Associated metadata. - * @return this for chaining. + * @param meta Associated metadata + * @return this for chaining */ chunked_orc_writer_options_builder& metadata(table_input_metadata const* meta) { @@ -1004,7 +1083,7 @@ class chunked_orc_writer_options_builder { * @brief Sets Key-Value footer metadata. * * @param metadata Key-Value footer metadata - * @return this for chaining. + * @return this for chaining */ chunked_orc_writer_options_builder& key_value_metadata( std::map metadata) @@ -1022,6 +1101,8 @@ class chunked_orc_writer_options_builder { * @brief move chunked_orc_writer_options member once it's built. * * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `chunked_orc_writer_options` object's r-value reference */ chunked_orc_writer_options&& build() { return std::move(options); } }; @@ -1077,7 +1158,7 @@ class orc_chunked_writer { */ void close(); - // Unique pointer to impl writer class + /// Unique pointer to impl writer class std::unique_ptr writer; }; diff --git a/cpp/include/cudf/io/orc_metadata.hpp b/cpp/include/cudf/io/orc_metadata.hpp index 807fab2e85c..e5b89cc0f91 100644 --- a/cpp/include/cudf/io/orc_metadata.hpp +++ b/cpp/include/cudf/io/orc_metadata.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,9 +40,9 @@ namespace io { * contains one element per stripe, where each element contains column statistics for each column. */ struct raw_orc_statistics { - std::vector column_names; - std::vector file_stats; - std::vector> stripes_stats; + std::vector column_names; ///< Column names + std::vector file_stats; ///< File-level statistics for each column + std::vector> stripes_stats; ///< Stripe-level statistics for each column }; /** @@ -74,8 +74,8 @@ using no_statistics = std::monostate; */ template struct minmax_statistics { - std::optional minimum; - std::optional maximum; + std::optional minimum; ///< Minimum value + std::optional maximum; ///< Maximum value }; /** @@ -85,7 +85,7 @@ struct minmax_statistics { */ template struct sum_statistics { - std::optional sum; + std::optional sum; ///< Sum of values in column }; /** @@ -116,7 +116,7 @@ struct string_statistics : minmax_statistics, sum_statistics count; + std::vector count; ///< Count of `false` and `true` values }; /** @@ -144,8 +144,8 @@ using binary_statistics = sum_statistics; * the UNIX epoch. The `minimum_utc` and `maximum_utc` are the same values adjusted to UTC. */ struct timestamp_statistics : minmax_statistics { - std::optional minimum_utc; - std::optional maximum_utc; + std::optional minimum_utc; ///< minimum in milliseconds + std::optional maximum_utc; ///< maximum in milliseconds }; namespace orc { @@ -162,7 +162,7 @@ struct column_statistics; * have additional statistics, accessible through `type_specific_stats` accessor. 
*/ struct column_statistics { - std::optional number_of_values; + std::optional number_of_values; ///< number of statistics std::variant - type_specific_stats; + type_specific_stats; ///< type-specific statistics + /** + * @brief Construct a new column statistics object + * + * @param detail_statistics The statistics to initialize the object with + */ column_statistics(cudf::io::orc::column_statistics&& detail_statistics); }; @@ -185,9 +190,9 @@ struct column_statistics { * column. */ struct parsed_orc_statistics { - std::vector column_names; - std::vector file_stats; - std::vector> stripes_stats; + std::vector column_names; ///< column names + std::vector file_stats; ///< file-level statistics + std::vector> stripes_stats; ///< stripe-level statistics }; /** diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index d6812559e38..27821fe5526 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -37,14 +37,11 @@ namespace io { * @file */ -constexpr size_t default_row_group_size_bytes = 128 * 1024 * 1024; // 128MB -constexpr size_type default_row_group_size_rows = 1000000; -constexpr size_t default_max_page_size_bytes = 512 * 1024; -constexpr size_type default_max_page_size_rows = 20000; +constexpr size_t default_row_group_size_bytes = 128 * 1024 * 1024; ///< 128MB per row group +constexpr size_type default_row_group_size_rows = 1000000; ///< 1 million rows per row group +constexpr size_t default_max_page_size_bytes = 512 * 1024; ///< 512KB per page +constexpr size_type default_max_page_size_rows = 20000; ///< 20k rows per page -/** - * @brief Builds parquet_reader_options to use for `read_parquet()`. - */ class parquet_reader_options_builder; /** @@ -73,7 +70,7 @@ class parquet_reader_options { /** * @brief Constructor from source info. * - * @param src source information used to read parquet file. 
+ * @param src source information used to read parquet file */ explicit parquet_reader_options(source_info const& src) : _source(src) {} @@ -90,19 +87,23 @@ class parquet_reader_options { /** * @brief Creates a parquet_reader_options_builder which will build parquet_reader_options. * - * @param src Source information to read parquet file. - * @return Builder to build reader options. + * @param src Source information to read parquet file + * @return Builder to build reader options */ static parquet_reader_options_builder builder(source_info const& src); /** * @brief Returns source info. + * + * @return Source info */ [[nodiscard]] source_info const& get_source() const { return _source; } /** * @brief Returns true/false depending on whether strings should be converted to categories or * not. + * + * @return `true` if strings should be converted to categories */ [[nodiscard]] bool is_enabled_convert_strings_to_categories() const { @@ -111,45 +112,57 @@ class parquet_reader_options { /** * @brief Returns true/false depending whether to use pandas metadata or not while reading. + * + * @return `true` if pandas metadata is used while reading */ [[nodiscard]] bool is_enabled_use_pandas_metadata() const { return _use_pandas_metadata; } /** * @brief Returns number of rows to skip from the start. + * + * @return Number of rows to skip from the start */ [[nodiscard]] size_type get_skip_rows() const { return _skip_rows; } /** * @brief Returns number of rows to read. + * + * @return Number of rows to read */ [[nodiscard]] size_type get_num_rows() const { return _num_rows; } /** * @brief Returns names of column to be read. + * + * @return Names of column to be read */ [[nodiscard]] std::vector const& get_columns() const { return _columns; } /** * @brief Returns list of individual row groups to be read. 
+ * + * @return List of individual row groups to be read */ std::vector> const& get_row_groups() const { return _row_groups; } /** * @brief Returns timestamp type used to cast timestamp columns. + * + * @return Timestamp type used to cast timestamp columns */ data_type get_timestamp_type() const { return _timestamp_type; } /** * @brief Sets names of the columns to be read. * - * @param col_names Vector of column names. + * @param col_names Vector of column names */ void set_columns(std::vector col_names) { _columns = std::move(col_names); } /** * @brief Sets vector of individual row groups to read. * - * @param row_groups Vector of row groups to read. + * @param row_groups Vector of row groups to read */ void set_row_groups(std::vector> row_groups) { @@ -163,21 +176,21 @@ class parquet_reader_options { /** * @brief Sets to enable/disable conversion of strings to categories. * - * @param val Boolean value to enable/disable conversion of string columns to categories. + * @param val Boolean value to enable/disable conversion of string columns to categories */ void enable_convert_strings_to_categories(bool val) { _convert_strings_to_categories = val; } /** * @brief Sets to enable/disable use of pandas metadata to read. * - * @param val Boolean value whether to use pandas metadata. + * @param val Boolean value whether to use pandas metadata */ void enable_use_pandas_metadata(bool val) { _use_pandas_metadata = val; } /** * @brief Sets number of rows to skip. * - * @param val Number of rows to skip from start. + * @param val Number of rows to skip from start */ void set_skip_rows(size_type val) { @@ -191,7 +204,7 @@ class parquet_reader_options { /** * @brief Sets number of rows to read. * - * @param val Number of rows to read after skip. + * @param val Number of rows to read after skip */ void set_num_rows(size_type val) { @@ -205,11 +218,14 @@ class parquet_reader_options { /** * @brief Sets timestamp_type used to cast timestamp columns. 
* - * @param type The timestamp data_type to which all timestamp columns need to be cast. + * @param type The timestamp data_type to which all timestamp columns need to be cast */ void set_timestamp_type(data_type type) { _timestamp_type = type; } }; +/** + * @brief Builds parquet_reader_options to use for `read_parquet()`. + */ class parquet_reader_options_builder { parquet_reader_options options; @@ -224,15 +240,15 @@ class parquet_reader_options_builder { /** * @brief Constructor from source info. * - * @param src The source information used to read parquet file. + * @param src The source information used to read parquet file */ explicit parquet_reader_options_builder(source_info const& src) : options(src) {} /** * @brief Sets names of the columns to be read. * - * @param col_names Vector of column names. - * @return this for chaining. + * @param col_names Vector of column names + * @return this for chaining */ parquet_reader_options_builder& columns(std::vector col_names) { @@ -243,8 +259,8 @@ class parquet_reader_options_builder { /** * @brief Sets vector of individual row groups to read. * - * @param row_groups Vector of row groups to read. - * @return this for chaining. + * @param row_groups Vector of row groups to read + * @return this for chaining */ parquet_reader_options_builder& row_groups(std::vector> row_groups) { @@ -255,8 +271,8 @@ class parquet_reader_options_builder { /** * @brief Sets enable/disable conversion of strings to categories. * - * @param val Boolean value to enable/disable conversion of string columns to categories. - * @return this for chaining. + * @param val Boolean value to enable/disable conversion of string columns to categories + * @return this for chaining */ parquet_reader_options_builder& convert_strings_to_categories(bool val) { @@ -267,8 +283,8 @@ class parquet_reader_options_builder { /** * @brief Sets to enable/disable use of pandas metadata to read. * - * @param val Boolean value whether to use pandas metadata. 
- * @return this for chaining. + * @param val Boolean value whether to use pandas metadata + * @return this for chaining */ parquet_reader_options_builder& use_pandas_metadata(bool val) { @@ -279,8 +295,8 @@ class parquet_reader_options_builder { /** * @brief Sets number of rows to skip. * - * @param val Number of rows to skip from start. - * @return this for chaining. + * @param val Number of rows to skip from start + * @return this for chaining */ parquet_reader_options_builder& skip_rows(size_type val) { @@ -291,8 +307,8 @@ class parquet_reader_options_builder { /** * @brief Sets number of rows to read. * - * @param val Number of rows to read after skip. - * @return this for chaining. + * @param val Number of rows to read after skip + * @return this for chaining */ parquet_reader_options_builder& num_rows(size_type val) { @@ -303,8 +319,8 @@ class parquet_reader_options_builder { /** * @brief timestamp_type used to cast timestamp columns. * - * @param type The timestamp data_type to which all timestamp columns need to be cast. - * @return this for chaining. + * @param type The timestamp data_type to which all timestamp columns need to be cast + * @return this for chaining */ parquet_reader_options_builder& timestamp_type(data_type type) { @@ -321,6 +337,8 @@ class parquet_reader_options_builder { * @brief move parquet_reader_options member once it's built. * * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `parquet_reader_options` object's r-value reference */ parquet_reader_options&& build() { return std::move(options); } }; @@ -352,9 +370,6 @@ table_with_metadata read_parquet( * @file */ -/** - * @brief Class to build `parquet_writer_options`. - */ class parquet_writer_options_builder; /** @@ -392,15 +407,15 @@ class parquet_writer_options { /** * @brief Constructor from sink and table. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. 
+ * @param sink The sink used for writer output + * @param table Table to be written to output */ explicit parquet_writer_options(sink_info const& sink, table_view const& table) : _sink(sink), _table(table) { } - friend class parquet_writer_options_builder; + friend parquet_writer_options_builder; public: /** @@ -413,52 +428,66 @@ class parquet_writer_options { /** * @brief Create builder to create `parquet_writer_options`. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. + * @param sink The sink used for writer output + * @param table Table to be written to output * - * @return Builder to build parquet_writer_options. + * @return Builder to build parquet_writer_options */ static parquet_writer_options_builder builder(sink_info const& sink, table_view const& table); /** * @brief Create builder to create `parquet_writer_options`. * - * @return parquet_writer_options_builder. + * @return parquet_writer_options_builder */ static parquet_writer_options_builder builder(); /** * @brief Returns sink info. + * + * @return Sink info */ [[nodiscard]] sink_info const& get_sink() const { return _sink; } /** * @brief Returns compression format used. + * + * @return Compression format */ [[nodiscard]] compression_type get_compression() const { return _compression; } /** * @brief Returns level of statistics requested in output file. + * + * @return level of statistics requested in output file */ [[nodiscard]] statistics_freq get_stats_level() const { return _stats_level; } /** * @brief Returns table_view. + * + * @return Table view */ [[nodiscard]] table_view get_table() const { return _table; } /** * @brief Returns partitions. + * + * @return Partitions */ [[nodiscard]] std::vector const& get_partitions() const { return _partitions; } /** * @brief Returns associated metadata. 
+ * + * @return Associated metadata */ [[nodiscard]] table_input_metadata const* get_metadata() const { return _metadata; } /** * @brief Returns Key-Value footer metadata information. + * + * @return Key-Value footer metadata information */ std::vector> const& get_key_value_metadata() const { @@ -467,11 +496,15 @@ class parquet_writer_options { /** * @brief Returns `true` if timestamps will be written as INT96 + * + * @return `true` if timestamps will be written as INT96 */ bool is_enabled_int96_timestamps() const { return _write_timestamps_as_int96; } /** * @brief Returns Column chunks file paths to be set in the raw output metadata. + * + * @return Column chunks file paths to be set in the raw output metadata */ std::vector const& get_column_chunks_file_paths() const { @@ -480,11 +513,15 @@ class parquet_writer_options { /** * @brief Returns maximum row group size, in bytes. + * + * @return Maximum row group size, in bytes */ auto get_row_group_size_bytes() const { return _row_group_size_bytes; } /** * @brief Returns maximum row group size, in rows. + * + * @return Maximum row group size, in rows */ auto get_row_group_size_rows() const { return _row_group_size_rows; } @@ -522,7 +559,7 @@ class parquet_writer_options { /** * @brief Sets metadata. * - * @param metadata Associated metadata. + * @param metadata Associated metadata */ void set_metadata(table_input_metadata const* metadata) { _metadata = metadata; } @@ -541,14 +578,14 @@ class parquet_writer_options { /** * @brief Sets the level of statistics. * - * @param sf Level of statistics requested in the output file. + * @param sf Level of statistics requested in the output file */ void set_stats_level(statistics_freq sf) { _stats_level = sf; } /** * @brief Sets compression type. * - * @param compression The compression type to use. 
+ * @param compression The compression type to use */ void set_compression(compression_type compression) { _compression = compression; } @@ -575,6 +612,8 @@ class parquet_writer_options { /** * @brief Sets the maximum row group size, in bytes. + * + * @param size_bytes Maximum row group size, in bytes to set */ void set_row_group_size_bytes(size_t size_bytes) { @@ -586,6 +625,8 @@ class parquet_writer_options { /** * @brief Sets the maximum row group size, in rows. + * + * @param size_rows Maximum row group size, in rows to set */ void set_row_group_size_rows(size_type size_rows) { @@ -616,6 +657,9 @@ class parquet_writer_options { } }; +/** + * @brief Class to build `parquet_writer_options`. + */ class parquet_writer_options_builder { parquet_writer_options options; @@ -630,8 +674,8 @@ class parquet_writer_options_builder { /** * @brief Constructor from sink and table. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. + * @param sink The sink used for writer output + * @param table Table to be written to output */ explicit parquet_writer_options_builder(sink_info const& sink, table_view const& table) : options(sink, table) @@ -643,7 +687,7 @@ class parquet_writer_options_builder { * * @param partitions Partitions of input table in {start_row, num_rows} pairs. If specified, must * be same size as number of sinks in sink_info - * @return this for chaining. + * @return this for chaining */ parquet_writer_options_builder& partitions(std::vector partitions) { @@ -656,8 +700,8 @@ class parquet_writer_options_builder { /** * @brief Sets metadata in parquet_writer_options. * - * @param metadata Associated metadata. - * @return this for chaining. + * @param metadata Associated metadata + * @return this for chaining */ parquet_writer_options_builder& metadata(table_input_metadata const* metadata) { @@ -669,7 +713,7 @@ class parquet_writer_options_builder { * @brief Sets Key-Value footer metadata in parquet_writer_options. 
* * @param metadata Key-Value footer metadata - * @return this for chaining. + * @return this for chaining */ parquet_writer_options_builder& key_value_metadata( std::vector> metadata) @@ -683,8 +727,8 @@ class parquet_writer_options_builder { /** * @brief Sets the level of statistics in parquet_writer_options. * - * @param sf Level of statistics requested in the output file. - * @return this for chaining. + * @param sf Level of statistics requested in the output file + * @return this for chaining */ parquet_writer_options_builder& stats_level(statistics_freq sf) { @@ -695,8 +739,8 @@ class parquet_writer_options_builder { /** * @brief Sets compression type in parquet_writer_options. * - * @param compression The compression type to use. - * @return this for chaining. + * @param compression The compression type to use + * @return this for chaining */ parquet_writer_options_builder& compression(compression_type compression) { @@ -709,7 +753,7 @@ class parquet_writer_options_builder { * * @param file_paths Vector of Strings which indicates file path. Must be same size as number of * data sinks - * @return this for chaining. + * @return this for chaining */ parquet_writer_options_builder& column_chunks_file_paths(std::vector file_paths) { @@ -723,7 +767,7 @@ class parquet_writer_options_builder { * @brief Sets the maximum row group size, in bytes. * * @param val maximum row group size - * @return this for chaining. + * @return this for chaining */ parquet_writer_options_builder& row_group_size_bytes(size_t val) { @@ -734,8 +778,8 @@ class parquet_writer_options_builder { /** * @brief Sets the maximum number of rows in output row groups. * - * @param val maximum number of rows - * @return this for chaining. + * @param val maximum number of rows + * @return this for chaining */ parquet_writer_options_builder& row_group_size_rows(size_type val) { @@ -749,7 +793,7 @@ class parquet_writer_options_builder { * bytes, and will be adjusted to match if it is.
* * @param val maximum page size - * @return this for chaining. + * @return this for chaining */ parquet_writer_options_builder& max_page_size_bytes(size_t val) { @@ -762,7 +806,7 @@ class parquet_writer_options_builder { * Cannot be larger than the row group size in rows, and will be adjusted to match if it is. * * @param val maximum rows per page - * @return this for chaining. + * @return this for chaining */ parquet_writer_options_builder& max_page_size_rows(size_type val) { @@ -773,8 +817,8 @@ class parquet_writer_options_builder { /** * @brief Sets whether int96 timestamps are written or not in parquet_writer_options. * - * @param enabled Boolean value to enable/disable int96 timestamps. - * @return this for chaining. + * @param enabled Boolean value to enable/disable int96 timestamps + * @return this for chaining */ parquet_writer_options_builder& int96_timestamps(bool enabled) { @@ -791,6 +835,8 @@ class parquet_writer_options_builder { * @brief move parquet_writer_options member once it's built. * * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `parquet_writer_options` object's r-value reference */ parquet_writer_options&& build() { return std::move(options); } }; @@ -805,8 +851,8 @@ class parquet_writer_options_builder { * cudf::io::write_parquet(options); * @endcode * - * @param options Settings for controlling writing behavior. - * @param mr Device memory resource to use for device memory allocation. + * @param options Settings for controlling writing behavior + * @param mr Device memory resource to use for device memory allocation * * @return A blob that contains the file metadata (parquet FileMetadata thrift message) if * requested in parquet_writer_options (empty blob otherwise). @@ -822,15 +868,12 @@ std::unique_ptr> write_parquet( * * @ingroup io_writers * - * @param[in] metadata_list List of input file metadata. 
- * @return A parquet-compatible blob that contains the data for all row groups in the list. + * @param[in] metadata_list List of input file metadata + * @return A parquet-compatible blob that contains the data for all row groups in the list */ std::unique_ptr> merge_row_group_metadata( const std::vector>>& metadata_list); -/** - * @brief Builds options for chunked_parquet_writer_options. - */ class chunked_parquet_writer_options_builder; /** @@ -862,7 +905,7 @@ class chunked_parquet_writer_options { /** * @brief Constructor from sink. * - * @param sink Sink used for writer output. + * @param sink Sink used for writer output */ explicit chunked_parquet_writer_options(sink_info const& sink) : _sink(sink) {} @@ -878,26 +921,36 @@ class chunked_parquet_writer_options { /** * @brief Returns sink info. + * + * @return Sink info */ [[nodiscard]] sink_info const& get_sink() const { return _sink; } /** * @brief Returns compression format used. + * + * @return Compression format */ [[nodiscard]] compression_type get_compression() const { return _compression; } /** * @brief Returns level of statistics requested in output file. + * + * @return Level of statistics requested in output file */ [[nodiscard]] statistics_freq get_stats_level() const { return _stats_level; } /** * @brief Returns metadata information. + * + * @return Metadata information */ [[nodiscard]] table_input_metadata const* get_metadata() const { return _metadata; } /** * @brief Returns Key-Value footer metadata information. + * + * @return Key-Value footer metadata information */ std::vector> const& get_key_value_metadata() const { @@ -906,16 +959,22 @@ class chunked_parquet_writer_options { /** * @brief Returns `true` if timestamps will be written as INT96 + * + * @return `true` if timestamps will be written as INT96 */ bool is_enabled_int96_timestamps() const { return _write_timestamps_as_int96; } /** * @brief Returns maximum row group size, in bytes. 
+ * + * @return Maximum row group size, in bytes */ auto get_row_group_size_bytes() const { return _row_group_size_bytes; } /** * @brief Returns maximum row group size, in rows. + * + * @return Maximum row group size, in rows */ auto get_row_group_size_rows() const { return _row_group_size_rows; } @@ -940,7 +999,7 @@ class chunked_parquet_writer_options { /** * @brief Sets metadata. * - * @param metadata Associated metadata. + * @param metadata Associated metadata */ void set_metadata(table_input_metadata const* metadata) { _metadata = metadata; } @@ -959,14 +1018,14 @@ class chunked_parquet_writer_options { /** * @brief Sets the level of statistics in parquet_writer_options. * - * @param sf Level of statistics requested in the output file. + * @param sf Level of statistics requested in the output file */ void set_stats_level(statistics_freq sf) { _stats_level = sf; } /** * @brief Sets compression type. * - * @param compression The compression type to use. + * @param compression The compression type to use */ void set_compression(compression_type compression) { _compression = compression; } @@ -980,6 +1039,8 @@ class chunked_parquet_writer_options { /** * @brief Sets the maximum row group size, in bytes. + * + * @param size_bytes Maximum row group size, in bytes to set */ void set_row_group_size_bytes(size_t size_bytes) { @@ -991,6 +1052,8 @@ class chunked_parquet_writer_options { /** * @brief Sets the maximum row group size, in rows. + * + * @param size_rows The maximum row group size, in rows to set */ void set_row_group_size_rows(size_type size_rows) { @@ -1023,13 +1086,16 @@ class chunked_parquet_writer_options { /** * @brief creates builder to build chunked_parquet_writer_options. * - * @param sink sink to use for writer output. + * @param sink sink to use for writer output * - * @return Builder to build `chunked_parquet_writer_options`. 
+ * @return Builder to build `chunked_parquet_writer_options` */ static chunked_parquet_writer_options_builder builder(sink_info const& sink); }; +/** + * @brief Builds options for chunked_parquet_writer_options. + */ class chunked_parquet_writer_options_builder { chunked_parquet_writer_options options; @@ -1044,15 +1110,15 @@ class chunked_parquet_writer_options_builder { /** * @brief Constructor from sink. * - * @param sink The sink used for writer output. + * @param sink The sink used for writer output */ chunked_parquet_writer_options_builder(sink_info const& sink) : options(sink){}; /** * @brief Sets metadata to chunked_parquet_writer_options. * - * @param metadata Associated metadata. - * @return this for chaining. + * @param metadata Associated metadata + * @return this for chaining */ chunked_parquet_writer_options_builder& metadata(table_input_metadata const* metadata) { @@ -1064,7 +1130,7 @@ class chunked_parquet_writer_options_builder { * @brief Sets Key-Value footer metadata in parquet_writer_options. * * @param metadata Key-Value footer metadata - * @return this for chaining. + * @return this for chaining */ chunked_parquet_writer_options_builder& key_value_metadata( std::vector> metadata) @@ -1078,8 +1144,8 @@ class chunked_parquet_writer_options_builder { /** * @brief Sets Sets the level of statistics in chunked_parquet_writer_options. * - * @param sf Level of statistics requested in the output file. - * @return this for chaining. + * @param sf Level of statistics requested in the output file + * @return this for chaining */ chunked_parquet_writer_options_builder& stats_level(statistics_freq sf) { @@ -1090,8 +1156,8 @@ class chunked_parquet_writer_options_builder { /** * @brief Sets compression type to chunked_parquet_writer_options. * - * compression The compression type to use. - * @return this for chaining. 
+ * @param compression The compression type to use + * @return this for chaining */ chunked_parquet_writer_options_builder& compression(compression_type compression) { @@ -1105,8 +1171,8 @@ class chunked_parquet_writer_options_builder { * not an internal type for cudf, it needs to be written for backwards * compatibility reasons. * - * @param enabled Boolean value to enable/disable int96 timestamps. - * @return this for chaining. + * @param enabled Boolean value to enable/disable int96 timestamps + * @return this for chaining */ chunked_parquet_writer_options_builder& int96_timestamps(bool enabled) { @@ -1118,7 +1184,7 @@ class chunked_parquet_writer_options_builder { * @brief Sets the maximum row group size, in bytes. * * @param val maximum row group size - * @return this for chaining. + * @return this for chaining */ chunked_parquet_writer_options_builder& row_group_size_bytes(size_t val) { @@ -1129,8 +1195,8 @@ class chunked_parquet_writer_options_builder { /** * @brief Sets the maximum number of rows in output row groups. * - * @param val maximum number of rows - * @return this for chaining. + * @param val maximum number of rows + * @return this for chaining */ chunked_parquet_writer_options_builder& row_group_size_rows(size_type val) { @@ -1144,7 +1210,7 @@ class chunked_parquet_writer_options_builder { * bytes, and will be adjusted to match if it is. * * @param val maximum page size - * @return this for chaining. + * @return this for chaining */ chunked_parquet_writer_options_builder& max_page_size_bytes(size_t val) { @@ -1157,7 +1223,7 @@ class chunked_parquet_writer_options_builder { * Cannot be larger than the row group size in rows, and will be adjusted to match if it is. * * @param val maximum rows per page - * @return this for chaining.
+ * @return this for chaining */ chunked_parquet_writer_options_builder& max_page_size_rows(size_type val) { @@ -1174,6 +1240,8 @@ class chunked_parquet_writer_options_builder { * @brief move chunked_parquet_writer_options member once it's is built. * * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `chunked_parquet_writer_options` object's r-value reference */ chunked_parquet_writer_options&& build() { return std::move(options); } }; @@ -1239,7 +1307,7 @@ class parquet_chunked_writer { std::unique_ptr> close( std::vector const& column_chunks_file_paths = {}); - // Unique pointer to impl writer class + /// Unique pointer to impl writer class std::unique_ptr writer; }; diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index 2e64cd5e96b..b5751af78da 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -381,7 +381,7 @@ std::unique_ptr> left_semi_join( * The column from `right` indicated by `right_on[i]` * will be compared against the column from `left` * indicated by `left_on[i]`. - * @param[in] compare_nulls Controls whether null join-key values should match or not. + * @param[in] compare_nulls Controls whether null join-key values should match or not * @param[in] mr Device memory resource used to allocate the returned table's * device memory * @@ -463,7 +463,7 @@ std::unique_ptr> left_anti_join( * The column from `right` indicated by `right_on[i]` * will be compared against the column from `left` * indicated by `left_on[i]`. - * @param[in] compare_nulls Controls whether null join-key values should match or not. 
+ * @param[in] compare_nulls Controls whether null join-key values should match or not * @param[in] mr Device memory resource used to allocate the returned table's * device memory * @@ -514,8 +514,8 @@ std::unique_ptr cross_join( */ class hash_join { public: - using impl_type = - typename cudf::detail::hash_join>; + using impl_type = typename cudf::detail::hash_join< + cudf::detail::MurmurHash3_32>; ///< Implementation type hash_join() = delete; ~hash_join(); @@ -530,8 +530,8 @@ class hash_join { * @note The `hash_join` object must not outlive the table viewed by `build`, else behavior is * undefined. * - * @param build The build table, from which the hash table is built. - * @param compare_nulls Controls whether null join-key values should match or not. + * @param build The build table, from which the hash table is built + * @param compare_nulls Controls whether null join-key values should match or not * @param stream CUDA stream used for device memory operations and kernel launches */ hash_join(cudf::table_view const& build, @@ -543,8 +543,8 @@ class hash_join { * an inner join between two tables. @see cudf::inner_join(). Behavior is undefined if the * provided `output_size` is smaller than the actual output size. * - * @param probe The probe table, from which the tuples are probed. - * @param output_size Optional value which allows users to specify the exact output size. + * @param probe The probe table, from which the tuples are probed + * @param output_size Optional value which allows users to specify the exact output size * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device * memory. @@ -565,8 +565,8 @@ class hash_join { * a left join between two tables. @see cudf::left_join(). Behavior is undefined if the * provided `output_size` is smaller than the actual output size. * - * @param probe The probe table, from which the tuples are probed. 
- * @param output_size Optional value which allows users to specify the exact output size. + * @param probe The probe table, from which the tuples are probed + * @param output_size Optional value which allows users to specify the exact output size * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device * memory. @@ -587,8 +587,8 @@ class hash_join { * a full join between two tables. @see cudf::full_join(). Behavior is undefined if the * provided `output_size` is smaller than the actual output size. * - * @param probe The probe table, from which the tuples are probed. - * @param output_size Optional value which allows users to specify the exact output size. + * @param probe The probe table, from which the tuples are probed + * @param output_size Optional value which allows users to specify the exact output size * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device * memory. @@ -608,7 +608,7 @@ class hash_join { * Returns the exact number of matches (rows) when performing an inner join with the specified * probe table. * - * @param probe The probe table, from which the tuples are probed. + * @param probe The probe table, from which the tuples are probed * @param stream CUDA stream used for device memory operations and kernel launches * * @return The exact number of output when performing an inner join between two tables with @@ -621,7 +621,7 @@ class hash_join { * Returns the exact number of matches (rows) when performing a left join with the specified probe * table. * - * @param probe The probe table, from which the tuples are probed. 
+ * @param probe The probe table, from which the tuples are probed * @param stream CUDA stream used for device memory operations and kernel launches * * @return The exact number of output when performing a left join between two tables with `build` @@ -634,7 +634,7 @@ class hash_join { * Returns the exact number of matches (rows) when performing a full join with the specified probe * table. * - * @param probe The probe table, from which the tuples are probed. + * @param probe The probe table, from which the tuples are probed * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the intermediate table and columns' device * memory. @@ -679,8 +679,8 @@ class hash_join { * * @param left The left table * @param right The right table - * @param binary_predicate The condition on which to join. - * @param output_size Optional value which allows users to specify the exact output size. + * @param binary_predicate The condition on which to join + * @param output_size Optional value which allows users to specify the exact output size * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct @@ -725,8 +725,8 @@ conditional_inner_join( * * @param left The left table * @param right The right table - * @param binary_predicate The condition on which to join. - * @param output_size Optional value which allows users to specify the exact output size. 
+ * @param binary_predicate The condition on which to join + * @param output_size Optional value which allows users to specify the exact output size * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct @@ -769,7 +769,7 @@ conditional_left_join(table_view const& left, * * @param left The left table * @param right The right table - * @param binary_predicate The condition on which to join. + * @param binary_predicate The condition on which to join * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct @@ -806,8 +806,8 @@ conditional_full_join(table_view const& left, * * @param left The left table * @param right The right table - * @param binary_predicate The condition on which to join. - * @param output_size Optional value which allows users to specify the exact output size. + * @param binary_predicate The condition on which to join + * @param output_size Optional value which allows users to specify the exact output size * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A vector `left_indices` that can be used to construct the result of @@ -845,8 +845,8 @@ std::unique_ptr> conditional_left_semi_join( * * @param left The left table * @param right The right table - * @param binary_predicate The condition on which to join. - * @param output_size Optional value which allows users to specify the exact output size. 
+ * @param binary_predicate The condition on which to join + * @param output_size Optional value which allows users to specify the exact output size * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A vector `left_indices` that can be used to construct the result of @@ -892,12 +892,12 @@ std::unique_ptr> conditional_left_anti_join( * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not * match. * - * @param left_equality The left table used for the equality join. - * @param right_equality The right table used for the equality join. - * @param left_conditional The left table used for the conditional join. - * @param right_conditional The right table used for the conditional join. - * @param binary_predicate The condition on which to join. - * @param compare_nulls Whether or not null values join to each other or not. + * @param left_equality The left table used for the equality join + * @param right_equality The right table used for the equality join + * @param left_conditional The left table used for the conditional join + * @param right_conditional The right table used for the conditional join + * @param binary_predicate The condition on which to join + * @param compare_nulls Whether or not null values join to each other or not * @param output_size_data An optional pair of values indicating the exact output size and the * number of matches for each row in the larger of the two input tables, left or right (may be * precomputed using the corresponding mixed_inner_join_size API). @@ -952,12 +952,12 @@ mixed_inner_join( * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not * match. * - * @param left_equality The left table used for the equality join. - * @param right_equality The right table used for the equality join. - * @param left_conditional The left table used for the conditional join. 
- * @param right_conditional The right table used for the conditional join. - * @param binary_predicate The condition on which to join. - * @param compare_nulls Whether or not null values join to each other or not. + * @param left_equality The left table used for the equality join + * @param right_equality The right table used for the equality join + * @param left_conditional The left table used for the conditional join + * @param right_conditional The right table used for the conditional join + * @param binary_predicate The condition on which to join + * @param compare_nulls Whether or not null values join to each other or not * @param output_size_data An optional pair of values indicating the exact output size and the * number of matches for each row in the larger of the two input tables, left or right (may be * precomputed using the corresponding mixed_left_join_size API). @@ -1012,12 +1012,12 @@ mixed_left_join( * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not * match. * - * @param left_equality The left table used for the equality join. - * @param right_equality The right table used for the equality join. - * @param left_conditional The left table used for the conditional join. - * @param right_conditional The right table used for the conditional join. - * @param binary_predicate The condition on which to join. - * @param compare_nulls Whether or not null values join to each other or not. 
+ * @param left_equality The left table used for the equality join + * @param right_equality The right table used for the equality join + * @param left_conditional The left table used for the conditional join + * @param right_conditional The right table used for the conditional join + * @param binary_predicate The condition on which to join + * @param compare_nulls Whether or not null values join to each other or not * @param output_size_data An optional pair of values indicating the exact output size and the * number of matches for each row in the larger of the two input tables, left or right (may be * precomputed using the corresponding mixed_full_join_size API). @@ -1065,12 +1065,12 @@ mixed_full_join( * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not * match. * - * @param left_equality The left table used for the equality join. - * @param right_equality The right table used for the equality join. - * @param left_conditional The left table used for the conditional join. - * @param right_conditional The right table used for the conditional join. - * @param binary_predicate The condition on which to join. - * @param compare_nulls Whether or not null values join to each other or not. + * @param left_equality The left table used for the equality join + * @param right_equality The right table used for the equality join + * @param left_conditional The left table used for the conditional join + * @param right_conditional The right table used for the conditional join + * @param binary_predicate The condition on which to join + * @param compare_nulls Whether or not null values join to each other or not * @param output_size_data An optional pair of values indicating the exact output size and the * number of matches for each row in the larger of the two input tables, left or right (may be * precomputed using the corresponding mixed_full_join_size API). 
@@ -1117,12 +1117,12 @@ std::unique_ptr> mixed_left_semi_join( * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not * match. * - * @param left_equality The left table used for the equality join. - * @param right_equality The right table used for the equality join. - * @param left_conditional The left table used for the conditional join. - * @param right_conditional The right table used for the conditional join. - * @param binary_predicate The condition on which to join. - * @param compare_nulls Whether or not null values join to each other or not. + * @param left_equality The left table used for the equality join + * @param right_equality The right table used for the equality join + * @param left_conditional The left table used for the conditional join + * @param right_conditional The right table used for the conditional join + * @param binary_predicate The condition on which to join + * @param compare_nulls Whether or not null values join to each other or not * @param output_size_data An optional pair of values indicating the exact output size and the * number of matches for each row in the larger of the two input tables, left or right (may be * precomputed using the corresponding mixed_full_join_size API). @@ -1158,15 +1158,12 @@ std::unique_ptr> mixed_left_anti_join( * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not * match. * - * @param left_equality The left table used for the equality join. - * @param right_equality The right table used for the equality join. - * @param left_conditional The left table used for the conditional join. - * @param right_conditional The right table used for the conditional join. - * @param binary_predicate The condition on which to join. - * @param compare_nulls Whether or not null values join to each other or not. 
- * @param output_size An optional pair of values indicating the exact output size and the number of - * matches for each row in the larger of the two input tables, left or right (may be precomputed - * using the corresponding mixed_inner_join_size API). + * @param left_equality The left table used for the equality join + * @param right_equality The right table used for the equality join + * @param left_conditional The left table used for the conditional join + * @param right_conditional The right table used for the conditional join + * @param binary_predicate The condition on which to join + * @param compare_nulls Whether or not null values join to each other or not * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair containing the size that would result from performing the @@ -1201,15 +1198,12 @@ std::pair>> mixed_in * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not * match. * - * @param left_equality The left table used for the equality join. - * @param right_equality The right table used for the equality join. - * @param left_conditional The left table used for the conditional join. - * @param right_conditional The right table used for the conditional join. - * @param binary_predicate The condition on which to join. - * @param compare_nulls Whether or not null values join to each other or not. - * @param output_size An optional pair of values indicating the exact output size and the number of - * matches for each row in the larger of the two input tables, left or right (may be precomputed - * using the corresponding mixed_inner_join_size API). 
+ * @param left_equality The left table used for the equality join + * @param right_equality The right table used for the equality join + * @param left_conditional The left table used for the conditional join + * @param right_conditional The right table used for the conditional join + * @param binary_predicate The condition on which to join + * @param compare_nulls Whether or not null values join to each other or not * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair containing the size that would result from performing the @@ -1244,15 +1238,12 @@ std::pair>> mixed_le * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not * match. * - * @param left_equality The left table used for the equality join. - * @param right_equality The right table used for the equality join. - * @param left_conditional The left table used for the conditional join. - * @param right_conditional The right table used for the conditional join. - * @param binary_predicate The condition on which to join. - * @param compare_nulls Whether or not null values join to each other or not. - * @param output_size An optional pair of values indicating the exact output size and the number of - * matches for each row in the larger of the two input tables, left or right (may be precomputed - * using the corresponding mixed_inner_join_size API). 
+ * @param left_equality The left table used for the equality join + * @param right_equality The right table used for the equality join + * @param left_conditional The left table used for the conditional join + * @param right_conditional The right table used for the conditional join + * @param binary_predicate The condition on which to join + * @param compare_nulls Whether or not null values join to each other or not * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair containing the size that would result from performing the @@ -1285,15 +1276,12 @@ std::pair>> mixed_le * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not * match. * - * @param left_equality The left table used for the equality join. - * @param right_equality The right table used for the equality join. - * @param left_conditional The left table used for the conditional join. - * @param right_conditional The right table used for the conditional join. - * @param binary_predicate The condition on which to join. - * @param compare_nulls Whether or not null values join to each other or not. - * @param output_size An optional pair of values indicating the exact output size and the number of - * matches for each row in the larger of the two input tables, left or right (may be precomputed - * using the corresponding mixed_inner_join_size API). 
+ * @param left_equality The left table used for the equality join + * @param right_equality The right table used for the equality join + * @param left_conditional The left table used for the conditional join + * @param right_conditional The right table used for the conditional join + * @param binary_predicate The condition on which to join + * @param compare_nulls Whether or not null values join to each other or not * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair containing the size that would result from performing the @@ -1323,10 +1311,10 @@ std::pair>> mixed_le * * @param left The left table * @param right The right table - * @param binary_predicate The condition on which to join. + * @param binary_predicate The condition on which to join * @param mr Device memory resource used to allocate the returned table and columns' device memory * - * @return The size that would result from performing the requested join. + * @return The size that would result from performing the requested join */ std::size_t conditional_inner_join_size( table_view const& left, @@ -1346,10 +1334,10 @@ std::size_t conditional_inner_join_size( * * @param left The left table * @param right The right table - * @param binary_predicate The condition on which to join. + * @param binary_predicate The condition on which to join * @param mr Device memory resource used to allocate the returned table and columns' device memory * - * @return The size that would result from performing the requested join. + * @return The size that would result from performing the requested join */ std::size_t conditional_left_join_size( table_view const& left, @@ -1369,10 +1357,10 @@ std::size_t conditional_left_join_size( * * @param left The left table * @param right The right table - * @param binary_predicate The condition on which to join. 
+ * @param binary_predicate The condition on which to join * @param mr Device memory resource used to allocate the returned table and columns' device memory * - * @return The size that would result from performing the requested join. + * @return The size that would result from performing the requested join */ std::size_t conditional_left_semi_join_size( table_view const& left, @@ -1392,10 +1380,10 @@ std::size_t conditional_left_semi_join_size( * * @param left The left table * @param right The right table - * @param binary_predicate The condition on which to join. + * @param binary_predicate The condition on which to join * @param mr Device memory resource used to allocate the returned table and columns' device memory * - * @return The size that would result from performing the requested join. + * @return The size that would result from performing the requested join */ std::size_t conditional_left_anti_join_size( table_view const& left, diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp index 6585932f151..820a763ee43 100644 --- a/cpp/include/cudf/null_mask.hpp +++ b/cpp/include/cudf/null_mask.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -77,7 +77,7 @@ size_type num_bitmask_words(size_type number_of_bits); * * @param size The number of elements to be represented by the mask * @param state The desired state of the mask - * @param mr Device memory resource used to allocate the returned device_buffer. 
+ * @param mr Device memory resource used to allocate the returned device_buffer * @return rmm::device_buffer A `device_buffer` for use as a null bitmask * satisfying the desired size and state */ @@ -93,10 +93,10 @@ rmm::device_buffer create_null_mask( * Sets `[begin_bit, end_bit)` bits of bitmask to valid if `valid==true` * or null otherwise. * - * @param bitmask Pointer to bitmask (e.g. returned by `column_view.null_mask()`) + * @param bitmask Pointer to bitmask (e.g. returned by `column_view.null_mask()`) * @param begin_bit Index of the first bit to set (inclusive) * @param end_bit Index of the last bit to set (exclusive) - * @param valid If true set all entries to valid; otherwise, set all to null. + * @param valid If true set all entries to valid; otherwise, set all to null */ void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit, bool valid); diff --git a/cpp/include/cudf/partitioning.hpp b/cpp/include/cudf/partitioning.hpp index 3ffd9a87d39..12ff33c7af8 100644 --- a/cpp/include/cudf/partitioning.hpp +++ b/cpp/include/cudf/partitioning.hpp @@ -58,8 +58,8 @@ namespace cudf { * @param t The table to partition * @param partition_map Non-nullable column of integer values that map each row * in `t` to it's partition. - * @param num_partitions The total number of partitions. - * @param mr Device memory resource used to allocate the returned table's device memory. + * @param num_partitions The total number of partitions + * @param mr Device memory resource used to allocate the returned table's device memory * @return Pair containing the reordered table and vector of `num_partitions + * 1` offsets to each partition such that the size of partition `i` is * determined by `offset[i+1] - offset[i]`.
@@ -86,7 +86,7 @@ std::pair, std::vector> partition( * @param hash_function Optional hash id that chooses the hash function to use * @param seed Optional seed value to the hash function * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned table's device memory. + * @param mr Device memory resource used to allocate the returned table's device memory * * @returns An output table and a vector of row offsets to each partition */ diff --git a/cpp/include/cudf/quantiles.hpp b/cpp/include/cudf/quantiles.hpp index 6aa72de8bc7..6292c4ce932 100644 --- a/cpp/include/cudf/quantiles.hpp +++ b/cpp/include/cudf/quantiles.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,8 +34,8 @@ namespace cudf { * Computes the specified quantiles by interpolating values between which they * lie, using the interpolation strategy specified in `interp`. * - * @param[in] input Column from which to compute quantile values. - * @param[in] q Specified quantiles in range [0, 1]. + * @param[in] input Column from which to compute quantile values + * @param[in] q Specified quantiles in range [0, 1] * @param[in] interp Strategy used to select between values adjacent to * a specified quantile. * @param[in] ordered_indices Column containing the sorted order of `input`. @@ -48,7 +48,7 @@ namespace cudf { * If false, returns same type as input. * @param[in] mr Device memory resource used to allocate the returned column's device memory - * @returns Column of specified quantiles, with nulls for indeterminable values. 
+ * @returns Column of specified quantiles, with nulls for indeterminable values */ std::unique_ptr quantile( @@ -76,16 +76,16 @@ std::unique_ptr quantile( * quantiles `<= 0` correspond to row `0`. (first) * quantiles `>= 1` correspond to row `input.size() - 1`. (last) * - * @param input Table used to compute quantile rows. - * @param q Desired quantiles in range [0, 1]. + * @param input Table used to compute quantile rows + * @param q Desired quantiles in range [0, 1] * @param interp Strategy used to select between the two rows on either side of the desired quantile. - * @param is_input_sorted Indicates if the input has been pre-sorted. - * @param column_order The desired sort order for each column. - * @param null_precedence The desired order of null compared to other elements. + * @param is_input_sorted Indicates if the input has been pre-sorted + * @param column_order The desired sort order for each column + * @param null_precedence The desired order of null compared to other elements * @param mr Device memory resource used to allocate the returned table's device memory * - * @returns Table of specified quantiles, with nulls for indeterminable values. + * @returns Table of specified quantiles, with nulls for indeterminable values * @throws cudf::logic_error if `interp` is an arithmetic interpolation strategy * @throws cudf::logic_error if `input` is empty */ @@ -110,15 +110,15 @@ std::unique_ptr
quantiles( * corresponding tdigest from `input` row `i`. The length of each output list * is the number of percentages specified in `percentages`. * - * @param input tdigest input data. One tdigest per row. - * @param percentiles Desired percentiles in range [0, 1]. + * @param input tdigest input data. One tdigest per row + * @param percentiles Desired percentiles in range [0, 1] * @param mr Device memory resource used to allocate the returned column's device * memory * * @throws cudf::logic_error if `input` is not a valid tdigest column. * @throws cudf::logic_error if `percentiles` is not a FLOAT64 column. * - * @returns LIST Column containing requested percentile values as FLOAT64. + * @returns LIST Column containing requested percentile values as FLOAT64 */ std::unique_ptr percentile_approx( tdigest::tdigest_column_view const& input, diff --git a/cpp/include/cudf/reduction.hpp b/cpp/include/cudf/reduction.hpp index f140ba7d4a9..bf1246aaad8 100644 --- a/cpp/include/cudf/reduction.hpp +++ b/cpp/include/cudf/reduction.hpp @@ -62,9 +62,9 @@ enum class scan_type : bool { INCLUSIVE, EXCLUSIVE }; * * @param col Input column view * @param agg Aggregation operator applied by the reduction - * @param output_dtype The computation and output precision. + * @param output_dtype The computation and output precision * @param mr Device memory resource used to allocate the returned scalar's device memory - * @returns Output scalar with reduce result. + * @returns Output scalar with reduce result */ std::unique_ptr reduce( column_view const& col, @@ -102,17 +102,17 @@ std::unique_ptr reduce( * @throw cudf::logic_error if `any` or `all` reduction is called and the * output type is not bool8. * - * @param segmented_values Column view of segmented inputs. + * @param segmented_values Column view of segmented inputs * @param offsets Each segment's offset of @p segmented_values. A list of offsets * with size `num_segments + 1`. The size of `i`th segment is `offsets[i+1] - * offsets[i]`. 
- * @param agg Aggregation operator applied by the reduction. - * @param output_dtype The output precision. + * @param agg Aggregation operator applied by the reduction + * @param output_dtype The output precision * @param null_handling If `INCLUDE`, the reduction is valid if all elements in * a segment are valid, otherwise null. If `EXCLUDE`, the reduction is valid if * any element in the segment is valid, otherwise null. * @param mr Device memory resource used to allocate the returned scalar's device memory - * @returns Output column with results of segmented reduction. + * @returns Output column with results of segmented reduction */ std::unique_ptr segmented_reduce( column_view const& segmented_values, @@ -138,7 +138,7 @@ std::unique_ptr segmented_reduce( * null_policy::EXCLUDE. Include nulls if null_policy::INCLUDE. * Any operation with a null results in a null. * @param[in] mr Device memory resource used to allocate the returned scalar's device memory - * @returns unique pointer to new output column + * @returns Scanned output column */ std::unique_ptr scan( const column_view& input, diff --git a/cpp/include/cudf/replace.hpp b/cpp/include/cudf/replace.hpp index 7b535ae8445..08e02db7ca2 100644 --- a/cpp/include/cudf/replace.hpp +++ b/cpp/include/cudf/replace.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,7 +42,7 @@ enum class replace_policy : bool { PRECEDING, FOLLOWING }; * * @param[in] input A column whose null values will be replaced * @param[in] replacement A cudf::column whose values will replace null values in input - * @param[in] mr Device memory resource used to allocate device memory of the returned column. 
+ * @param[in] mr Device memory resource used to allocate device memory of the returned column * * @returns A copy of `input` with the null values replaced with corresponding values from * `replacement`. @@ -59,10 +59,10 @@ std::unique_ptr replace_nulls( * `input` and `replacement` must have the same type. * * @param[in] input A column whose null values will be replaced - * @param[in] replacement Scalar used to replace null values in `input`. - * @param[in] mr Device memory resource used to allocate device memory of the returned column. + * @param[in] replacement Scalar used to replace null values in `input` + * @param[in] mr Device memory resource used to allocate device memory of the returned column * - * @returns Copy of `input` with null values replaced by `replacement`. + * @returns Copy of `input` with null values replaced by `replacement` */ std::unique_ptr replace_nulls( column_view const& input, @@ -75,11 +75,11 @@ std::unique_ptr replace_nulls( * If `input[i]` is NULL, then `output[i]` will contain the first non-null value that precedes or * follows the null value, based on `replace_policy`. * - * @param[in] input A column whose null values will be replaced. - * @param[in] replace_policy Specify the position of replacement values relative to null values. - * @param[in] mr Device memory resource used to allocate device memory of the returned column. + * @param[in] input A column whose null values will be replaced + * @param[in] replace_policy Specify the position of replacement values relative to null values + * @param[in] mr Device memory resource used to allocate device memory of the returned column * - * @returns Copy of `input` with null values replaced based on `replace_policy`. 
+ * @returns Copy of `input` with null values replaced based on `replace_policy` */ std::unique_ptr replace_nulls( column_view const& input, @@ -130,7 +130,7 @@ std::unique_ptr replace_nans( * @param input A column whose NaN values will be replaced * @param replacement A cudf::scalar whose value will replace NaN values in input * @param mr Device memory resource used to allocate the returned column's device memory - * @return A copy of `input` with the NaN values replaced by `replacement`. + * @return A copy of `input` with the NaN values replaced by `replacement` */ std::unique_ptr replace_nans( column_view const& input, @@ -141,12 +141,12 @@ std::unique_ptr replace_nans( * @brief Return a copy of `input_col` replacing any `values_to_replace[i]` * found with `replacement_values[i]`. * - * @param input_col The column to find and replace values in. + * @param input_col The column to find and replace values in * @param values_to_replace The values to replace * @param replacement_values The values to replace with - * @param mr Device memory resource used to allocate the returned column's device memory. + * @param mr Device memory resource used to allocate the returned column's device memory * - * @returns Copy of `input_col` with specified values replaced. + * @returns Copy of `input_col` with specified values replaced */ std::unique_ptr find_and_replace_all( column_view const& input_col, @@ -189,13 +189,13 @@ std::unique_ptr find_and_replace_all( * @throws cudf::logic_error if `lo.type() != input.type()` * * @param[in] input Column whose elements will be clamped - * @param[in] lo Minimum clamp value. All elements less than `lo` will be replaced by `lo_replace`. + * @param[in] lo Minimum clamp value. All elements less than `lo` will be replaced by `lo_replace`. * Ignored if null. - * @param[in] lo_replace All elements less than `lo` will be replaced by `lo_replace`.
+ * @param[in] lo_replace All elements less than `lo` will be replaced by `lo_replace` * @param[in] hi Maximum clamp value. All elements greater than `hi` will be replaced by * `hi_replace`. Ignored if null. - * @param[in] hi_replace All elements greater than `hi` will be replaced by `hi_replace`. - * @param[in] mr Device memory resource used to allocate device memory of the returned column. + * @param[in] hi_replace All elements greater than `hi` will be replaced by `hi_replace` + * @param[in] mr Device memory resource used to allocate device memory of the returned column * * @return Returns a clamped column as per `lo` and `hi` boundaries */ @@ -237,11 +237,11 @@ std::unique_ptr clamp( * @throws cudf::logic_error if `lo.type() != input.type()` * * @param[in] input Column whose elements will be clamped - * @param[in] lo Minimum clamp value. All elements less than `lo` will be replaced by `lo`. Ignored + * @param[in] lo Minimum clamp value. All elements less than `lo` will be replaced by `lo`. Ignored * if null. - * @param[in] hi Maximum clamp value. All elements greater than `hi` will be replaced by `hi`. + * @param[in] hi Maximum clamp value. All elements greater than `hi` will be replaced by `hi`. * Ignored if null. - * @param[in] mr Device memory resource used to allocate device memory of the returned column. + * @param[in] mr Device memory resource used to allocate device memory of the returned column * * @return Returns a clamped column as per `lo` and `hi` boundaries */ diff --git a/cpp/include/cudf/reshape.hpp b/cpp/include/cudf/reshape.hpp index c78fc97a93f..8eb7f8a3eaf 100644 --- a/cpp/include/cudf/reshape.hpp +++ b/cpp/include/cudf/reshape.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ namespace cudf { * @throws cudf::logic_error if input contains no columns. * @throws cudf::logic_error if input columns dtypes are not identical. * - * @param[in] input Table containing columns to interleave. + * @param[in] input Table containing columns to interleave * @param[in] mr Device memory resource used to allocate the returned column's device memory * * @return The interleaved columns as a single column @@ -63,11 +63,11 @@ std::unique_ptr interleave_columns( * return = [[8, 4, 7, 8, 4, 7], [5, 2, 3, 5, 2, 3]] * ``` * - * @param[in] input Table containing rows to be repeated. - * @param[in] count Number of times to tile "rows". Must be non-negative. + * @param[in] input Table containing rows to be repeated + * @param[in] count Number of times to tile "rows". Must be non-negative * @param[in] mr Device memory resource used to allocate the returned table's device memory * - * @return The table containing the tiled "rows". + * @return The table containing the tiled "rows" */ std::unique_ptr
tile( table_view const& input, @@ -88,11 +88,11 @@ enum class flip_endianness : bool { NO, YES }; * return = [[0x00, 0x00, 0x21, 0xe3], [0x00, 0x00, 0x01, 0x35]] * ``` * - * @param input_column Column to be converted to lists of bytes. - * @param endian_configuration Whether to retain or flip the endianness of the elements. - * @param mr Device memory resource used to allocate the returned column's device memory. + * @param input_column Column to be converted to lists of bytes + * @param endian_configuration Whether to retain or flip the endianness of the elements + * @param mr Device memory resource used to allocate the returned column's device memory * - * @return The column containing the lists of bytes. + * @return The column containing the lists of bytes */ std::unique_ptr byte_cast( column_view const& input_column, diff --git a/cpp/include/cudf/rolling.hpp b/cpp/include/cudf/rolling.hpp index ba34e20398e..b7e23a3b3d5 100644 --- a/cpp/include/cudf/rolling.hpp +++ b/cpp/include/cudf/rolling.hpp @@ -49,8 +49,8 @@ namespace cudf { * to `FLOAT32` or `FLOAT64` before doing a rolling `MEAN`. * * @param[in] input The input column - * @param[in] preceding_window The static rolling window size in the backward direction. - * @param[in] following_window The static rolling window size in the forward direction. + * @param[in] preceding_window The static rolling window size in the backward direction + * @param[in] following_window The static rolling window size in the forward direction * @param[in] min_periods Minimum number of observations in window required to have a value, * otherwise element `i` is null. * @param[in] agg The rolling window aggregation type (SUM, MAX, MIN, etc.) @@ -98,6 +98,7 @@ struct window_bounds { * @brief Construct bounded window boundary. 
* * @param value Finite window boundary (in days or rows) + * @returns A window boundary */ static window_bounds get(size_type value) { return window_bounds(false, value); } @@ -116,8 +117,8 @@ struct window_bounds { // For the present, assume units from context: // 1. For time-based window functions, assume DAYS as before // 2. For all else, assume ROWS as before. - const bool is_unbounded; - const size_type value; + const bool is_unbounded; ///< Whether the window boundary is unbounded + const size_type value; ///< Finite window boundary value (in days or rows) private: explicit window_bounds(bool is_unbounded_, size_type value_ = 0) @@ -188,8 +189,8 @@ struct window_bounds { * * @param[in] group_keys The (pre-sorted) grouping columns * @param[in] input The input column (to be aggregated) - * @param[in] preceding_window The static rolling window size in the backward direction. - * @param[in] following_window The static rolling window size in the forward direction. + * @param[in] preceding_window The static rolling window size in the backward direction + * @param[in] following_window The static rolling window size in the forward direction * @param[in] min_periods Minimum number of observations in window required to have a value, * otherwise element `i` is null. * @param[in] aggr The rolling window aggregation type (SUM, MAX, MIN, etc.) @@ -348,8 +349,8 @@ std::unique_ptr grouped_rolling_window( * @param[in] timestamp_column The (pre-sorted) timestamps for each row * @param[in] timestamp_order The order (ASCENDING/DESCENDING) in which the timestamps are sorted * @param[in] input The input column (to be aggregated) - * @param[in] preceding_window_in_days The rolling window time-interval in the backward direction. - * @param[in] following_window_in_days The rolling window time-interval in the forward direction. 
+ * @param[in] preceding_window_in_days The rolling window time-interval in the backward direction + * @param[in] following_window_in_days The rolling window time-interval in the forward direction * @param[in] min_periods Minimum number of observations in window required to have a value, * otherwise element `i` is null. * @param[in] aggr The rolling window aggregation type (SUM, MAX, MIN, etc.) @@ -383,23 +384,8 @@ std::unique_ptr grouped_time_range_rolling_window( * rolling_aggregation const& aggr, * rmm::mr::device_memory_resource* mr) * - * The `preceding_window_in_days` and `following_window_in_days` supports "unbounded" windows, - * if set to `window_bounds::unbounded()`. - * - * @param[in] group_keys The (pre-sorted) grouping columns - * @param[in] timestamp_column The (pre-sorted) timestamps for each row - * @param[in] timestamp_order The order (ASCENDING/DESCENDING) in which the timestamps are sorted - * @param[in] input The input column (to be aggregated) - * @param[in] preceding_window_in_days Possibly unbounded time-interval in the backward direction, - * specified as a `window_bounds` - * @param[in] following_window_in_days Possibly unbounded time-interval in the forward direction, - * specified as a `window_bounds` - * @param[in] min_periods Minimum number of observations in window required to have a value, - * otherwise element `i` is null. - * @param[in] aggr The rolling window aggregation type (SUM, MAX, MIN, etc.) - * @param[in] mr Device memory resource used to allocate the returned column's device memory - * - * @returns A nullable output column containing the rolling window results + * The `preceding_window_in_days` and `following_window_in_days` are specified as a `window_bounds` + * and supports "unbounded" windows, if set to `window_bounds::unbounded()`. 
*/ std::unique_ptr grouped_time_range_rolling_window( table_view const& group_keys, @@ -513,7 +499,7 @@ std::unique_ptr grouped_time_range_rolling_window( * @param[in] order The order (ASCENDING/DESCENDING) in which the order-by column is sorted * @param[in] input The input column (to be aggregated) * @param[in] preceding The interval value in the backward direction - * @param[in] following The interval value in the forward direction. + * @param[in] following The interval value in the forward direction * @param[in] min_periods Minimum number of observations in window required to have a value, * otherwise element `i` is null. * @param[in] aggr The rolling window aggregation type (SUM, MAX, MIN, etc.) diff --git a/cpp/include/cudf/round.hpp b/cpp/include/cudf/round.hpp index 0a48e1d1fc8..c0a300a7d82 100644 --- a/cpp/include/cudf/round.hpp +++ b/cpp/include/cudf/round.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/cudf/search.hpp b/cpp/include/cudf/search.hpp index 3b68923ee93..c83d81b6ebb 100644 --- a/cpp/include/cudf/search.hpp +++ b/cpp/include/cudf/search.hpp @@ -57,12 +57,12 @@ namespace cudf { * result = { 3 } * @endcode * - * @param haystack The table containing search space. - * @param needles Values for which to find the insert locations in the search space. - * @param column_order Vector of column sort order. - * @param null_precedence Vector of null_precedence enums needles. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return A non-nullable column of cudf::size_type elements containing the insertion points. 
+ * @param haystack The table containing search space + * @param needles Values for which to find the insert locations in the search space + * @param column_order Vector of column sort order + * @param null_precedence Vector of null_precedence enums needles + * @param mr Device memory resource used to allocate the returned column's device memory + * @return A non-nullable column of cudf::size_type elements containing the insertion points */ std::unique_ptr lower_bound( table_view const& haystack, @@ -97,12 +97,12 @@ std::unique_ptr lower_bound( * result = { 5 } * @endcode * - * @param haystack The table containing search space. - * @param needles Values for which to find the insert locations in the search space. - * @param column_order Vector of column sort order. - * @param null_precedence Vector of null_precedence enums needles. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return A non-nullable column of cudf::size_type elements containing the insertion points. + * @param haystack The table containing search space + * @param needles Values for which to find the insert locations in the search space + * @param column_order Vector of column sort order + * @param null_precedence Vector of null_precedence enums needles + * @param mr Device memory resource used to allocate the returned column's device memory + * @return A non-nullable column of cudf::size_type elements containing the insertion points */ std::unique_ptr upper_bound( table_view const& haystack, @@ -124,9 +124,9 @@ std::unique_ptr upper_bound( * result = true * @endcode * - * @param haystack The column containing search space. - * @param needle A scalar value to check for existence in the search space. - * @return true if the given `needle` value exists in the `haystack` column. 
+ * @param haystack The column containing search space + * @param needle A scalar value to check for existence in the search space + * @return true if the given `needle` value exists in the `haystack` column */ bool contains(column_view const& haystack, scalar const& needle); @@ -145,10 +145,10 @@ bool contains(column_view const& haystack, scalar const& needle); * result = { true, true, false, false } * @endcode * - * @param haystack The column containing search space. - * @param needles A column of values to check for existence in the search space. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return A BOOL column indicating if each element in `needles` exists in the search space. + * @param haystack The column containing search space + * @param needles A column of values to check for existence in the search space + * @param mr Device memory resource used to allocate the returned column's device memory + * @return A BOOL column indicating if each element in `needles` exists in the search space */ std::unique_ptr contains( column_view const& haystack, diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index b7e915650dc..0198ae641e9 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -39,7 +39,7 @@ namespace cudf { * equal to `input.num_columns()` or empty. If empty, all columns will be sorted * in ascending order. * @param null_precedence The desired order of null compared to other elements - * for each column. Size must be equal to `input.num_columns()` or empty. + * for each column. Size must be equal to `input.num_columns()` or empty. * If empty, all columns will be sorted in `null_order::BEFORE`. * @param mr Device memory resource used to allocate the returned column's device memory * @return A non-nullable column of `size_type` elements containing the permuted row indices of @@ -79,7 +79,7 @@ std::unique_ptr stable_sorted_order( * `input.num_columns()` or empty. 
If empty, * `null_order::BEFORE` is assumed for all columns. * - * @returns bool true if sorted as expected, false if not. + * @returns bool true if sorted as expected, false if not */ bool is_sorted(cudf::table_view const& table, std::vector const& column_order, @@ -180,7 +180,7 @@ std::unique_ptr
stable_sort_by_key( * @endcode * * @param input The column to rank - * @param method The ranking method used for tie breaking (same values). + * @param method The ranking method used for tie breaking (same values) * @param column_order The desired sort order for ranking * @param null_handling flag to include nulls during ranking. If nulls are not * included, corresponding rank will be null. @@ -219,7 +219,7 @@ std::unique_ptr rank( * `keys.num_columns()` or empty. If empty, all columns will be sorted with * `null_order::BEFORE`. * @param mr Device memory resource to allocate any returned objects - * @return sorted order of the segment sorted table . + * @return sorted order of the segment sorted table * */ std::unique_ptr segmented_sorted_order( @@ -260,7 +260,7 @@ std::unique_ptr stable_segmented_sorted_order( * `keys.num_columns()` or empty. If empty, all columns will be sorted with * `null_order::BEFORE`. * @param mr Device memory resource to allocate any returned objects - * @return table with elements in each segment sorted. + * @return table with elements in each segment sorted * */ std::unique_ptr
segmented_sort_by_key( diff --git a/cpp/include/cudf/stream_compaction.hpp b/cpp/include/cudf/stream_compaction.hpp index b48795de16e..586392650e3 100644 --- a/cpp/include/cudf/stream_compaction.hpp +++ b/cpp/include/cudf/stream_compaction.hpp @@ -58,7 +58,7 @@ namespace cudf { * @note if @p input.num_rows() is zero, or @p keys is empty or has no nulls, * there is no error, and an empty `table` is returned * - * @param[in] input The input `table_view` to filter. + * @param[in] input The input `table_view` to filter * @param[in] keys vector of indices representing key columns from `input` * @param[in] keep_threshold The minimum number of non-null fields in a row * required to keep the row. @@ -92,7 +92,7 @@ std::unique_ptr
drop_nulls( * Same as drop_nulls but defaults keep_threshold to the number of columns in * @p keys. * - * @param[in] input The input `table_view` to filter. + * @param[in] input The input `table_view` to filter * @param[in] keys vector of indices representing key columns from `input` * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing all rows of the `input` without nulls in the columns @@ -132,7 +132,7 @@ std::unique_ptr
drop_nulls( * * @throws cudf::logic_error if The `keys` columns are not floating-point type. * - * @param[in] input The input `table_view` to filter. + * @param[in] input The input `table_view` to filter * @param[in] keys vector of indices representing key columns from `input` * @param[in] keep_threshold The minimum number of non-NAN elements in a row * required to keep the row. @@ -167,7 +167,7 @@ std::unique_ptr
drop_nans( * Same as drop_nans but defaults keep_threshold to the number of columns in * @p keys. * - * @param[in] input The input `table_view` to filter. + * @param[in] input The input `table_view` to filter * @param[in] keys vector of indices representing key columns from `input` * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing all rows of the `input` without NANs in the columns @@ -242,7 +242,7 @@ enum class duplicate_keep_option { * @param[in] mr Device memory resource used to allocate the returned table's device * memory * - * @return Table with unique rows from each sequence of equivalent rows as specified by `keep`. + * @return Table with unique rows from each sequence of equivalent rows as specified by `keep` */ std::unique_ptr
unique( table_view const& input, @@ -273,7 +273,7 @@ std::unique_ptr
unique( * @param[in] mr Device memory resource used to allocate the returned table's device * memory * - * @return Table with distinct rows in an unspecified order. + * @return Table with distinct rows in an unspecified order */ std::unique_ptr
distinct( table_view const& input, @@ -304,7 +304,7 @@ cudf::size_type unique_count(column_view const& input, * @brief Count the number of consecutive groups of equivalent rows in a table. * * @param[in] input Table whose consecutive groups of equivalent rows will be counted - * @param[in] nulls_equal flag to denote if null elements should be considered equal. + * @param[in] nulls_equal flag to denote if null elements should be considered equal; * nulls are not equal if null_equality::UNEQUAL. * * @return number of consecutive groups of equivalent rows in the column diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 45e8ff1310c..e221d7e2210 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,7 +61,7 @@ std::unique_ptr transform( * @throws cudf::logic_error if `input.type()` is a non-floating type * * @param input An immutable view of the input column of floating-point type - * @param mr Device memory resource used to allocate the returned bitmask. + * @param mr Device memory resource used to allocate the returned bitmask * @return A pair containing a `device_buffer` with the new bitmask and it's * null count obtained by replacing `NaN` in `input` with null. */ @@ -77,10 +77,10 @@ std::pair, size_type> nans_to_nulls( * * @throws cudf::logic_error if passed an expression operating on table_reference::RIGHT. * - * @param table The table used for expression evaluation. - * @param expr The root of the expression tree. - * @param mr Device memory resource. - * @return std::unique_ptr Output column. 
+ * @param table The table used for expression evaluation + * @param expr The root of the expression tree + * @param mr Device memory resource + * @return std::unique_ptr Output column */ std::unique_ptr compute_column( table_view const& table, @@ -96,8 +96,8 @@ std::unique_ptr compute_column( * * @throws cudf::logic_error if `input.type()` is a non-boolean type * - * @param input Boolean elements to convert to a bitmask. - * @param mr Device memory resource used to allocate the returned bitmask. + * @param input Boolean elements to convert to a bitmask + * @param mr Device memory resource used to allocate the returned bitmask * @return A pair containing a `device_buffer` with the new bitmask and it's * null count obtained from input considering `true` represent `valid`/`1` and * `false` represent `invalid`/`0`. @@ -185,7 +185,7 @@ std::pair, table_view> one_hot_encode( * @param begin_bit position of the bit from which the conversion should start * @param end_bit position of the bit before which the conversion should stop * @param mr Device memory resource used to allocate the returned columns' device memory - * @return A boolean column representing the given mask from [begin_bit, end_bit). + * @return A boolean column representing the given mask from [begin_bit, end_bit) */ std::unique_ptr mask_to_bools( bitmask_type const* bitmask, @@ -214,9 +214,9 @@ std::unique_ptr mask_to_bools( * * row_bit_count(column(x)) >= row_bit_count(gather(column(x))) * - * @param t The table view to perform the computation on. + * @param t The table view to perform the computation on * @param mr Device memory resource used to allocate the returned columns' device memory - * @return A 32-bit integer column containing the per-row bit counts. 
+ * @return A 32-bit integer column containing the per-row bit counts */ std::unique_ptr row_bit_count( table_view const& t, diff --git a/cpp/include/cudf/transpose.hpp b/cpp/include/cudf/transpose.hpp index c0b2d50c8f7..dc67e73c5a7 100644 --- a/cpp/include/cudf/transpose.hpp +++ b/cpp/include/cudf/transpose.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,7 +34,7 @@ namespace cudf { * @throw cudf::logic_error if column types are non-homogenous * @throw cudf::logic_error if column types are non-fixed-width * - * @param[in] input A table (M cols x N rows) to be transposed. + * @param[in] input A table (M cols x N rows) to be transposed * @param[in] mr Device memory resource used to allocate the device memory of returned value * @return The transposed input (N cols x M rows) as a `column` and * `table_view`, representing the owner and transposed table, diff --git a/cpp/include/cudf/unary.hpp b/cpp/include/cudf/unary.hpp index 36f08b7f23e..27f21f3d066 100644 --- a/cpp/include/cudf/unary.hpp +++ b/cpp/include/cudf/unary.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,6 +27,9 @@ namespace cudf { * @brief Column APIs for unary ops */ +/** + * @brief Types of unary operations that can be performed on data. + */ enum class unary_operator : int32_t { SIN, ///< Trigonometric sine COS, ///< Trigonometric cosine @@ -121,7 +124,7 @@ std::unique_ptr cast( * @throws cudf::logic_error if `input` is a non-floating point type * * @param input A column of floating-point elements - * @param mr Device memory resource used to allocate the returned column's device memory. 
+ * @param mr Device memory resource used to allocate the returned column's device memory * * @returns A non-nullable column of `type_id::BOOL8` elements with `true` representing `NAN` values */ @@ -137,7 +140,7 @@ std::unique_ptr is_nan( * @throws cudf::logic_error if `input` is a non-floating point type * * @param input A column of floating-point elements - * @param mr Device memory resource used to allocate the returned column's device memory. + * @param mr Device memory resource used to allocate the returned column's device memory * * @returns A non-nullable column of `type_id::BOOL8` elements with `false` representing `NAN` * values