Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add missing documentation in aggregation.hpp #10887

Merged
merged 5 commits into from
May 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 103 additions & 23 deletions cpp/include/cudf/aggregation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,18 +120,56 @@ class aggregation {
};

aggregation() = delete;

/**
* @brief Construct a new aggregation object
*
* @param a aggregation::Kind enum value
*/
aggregation(aggregation::Kind a) : kind{a} {}
Kind kind; ///< The aggregation to perform
virtual ~aggregation() = default;

/**
* @brief Compares two aggregation objects for equality
*
* @param other The other aggregation to compare with
* @return True if the two aggregations are equal
*/
[[nodiscard]] virtual bool is_equal(aggregation const& other) const { return kind == other.kind; }

/**
* @brief Computes the hash value of the aggregation
*
* @return The hash value of the aggregation
*/
[[nodiscard]] virtual size_t do_hash() const { return std::hash<int>{}(kind); }

/**
* @pure @brief Clones the aggregation object
*
* @return A copy of the aggregation object
*/
[[nodiscard]] virtual std::unique_ptr<aggregation> clone() const = 0;

// override functions for compound aggregations
/**
* @pure @brief Get the simple aggregations that this aggregation requires to compute.
*
* @param col_type The type of the column to aggregate
* @param collector The collector visitor pattern to use to collect the simple aggregations
* @return Vector of pre-requisite simple aggregations
*/
virtual std::vector<std::unique_ptr<aggregation>> get_simple_aggregations(
data_type col_type, cudf::detail::simple_aggregations_collector& collector) const = 0;
virtual void finalize(cudf::detail::aggregation_finalizer& finalizer) const = 0;

/**
* @pure @brief Compute the aggregation after pre-requisite simple aggregations have been
* computed.
*
* @param finalizer The finalizer visitor pattern to use to compute the aggregation
*/
virtual void finalize(cudf::detail::aggregation_finalizer& finalizer) const = 0;
};

/**
Expand All @@ -147,7 +185,8 @@ class rolling_aggregation : public virtual aggregation {

protected:
rolling_aggregation() {}
rolling_aggregation(aggregation::Kind a) : aggregation{a} {}
/// constructor inherited from cudf::aggregation
using aggregation::aggregation;
Comment on lines +188 to +189
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

using declaration style here instead of old code.
This is the only code change in this PR.

};

/**
Expand Down Expand Up @@ -205,46 +244,57 @@ class segmented_reduce_aggregation : public virtual aggregation {
segmented_reduce_aggregation() {}
};

/// Type of code in the user defined function string.
enum class udf_type : bool { CUDA, PTX };
/// Type of correlation method.
enum class correlation_type : int32_t { PEARSON, KENDALL, SPEARMAN };

/// Factory to create a SUM aggregation
/// @return A SUM aggregation object
template <typename Base = aggregation>
std::unique_ptr<Base> make_sum_aggregation();

/// Factory to create a PRODUCT aggregation
/// @return A PRODUCT aggregation object
template <typename Base = aggregation>
std::unique_ptr<Base> make_product_aggregation();

/// Factory to create a MIN aggregation
/// @return A MIN aggregation object
template <typename Base = aggregation>
std::unique_ptr<Base> make_min_aggregation();

/// Factory to create a MAX aggregation
/// @return A MAX aggregation object
template <typename Base = aggregation>
std::unique_ptr<Base> make_max_aggregation();

/**
* @brief Factory to create a COUNT aggregation
*
* @param null_handling Indicates if null values will be counted.
* @param null_handling Indicates if null values will be counted
* @return A COUNT aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_count_aggregation(null_policy null_handling = null_policy::EXCLUDE);

/// Factory to create an ANY aggregation
/// @return A ANY aggregation object
template <typename Base = aggregation>
std::unique_ptr<Base> make_any_aggregation();

/// Factory to create a ALL aggregation
/// @return A ALL aggregation object
template <typename Base = aggregation>
std::unique_ptr<Base> make_all_aggregation();

/// Factory to create a SUM_OF_SQUARES aggregation
/// @return A SUM_OF_SQUARES aggregation object
template <typename Base = aggregation>
std::unique_ptr<Base> make_sum_of_squares_aggregation();

/// Factory to create a MEAN aggregation
/// @return A MEAN aggregation object
template <typename Base = aggregation>
std::unique_ptr<Base> make_mean_aggregation();

Expand All @@ -258,6 +308,7 @@ std::unique_ptr<Base> make_mean_aggregation();
* deviation across multiple discrete sets. See
* `https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm` for more
* detail.
* @return A M2 aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_m2_aggregation();
Expand All @@ -269,6 +320,7 @@ std::unique_ptr<Base> make_m2_aggregation();
* `variance` is `N - ddof`, where `N` is the population size.
*
* @throw cudf::logic_error if input type is chrono or compound types.
* @return A VARIANCE aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_variance_aggregation(size_type ddof = 1);
Expand All @@ -280,11 +332,13 @@ std::unique_ptr<Base> make_variance_aggregation(size_type ddof = 1);
* `std` is `N - ddof`, where `N` is the population size.
*
* @throw cudf::logic_error if input type is chrono or compound types.
* @return A STD aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_std_aggregation(size_type ddof = 1);

/// Factory to create a MEDIAN aggregation
/// @return A MEDIAN aggregation object
template <typename Base = aggregation>
std::unique_ptr<Base> make_median_aggregation();

Expand All @@ -293,54 +347,60 @@ std::unique_ptr<Base> make_median_aggregation();
*
* @param quantiles The desired quantiles
* @param interp The desired interpolation
* @return A QUANTILE aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_quantile_aggregation(std::vector<double> const& quantiles,
interpolation interp = interpolation::LINEAR);

/**
* @brief Factory to create an `argmax` aggregation
* @brief Factory to create an ARGMAX aggregation
*
* `argmax` returns the index of the maximum element.
* ARGMAX returns the index of the maximum element.
* @return A ARGMAX aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_argmax_aggregation();

/**
* @brief Factory to create an `argmin` aggregation
* @brief Factory to create an ARGMIN aggregation
*
* `argmin` returns the index of the minimum element.
* @return A ARGMIN aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_argmin_aggregation();

/**
* @brief Factory to create a `nunique` aggregation
* @brief Factory to create a NUNIQUE aggregation
*
* `nunique` returns the number of unique elements.
* @param null_handling Indicates if null values will be counted.
* NUNIQUE returns the number of unique elements.
* @param null_handling Indicates if null values will be counted
* @return A NUNIQUE aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_nunique_aggregation(null_policy null_handling = null_policy::EXCLUDE);

/**
* @brief Factory to create a `nth_element` aggregation
* @brief Factory to create a NTH_ELEMENT aggregation
*
* `nth_element` returns the n'th element of the group/series.
* NTH_ELEMENT returns the n'th element of the group/series.
*
* If @p n is not within the range `[-group_size, group_size)`, the result of
* the respective group will be null. Negative indices `[-group_size, -1]`
* corresponds to `[0, group_size-1]` indices respectively where `group_size` is
* the size of each group.
*
* @param n index of nth element in each group.
* @param null_handling Indicates to include/exclude nulls during indexing.
* @param n index of nth element in each group
* @param null_handling Indicates to include/exclude nulls during indexing
* @return A NTH_ELEMENT aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_nth_element_aggregation(
size_type n, null_policy null_handling = null_policy::INCLUDE);

/// Factory to create a ROW_NUMBER aggregation
/// @return A ROW_NUMBER aggregation object
template <typename Base = aggregation>
std::unique_ptr<Base> make_row_number_aggregation();

Expand Down Expand Up @@ -408,12 +468,13 @@ std::unique_ptr<Base> make_row_number_aggregation();
*
* @endcode
*
* @param method The ranking method used for tie breaking (same values).
* @param method The ranking method used for tie breaking (same values)
* @param column_order The desired sort order for ranking
* @param null_handling flag to include nulls during ranking. If nulls are not included,
* @param null_handling flag to include nulls during ranking If nulls are not included,
* the corresponding rank will be null.
* @param null_precedence The desired order of null compared to other elements for column
* @param percentage enum to denote the type of conversion of ranks to percentage in range (0,1]
* @return A RANK aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_rank_aggregation(rank_method method,
Expand All @@ -430,7 +491,8 @@ std::unique_ptr<Base> make_rank_aggregation(rank_method method,
* If `null_handling` is set to `EXCLUDE`, null elements are dropped from each
* of the list rows.
*
* @param null_handling Indicates whether to include/exclude nulls in list elements.
* @param null_handling Indicates whether to include/exclude nulls in list elements
* @return A COLLECT_LIST aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_collect_list_aggregation(
Expand All @@ -450,17 +512,28 @@ std::unique_ptr<Base> make_collect_list_aggregation(
* equal.
* @param nans_equal Flag to specify whether NaN values in floating point column should be
* considered equal.
* @return A COLLECT_SET aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_collect_set_aggregation(null_policy null_handling = null_policy::INCLUDE,
null_equality nulls_equal = null_equality::EQUAL,
nan_equality nans_equal = nan_equality::UNEQUAL);

/// Factory to create a LAG aggregation
/**
* @brief Factory to create a LAG aggregation
*
* @param offset The number of rows to lag the input
* @return A LAG aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_lag_aggregation(size_type offset);

/// Factory to create a LEAD aggregation
/**
* @brief Factory to create a LEAD aggregation
*
* @param offset The number of rows to lead the input
* @return A LEAD aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_lead_aggregation(size_type offset);

Expand All @@ -471,7 +544,7 @@ std::unique_ptr<Base> make_lead_aggregation(size_type offset);
* @param[in] user_defined_aggregator A string containing the aggregator code
* @param[in] output_type expected output type
*
* @return aggregation unique pointer housing user_defined_aggregator string.
* @return An aggregation containing a user-defined aggregator string
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_udf_aggregation(udf_type type,
Expand All @@ -486,6 +559,8 @@ std::unique_ptr<Base> make_udf_aggregation(udf_type type,
* groupby `COLLECT_LIST` aggregations into a final `COLLECT_LIST` result. As such, it requires the
* input lists column to be non-nullable (the child column containing list entries is not subjected
* to this requirement).
*
* @return A MERGE_LISTS aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_merge_lists_aggregation();
Expand All @@ -510,6 +585,7 @@ std::unique_ptr<Base> make_merge_lists_aggregation();
* during dropping duplicate list entries.
* @param nans_equal Flag to specify whether NaN values in floating point column should be
* considered equal during dropping duplicate list entries.
* @return A MERGE_SETS aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_merge_sets_aggregation(null_equality nulls_equal = null_equality::EQUAL,
Expand All @@ -526,6 +602,8 @@ std::unique_ptr<Base> make_merge_sets_aggregation(null_equality nulls_equal = nu
*
* The input `M2` aggregation values are expected to be all non-negative numbers, since they
* were output from `M2` aggregation.
*
* @return A MERGE_M2 aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_merge_m2_aggregation();
Expand All @@ -535,9 +613,10 @@ std::unique_ptr<Base> make_merge_m2_aggregation();
*
* Compute covariance between two columns.
* The input columns are child columns of a non-nullable struct columns.
* @param min_periods Minimum number of non-null observations required to produce a result.
* @param min_periods Minimum number of non-null observations required to produce a result
* @param ddof Delta Degrees of Freedom. The divisor used in calculations is N - ddof, where N is
* the number of non-null observations.
* @return A COVARIANCE aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_covariance_aggregation(size_type min_periods = 1, size_type ddof = 1);
Expand All @@ -549,7 +628,8 @@ std::unique_ptr<Base> make_covariance_aggregation(size_type min_periods = 1, siz
* The input columns are child columns of a non-nullable struct columns.
*
* @param type correlation_type
* @param min_periods Minimum number of non-null observations required to produce a result.
* @param min_periods Minimum number of non-null observations required to produce a result
* @return A CORRELATION aggregation object
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_correlation_aggregation(correlation_type type,
Expand Down Expand Up @@ -587,7 +667,7 @@ std::unique_ptr<Base> make_correlation_aggregation(correlation_type type,
* the computed tdigests: A value of 1000 will result in a tdigest containing no
* more than 1000 centroids (32 bytes each). Higher result in more accurate tdigest information.
*
* @returns A TDIGEST aggregation object.
* @return A TDIGEST aggregation object
*/
template <typename Base>
std::unique_ptr<Base> make_tdigest_aggregation(int max_centroids = 1000);
Expand Down Expand Up @@ -625,7 +705,7 @@ std::unique_ptr<Base> make_tdigest_aggregation(int max_centroids = 1000);
* the computed tdigests: A value of 1000 will result in a tdigest containing no
* more than 1000 centroids (32 bytes each). Higher result in more accurate tdigest information.
*
* @returns A MERGE_TDIGEST aggregation object.
* @return A MERGE_TDIGEST aggregation object
*/
template <typename Base>
std::unique_ptr<Base> make_merge_tdigest_aggregation(int max_centroids = 1000);
Expand Down
10 changes: 5 additions & 5 deletions cpp/include/cudf/concatenate.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -53,8 +53,8 @@ rmm::device_buffer concatenate_masks(
*
* @param columns_to_concat host_span of column views to be concatenated into a single column
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return Unique pointer to a single table having all the rows from the
* elements of `columns_to_concat` respectively in the same order.
* @return A single column having all the rows from the elements of `columns_to_concat` respectively
* in the same order.
*/
std::unique_ptr<column> concatenate(
host_span<column_view const> columns_to_concat,
Expand Down Expand Up @@ -83,8 +83,8 @@ std::unique_ptr<column> concatenate(
*
* @param tables_to_concat host_span of table views to be concatenated into a single table
* @param mr Device memory resource used to allocate the returned table's device memory.
* @return Unique pointer to a single table having all the rows from the
* elements of `tables_to_concat` respectively in the same order.
* @return A single table having all the rows from the elements of `tables_to_concat` respectively
* in the same order.
*/
std::unique_ptr<table> concatenate(
host_span<table_view const> tables_to_concat,
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/reduction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ std::unique_ptr<column> segmented_reduce(
* null_policy::EXCLUDE. Include nulls if null_policy::INCLUDE.
* Any operation with a null results in a null.
* @param[in] mr Device memory resource used to allocate the returned scalar's device memory
* @returns unique pointer to new output column
* @returns Scanned output column
*/
std::unique_ptr<column> scan(
const column_view& input,
Expand Down