Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add missing documentation in aggregation.hpp #10887

Merged
merged 5 commits into from
May 28, 2022
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 90 additions & 10 deletions cpp/include/cudf/aggregation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,18 +120,56 @@ class aggregation {
};

aggregation() = delete;

/**
* @brief Construct a new aggregation object
*
* @param a aggregation::Kind enum value
*/
aggregation(aggregation::Kind a) : kind{a} {}
Kind kind; ///< The aggregation to perform
virtual ~aggregation() = default;

/**
* @brief Compares two aggregation objects for equality
*
* @param other The other aggregation to compare with
* @return True if the two aggregations are equal
*/
[[nodiscard]] virtual bool is_equal(aggregation const& other) const
{
  // At this level two aggregations are equal exactly when their kinds match.
  bool const same_kind = (kind == other.kind);
  return same_kind;
}

/**
* @brief Computes the hash value of the aggregation
*
* @return The hash value of the aggregation
*/
[[nodiscard]] virtual size_t do_hash() const
{
  // The hash is derived solely from the aggregation kind, hashed as an int.
  std::hash<int> const hasher{};
  return hasher(kind);
}

/**
* @pure @brief Clones the aggregation object
*
* @return A copy of the aggregation object
*/
[[nodiscard]] virtual std::unique_ptr<aggregation> clone() const = 0;

// override functions for compound aggregations
/**
* @pure @brief Get the simple aggregations that this aggregation requires to compute.
*
* @param col_type The type of the column to aggregate
* @param collector The collector visitor pattern to use to collect the simple aggregations
* @return Vector of pre-requisite simple aggregations
*/
virtual std::vector<std::unique_ptr<aggregation>> get_simple_aggregations(
data_type col_type, cudf::detail::simple_aggregations_collector& collector) const = 0;
virtual void finalize(cudf::detail::aggregation_finalizer& finalizer) const = 0;

/**
* @pure @brief Compute the aggregation after pre-requisite simple aggregations have been
* computed.
*
* @param finalizer The finalizer visitor pattern to use to compute the aggregation
*/
virtual void finalize(cudf::detail::aggregation_finalizer& finalizer) const = 0;
};

/**
Expand All @@ -147,7 +185,8 @@ class rolling_aggregation : public virtual aggregation {

protected:
rolling_aggregation() {}
rolling_aggregation(aggregation::Kind a) : aggregation{a} {}
/// constructor inherited from cudf::aggregation
using aggregation::aggregation;
Comment on lines +188 to +189
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Switched to a `using` declaration here (inheriting the base-class constructors) instead of the old hand-written forwarding constructor.
This is the only code change in this PR.

};

/**
Expand Down Expand Up @@ -205,46 +244,57 @@ class segmented_reduce_aggregation : public virtual aggregation {
segmented_reduce_aggregation() {}
};

/// Type of code in the user defined function string.
/// The underlying type is `bool`, so exactly two kinds can be represented.
/// NOTE(review): presumably CUDA means CUDA source text and PTX means PTX text — confirm.
enum class udf_type : bool { CUDA, PTX };
/// Type of correlation method.
/// The underlying type is `int32_t`.
/// NOTE(review): enumerators presumably name the Pearson, Kendall and Spearman coefficients —
/// confirm.
enum class correlation_type : int32_t { PEARSON, KENDALL, SPEARMAN };

/// Factory to create a SUM aggregation
/// @return A pointer to a new SUM aggregation
template <typename Base = aggregation>
std::unique_ptr<Base> make_sum_aggregation();

/// Factory to create a PRODUCT aggregation
/// @return A pointer to a new PRODUCT aggregation
template <typename Base = aggregation>
std::unique_ptr<Base> make_product_aggregation();

/// Factory to create a MIN aggregation
/// @return A pointer to a new MIN aggregation
template <typename Base = aggregation>
std::unique_ptr<Base> make_min_aggregation();

/// Factory to create a MAX aggregation
/// @return A pointer to a new MAX aggregation
template <typename Base = aggregation>
std::unique_ptr<Base> make_max_aggregation();

/**
* @brief Factory to create a COUNT aggregation
*
* @param null_handling Indicates if null values will be counted.
* @return A pointer to a new COUNT aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_count_aggregation(null_policy null_handling = null_policy::EXCLUDE);

/// Factory to create an ANY aggregation
/// @return A pointer to a new ANY aggregation
template <typename Base = aggregation>
std::unique_ptr<Base> make_any_aggregation();

/// Factory to create an ALL aggregation
/// @return A pointer to a new ALL aggregation
template <typename Base = aggregation>
std::unique_ptr<Base> make_all_aggregation();

/// Factory to create a SUM_OF_SQUARES aggregation
/// @return A pointer to a new SUM_OF_SQUARES aggregation
template <typename Base = aggregation>
std::unique_ptr<Base> make_sum_of_squares_aggregation();

/// Factory to create a MEAN aggregation
/// @return A pointer to a new MEAN aggregation
template <typename Base = aggregation>
std::unique_ptr<Base> make_mean_aggregation();

Expand All @@ -258,6 +308,7 @@ std::unique_ptr<Base> make_mean_aggregation();
* deviation across multiple discrete sets. See
* `https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm` for more
* detail.
* @return A pointer to a new M2 aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_m2_aggregation();
Expand All @@ -269,6 +320,7 @@ std::unique_ptr<Base> make_m2_aggregation();
* `variance` is `N - ddof`, where `N` is the population size.
*
* @throw cudf::logic_error if input type is chrono or compound types.
* @return A pointer to a new VARIANCE aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_variance_aggregation(size_type ddof = 1);
Expand All @@ -280,11 +332,13 @@ std::unique_ptr<Base> make_variance_aggregation(size_type ddof = 1);
* `std` is `N - ddof`, where `N` is the population size.
*
* @throw cudf::logic_error if input type is chrono or compound types.
* @return A pointer to a new STD aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_std_aggregation(size_type ddof = 1);

/// Factory to create a MEDIAN aggregation
/// @return A pointer to a new MEDIAN aggregation
template <typename Base = aggregation>
std::unique_ptr<Base> make_median_aggregation();

Expand All @@ -293,23 +347,26 @@ std::unique_ptr<Base> make_median_aggregation();
*
* @param quantiles The desired quantiles
* @param interp The desired interpolation
* @return A pointer to a new QUANTILE aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_quantile_aggregation(std::vector<double> const& quantiles,
interpolation interp = interpolation::LINEAR);

/**
* @brief Factory to create an `argmax` aggregation
* @brief Factory to create an ARGMAX aggregation
*
* `argmax` returns the index of the maximum element.
* ARGMAX returns the index of the maximum element.
* @return A pointer to a new ARGMAX aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_argmax_aggregation();

/**
* @brief Factory to create an `argmin` aggregation
* @brief Factory to create an ARGMIN aggregation
*
* ARGMIN returns the index of the minimum element.
* @return A pointer to a new ARGMIN aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_argmin_aggregation();
Expand All @@ -319,6 +376,7 @@ std::unique_ptr<Base> make_argmin_aggregation();
*
* `nunique` returns the number of unique elements.
* @param null_handling Indicates if null values will be counted.
* @return A pointer to a new `nunique` aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_nunique_aggregation(null_policy null_handling = null_policy::EXCLUDE);
Expand All @@ -335,12 +393,14 @@ std::unique_ptr<Base> make_nunique_aggregation(null_policy null_handling = null_
*
* @param n index of nth element in each group.
* @param null_handling Indicates to include/exclude nulls during indexing.
* @return A pointer to a new `nth_element` aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_nth_element_aggregation(
size_type n, null_policy null_handling = null_policy::INCLUDE);

/// Factory to create a ROW_NUMBER aggregation
/// @return A pointer to a new ROW_NUMBER aggregation
template <typename Base = aggregation>
std::unique_ptr<Base> make_row_number_aggregation();

Expand Down Expand Up @@ -414,6 +474,7 @@ std::unique_ptr<Base> make_row_number_aggregation();
* the corresponding rank will be null.
* @param null_precedence The desired order of null compared to other elements for column
* @param percentage enum to denote the type of conversion of ranks to percentage in range (0,1]
* @return A pointer to a new RANK aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_rank_aggregation(rank_method method,
Expand All @@ -431,6 +492,7 @@ std::unique_ptr<Base> make_rank_aggregation(rank_method method,
* of the list rows.
*
* @param null_handling Indicates whether to include/exclude nulls in list elements.
* @return A pointer to a new COLLECT_LIST aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_collect_list_aggregation(
Expand All @@ -450,17 +512,28 @@ std::unique_ptr<Base> make_collect_list_aggregation(
* equal.
* @param nans_equal Flag to specify whether NaN values in floating point column should be
* considered equal.
* @return A pointer to a new COLLECT_SET aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_collect_set_aggregation(null_policy null_handling = null_policy::INCLUDE,
null_equality nulls_equal = null_equality::EQUAL,
nan_equality nans_equal = nan_equality::UNEQUAL);

/// Factory to create a LAG aggregation
/**
* @brief Factory to create a LAG aggregation
*
* @param offset The number of rows to lag the input
* @return A pointer to a new LAG aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_lag_aggregation(size_type offset);

/// Factory to create a LEAD aggregation
/**
* @brief Factory to create a LEAD aggregation
*
* @param offset The number of rows to lead the input
* @return A pointer to a new LEAD aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_lead_aggregation(size_type offset);

Expand All @@ -471,7 +544,7 @@ std::unique_ptr<Base> make_lead_aggregation(size_type offset);
* @param[in] user_defined_aggregator A string containing the aggregator code
* @param[in] output_type expected output type
*
* @return aggregation unique pointer housing user_defined_aggregator string.
* @return Aggregation unique pointer housing user_defined_aggregator string.
karthikeyann marked this conversation as resolved.
Show resolved Hide resolved
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_udf_aggregation(udf_type type,
Expand All @@ -486,6 +559,8 @@ std::unique_ptr<Base> make_udf_aggregation(udf_type type,
* groupby `COLLECT_LIST` aggregations into a final `COLLECT_LIST` result. As such, it requires the
* input lists column to be non-nullable (the child column containing list entries is not subjected
* to this requirement).
*
* @return A pointer to a new MERGE_LISTS aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_merge_lists_aggregation();
Expand All @@ -510,6 +585,7 @@ std::unique_ptr<Base> make_merge_lists_aggregation();
* during dropping duplicate list entries.
* @param nans_equal Flag to specify whether NaN values in floating point column should be
* considered equal during dropping duplicate list entries.
* @return A pointer to a new MERGE_SETS aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_merge_sets_aggregation(null_equality nulls_equal = null_equality::EQUAL,
Expand All @@ -526,6 +602,8 @@ std::unique_ptr<Base> make_merge_sets_aggregation(null_equality nulls_equal = nu
*
* The input `M2` aggregation values are expected to be all non-negative numbers, since they
* were output from `M2` aggregation.
*
* @return A pointer to a new MERGE_M2 aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_merge_m2_aggregation();
Expand All @@ -538,6 +616,7 @@ std::unique_ptr<Base> make_merge_m2_aggregation();
* @param min_periods Minimum number of non-null observations required to produce a result.
* @param ddof Delta Degrees of Freedom. The divisor used in calculations is N - ddof, where N is
* the number of non-null observations.
* @return A pointer to a new COVARIANCE aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_covariance_aggregation(size_type min_periods = 1, size_type ddof = 1);
Expand All @@ -550,6 +629,7 @@ std::unique_ptr<Base> make_covariance_aggregation(size_type min_periods = 1, siz
*
* @param type The correlation method to use (a `correlation_type` value)
* @param min_periods Minimum number of non-null observations required to produce a result.
* @return A pointer to a new CORRELATION aggregation
*/
template <typename Base = aggregation>
std::unique_ptr<Base> make_correlation_aggregation(correlation_type type,
Expand Down Expand Up @@ -587,7 +667,7 @@ std::unique_ptr<Base> make_correlation_aggregation(correlation_type type,
* the computed tdigests: A value of 1000 will result in a tdigest containing no
* more than 1000 centroids (32 bytes each). Higher values result in more accurate tdigest
* information.
*
* @returns A TDIGEST aggregation object.
* @return A pointer to a new TDIGEST aggregation
*/
template <typename Base>
std::unique_ptr<Base> make_tdigest_aggregation(int max_centroids = 1000);
Expand Down Expand Up @@ -625,7 +705,7 @@ std::unique_ptr<Base> make_tdigest_aggregation(int max_centroids = 1000);
* the computed tdigests: A value of 1000 will result in a tdigest containing no
* more than 1000 centroids (32 bytes each). Higher values result in more accurate tdigest
* information.
*
* @returns A MERGE_TDIGEST aggregation object.
* @return A pointer to a new MERGE_TDIGEST aggregation
karthikeyann marked this conversation as resolved.
Show resolved Hide resolved
*/
template <typename Base>
std::unique_ptr<Base> make_merge_tdigest_aggregation(int max_centroids = 1000);
Expand Down