From 763c53ac2b915781c09fb3d2f4daa1c240fdbe15 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 31 Aug 2021 23:31:44 +0530
Subject: [PATCH 01/79] add CORR aggregation to groupby, headers, classes,
 visitor(sort)

---
 cpp/include/cudf/aggregation.hpp              | 13 +++++++-
 .../cudf/detail/aggregation/aggregation.hpp   | 30 +++++++++++++++++++
 cpp/src/aggregation/aggregation.cpp           | 20 +++++++++++++
 cpp/src/groupby/sort/aggregate.cpp            | 23 ++++++++++++++
 cpp/src/groupby/sort/group_reductions.hpp     | 18 ++++++++++-
 5 files changed, 102 insertions(+), 2 deletions(-)
diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp
index ff665e2706a..1d03fb613df 100644
--- a/cpp/include/cudf/aggregation.hpp
+++ b/cpp/include/cudf/aggregation.hpp
@@ -87,7 +87,8 @@ class aggregation {
     CUDA,            ///< CUDA UDF based reduction
     MERGE_LISTS,     ///< merge multiple lists values into one list
     MERGE_SETS,      ///< merge multiple lists values into one list then drop duplicate entries
-    MERGE_M2         ///< merge partial values of M2 aggregation
+    MERGE_M2,        ///< merge partial values of M2 aggregation,
+    CORR,            ///< correlation among multiple columns
   };
 
   aggregation() = delete;
@@ -488,5 +489,15 @@ std::unique_ptr<Base> make_merge_sets_aggregation(null_equality nulls_equal = nu
 template <typename Base = aggregation>
 std::unique_ptr<Base> make_merge_m2_aggregation();
 
+/**
+ * @brief Factory to create a CORR aggregation
+ *
+ * Compute correlation matrix amond the input columns.
+ * The input columns are child columns of a non-nullable struct columns.
+ *
+ */
+template <typename Base = aggregation>
+std::unique_ptr<Base> make_corr_aggregation();
+
 /** @} */  // end of group
 }  // namespace cudf
diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp
index 4e4c63ae517..e2177e64ba0 100644
--- a/cpp/include/cudf/detail/aggregation/aggregation.hpp
+++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp
@@ -91,6 +91,8 @@ class simple_aggregations_collector {  // Declares the interface for the simple
                                                           class merge_sets_aggregation const& agg);
   virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
                                                           class merge_m2_aggregation const& agg);
+  virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
+                                                          class corr_aggregation const& agg);
 };
 
 class aggregation_finalizer {  // Declares the interface for the finalizer
@@ -125,6 +127,7 @@ class aggregation_finalizer {  // Declares the interface for the finalizer
   virtual void visit(class merge_lists_aggregation const& agg);
   virtual void visit(class merge_sets_aggregation const& agg);
   virtual void visit(class merge_m2_aggregation const& agg);
+  virtual void visit(class corr_aggregation const& agg);
 };
 
 /**
@@ -884,6 +887,25 @@ class merge_m2_aggregation final : public groupby_aggregation {
   void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
 };
 
+/**
+ * @brief Derived aggregation class for specifying CORR aggregation
+ */
+class corr_aggregation final : public groupby_aggregation {
+ public:
+  explicit corr_aggregation() : aggregation{CORR} {}
+
+  std::unique_ptr<aggregation> clone() const override
+  {
+    return std::make_unique<corr_aggregation>(*this);
+  }
+  std::vector<std::unique_ptr<aggregation>> get_simple_aggregations(
+    data_type col_type, simple_aggregations_collector& collector) const override
+  {
+    return collector.visit(col_type, *this);
+  }
+  void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
+};
+
 /**
  * @brief Sentinel value used for `ARGMAX` aggregation.
  *
@@ -1118,6 +1140,12 @@ struct target_type_impl<SourceType, aggregation::MERGE_M2> {
   using type = struct_view;
 };
 
+// Always use double for CORR
+template <typename SourceType>
+struct target_type_impl<SourceType, aggregation::CORR> {
+  using type = double;
+};
+
 /**
  * @brief Helper alias to get the accumulator type for performing aggregation
  * `k` on elements of type `Source`
@@ -1222,6 +1250,8 @@ CUDA_HOST_DEVICE_CALLABLE decltype(auto) aggregation_dispatcher(aggregation::Kin
       return f.template operator()<aggregation::MERGE_SETS>(std::forward<Ts>(args)...);
     case aggregation::MERGE_M2:
       return f.template operator()<aggregation::MERGE_M2>(std::forward<Ts>(args)...);
+    case aggregation::CORR:
+      return f.template operator()<aggregation::CORR>(std::forward<Ts>(args)...);
     default: {
 #ifndef __CUDA_ARCH__
       CUDF_FAIL("Unsupported aggregation.");
diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp
index f0c522257fb..07883be1491 100644
--- a/cpp/src/aggregation/aggregation.cpp
+++ b/cpp/src/aggregation/aggregation.cpp
@@ -202,6 +202,12 @@ std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
   return visit(col_type, static_cast<aggregation const&>(agg));
 }
 
+std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
+  data_type col_type, corr_aggregation const& agg)
+{
+  return visit(col_type, static_cast<aggregation const&>(agg));
+}
+
 // aggregation_finalizer ----------------------------------------
 
 void aggregation_finalizer::visit(aggregation const& agg) {}
@@ -346,6 +352,11 @@ void aggregation_finalizer::visit(merge_m2_aggregation const& agg)
   visit(static_cast<aggregation const&>(agg));
 }
 
+void aggregation_finalizer::visit(corr_aggregation const& agg)
+{
+  visit(static_cast<aggregation const&>(agg));
+}
+
 }  // namespace detail
 
 std::vector<std::unique_ptr<aggregation>> aggregation::get_simple_aggregations(
@@ -664,6 +675,15 @@ std::unique_ptr<Base> make_merge_m2_aggregation()
 template std::unique_ptr<aggregation> make_merge_m2_aggregation<aggregation>();
 template std::unique_ptr<groupby_aggregation> make_merge_m2_aggregation<groupby_aggregation>();
 
+/// Factory to create a CORR aggregation
+template <typename Base>
+std::unique_ptr<Base> make_corr_aggregation()
+{
+  return std::make_unique<detail::corr_aggregation>();
+}
+template std::unique_ptr<aggregation> make_corr_aggregation<aggregation>();
+template std::unique_ptr<groupby_aggregation> make_corr_aggregation<groupby_aggregation>();
+
 namespace detail {
 namespace {
 struct target_type_functor {
diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp
index 726b51b7702..718fd191db1 100644
--- a/cpp/src/groupby/sort/aggregate.cpp
+++ b/cpp/src/groupby/sort/aggregate.cpp
@@ -524,6 +524,29 @@ void aggregate_result_functor::operator()<aggregation::MERGE_M2>(aggregation con
     detail::group_merge_m2(
       get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr));
 };
+/**
+ * @brief Perform correlation between the two child columns of a non-nullable struct column.
+ *
+ * The output of this aggregation is a FLOAT64 column with one row per group, as produced by
+ * `detail::group_corr`. An already cached result for this column/aggregation is reused.
+ *
+ * The correlation is done for each group of the input struct column.
+ *
+ */
+template <>
+void aggregate_result_functor::operator()<aggregation::CORR>(aggregation const& agg)
+{
+  if (cache.has_result(col_idx, agg)) { return; }
+
+  cache.add_result(col_idx,
+                   agg,
+                   detail::group_corr(get_grouped_values(),
+                                      helper.group_offsets(stream),
+                                      helper.group_labels(stream),
+                                      helper.num_groups(stream),
+                                      stream,
+                                      mr));
+};
 
 }  // namespace detail
 
diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp
index 2770162da2d..6bb87d7ea6a 100644
--- a/cpp/src/groupby/sort/group_reductions.hpp
+++ b/cpp/src/groupby/sort/group_reductions.hpp
@@ -441,7 +441,23 @@ std::unique_ptr<column> group_merge_m2(column_view const& values,
                                        size_type num_groups,
                                        rmm::cuda_stream_view stream,
                                        rmm::mr::device_memory_resource* mr);
-
+/**
+ * @brief Internal API to find correlation of child columns of a non-nullable struct column.
+ * TODO fill documentation.
+ *
+ * @param values Grouped values: a non-nullable struct column with exactly two child columns.
+ * @param group_offsets Offsets of groups' starting points within @p values.
+ * @param group_labels ID of group that the corresponding value belongs to
+ * @param num_groups Number of groups.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<column> group_corr(column_view const& values,
+                                   cudf::device_span<size_type const> group_offsets,
+                                   cudf::device_span<size_type const> group_labels,
+                                   size_type num_groups,
+                                   rmm::cuda_stream_view stream,
+                                   rmm::mr::device_memory_resource* mr);
 /** @endinternal
  *
  */

From 4c989a953e92a028de620c0e311c063d258ee1ca Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 31 Aug 2021 23:38:45 +0530
Subject: [PATCH 02/79] add group_corr.cu

---
 cpp/CMakeLists.txt                 |   1 +
 cpp/src/groupby/sort/group_corr.cu | 279 +++++++++++++++++++++++++++++
 2 files changed, 280 insertions(+)
 create mode 100644 cpp/src/groupby/sort/group_corr.cu

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 18af85c98e0..189638e5d08 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -218,6 +218,7 @@ add_library(cudf
     src/groupby/sort/group_argmax.cu
     src/groupby/sort/group_argmin.cu
     src/groupby/sort/group_collect.cu
+    src/groupby/sort/group_corr.cu
     src/groupby/sort/group_count.cu
     src/groupby/sort/group_m2.cu
     src/groupby/sort/group_max.cu
diff --git a/cpp/src/groupby/sort/group_corr.cu b/cpp/src/groupby/sort/group_corr.cu
new file mode 100644
index 00000000000..dbe64f0d54c
--- /dev/null
+++ b/cpp/src/groupby/sort/group_corr.cu
@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/column/column_device_view.cuh>
+#include <cudf/column/column_factories.hpp>
+#include <cudf/detail/aggregation/aggregation.hpp>
+#include <cudf/detail/binaryop.hpp>
+#include <cudf/detail/iterator.cuh>
+#include <cudf/detail/unary.hpp>
+#include <cudf/detail/valid_if.cuh>
+#include <cudf/dictionary/detail/iterator.cuh>
+#include <cudf/structs/structs_column_view.hpp>
+#include <cudf/utilities/span.hpp>
+#include <cudf/utilities/type_dispatcher.hpp>
+
+#include <memory>
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/exec_policy.hpp>
+#include <type_traits>
+#include "cudf/types.hpp"
+#include "groupby/sort/group_reductions.hpp"
+#include "thrust/functional.h"
+#include "thrust/iterator/counting_iterator.h"
+#include "thrust/iterator/zip_iterator.h"
+
+#include <thrust/iterator/discard_iterator.h>
+#include <thrust/reduce.h>
+
+namespace cudf {
+namespace groupby {
+namespace detail {
+namespace {
+
+template <typename T>
+constexpr bool is_double_convertible()
+{
+  return std::is_convertible_v<T, double> || std::is_constructible_v<double, T>;
+}
+
+struct is_double_convertible_impl {
+  template <typename T>
+  bool operator()()
+  {
+    return is_double_convertible<T>();
+  }
+};
+
+/**
+ * @brief Type casts each element of the column to `CastType`; element types that cannot be
+ * accessed or converted yield a value-initialized `CastType` instead.
+ */
+template <typename CastType>
+struct type_casted_accessor {
+  template <typename Element>
+  CUDA_DEVICE_CALLABLE CastType operator()(cudf::size_type i, column_device_view const& col) const
+  {
+    if constexpr (column_device_view::has_element_accessor<Element>() and
+                  std::is_convertible_v<Element, CastType>)
+      return static_cast<CastType>(col.element<Element>(i));
+    return {};  // fallback for unsupported element types
+  }
+};
+
+template <typename ResultType>
+struct corr_transform {  // : thrust::unary_function<size_type, ResultType>
+  column_device_view const d_values_0, d_values_1;  // the two columns being correlated
+  ResultType const *d_means_0, *d_means_1;           // per-group means, indexed by group label
+  ResultType const *d_stddev_0, *d_stddev_1;         // per-group stddevs, indexed by group label
+  size_type const* d_group_sizes;                    // per-group counts, indexed by group label
+  size_type const* d_group_labels;                   // group label of each row
+  size_type ddof{1};  // TODO update based on bias.
+  // Computes row i's term; the terms are meant to be summed per group (see reduce_by_key use).
+  __device__ ResultType operator()(size_type i)
+  {
+    if (d_values_0.is_null(i) or d_values_1.is_null(i)) return 0.0;  // null rows add nothing
+
+    // This has to be device dispatch because x and y type may differ
+    auto x = type_dispatcher(d_values_0.type(), type_casted_accessor<ResultType>{}, i, d_values_0);
+    auto y = type_dispatcher(d_values_1.type(), type_casted_accessor<ResultType>{}, i, d_values_1);
+
+    size_type group_idx  = d_group_labels[i];
+    size_type group_size = d_group_sizes[group_idx];
+
+    // prevent divide by zero in the (group_size - ddof) denominator used below
+    if (group_size == 0 or group_size - ddof <= 0) return 0.0;
+
+    ResultType xmean   = d_means_0[group_idx];
+    ResultType ymean   = d_means_1[group_idx];
+    ResultType xstddev = d_stddev_0[group_idx];
+    ResultType ystddev = d_stddev_1[group_idx];
+    return (x - xmean) * (y - ymean) / (group_size - ddof) / xstddev / ystddev;
+  }
+};
+
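+/*
+Design note: corr = sum((x - xu) * (y - yu)) / (N - 1) / stdx / stdy, computed per group.
+Scaling the reduced sum through a transform_output_iterator does not work, because the
+output iterator cannot know which group (index) each sum belongs to. So instead each row
+yields (x - xu) * (y - yu) / (N - 1) / stdx / stdy through a single input iterator, and the
+rows are then summed with reduce_by_key. This is very similar to var_transform in group_std.
+*/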
+
+std::tuple<std::unique_ptr<column>, std::unique_ptr<column>> group_mean_stddev(
+  column_view const& values_0,
+  cudf::device_span<size_type const> group_offsets,
+  cudf::device_span<size_type const> group_labels,
+  size_type num_groups,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr)
+{
+  auto sum1   = detail::group_sum(values_0, num_groups, group_labels, stream, mr);
+  auto count1 = values_0.nullable()
+                  ? detail::group_count_valid(values_0, group_labels, num_groups, stream, mr)
+                  : detail::group_count_all(group_offsets, num_groups, stream, mr);
+  auto mean1 =
+    cudf::detail::binary_operation(*sum1,
+                                   *count1,
+                                   binary_operator::DIV,
+                                   cudf::detail::target_type(values_0.type(), aggregation::MEAN),
+                                   stream,
+                                   mr);
+
+  auto var1    = detail::group_var(values_0,
+                                *mean1,
+                                *count1,
+                                group_labels,
+                                1,  // default var_agg._ddof,
+                                stream,
+                                mr);
+  auto stddev1 = cudf::detail::unary_operation(*var1, unary_operator::SQRT, stream, mr);
+  return std::make_tuple(std::move(mean1), std::move(stddev1));
+}
+
+}  // namespace
+
+// TODO Eventually this function should accept values_0, values_1, not a struct.
+std::unique_ptr<column> group_corr(column_view const& values,
+                                   cudf::device_span<size_type const> group_offsets,
+                                   cudf::device_span<size_type const> group_labels,
+                                   size_type num_groups,
+                                   rmm::cuda_stream_view stream,
+                                   rmm::mr::device_memory_resource* mr)
+{
+  CUDF_EXPECTS(values.type().id() == type_id::STRUCT,
+               "Input to `group_corr` must be a structs column.");
+  CUDF_EXPECTS(values.num_children() == 2,
+               "Input to `group_corr` must be a structs column having 2 children columns.");
+  CUDF_EXPECTS(values.nullable() == false,
+               "Input to `group_corr` must be a non-nullable structs column.");
+  std::cout << "size=" << values.size() << std::endl;  // NOTE(review): leftover debug print
+  std::cout << "num_children=" << values.num_children() << std::endl;  // NOTE(review): ditto
+
+  using result_type = id_to_type<type_id::FLOAT64>;
+  static_assert(
+    std::is_same_v<cudf::detail::target_type_t<result_type, aggregation::Kind::CORR>, result_type>);
+
+  // check if each child type can be converted to float64.
+  bool const is_convertible =
+    std::all_of(values.child_begin(), values.child_end(), [](auto const& c) {
+      return type_dispatcher(c.type(), is_double_convertible_impl{});
+    });
+  CUDF_EXPECTS(is_convertible,
+               "Input to `group_corr` must be a structs column having all children columns of type "
+               "convertible to float64.");
+
+  // TODO calculate SUM
+  // TODO calculate COUNT_VALID  (need to do for 2 separately. for MEAN, and
+  // bitmask_and->COUNT_VALID for CORR.)
+  // TODO calculate MEAN
+  // TODO calculate VARIANCE
+  // TODO calculate STDDEV
+  // TODO calculate CORR. (requires MEAN1, MEAN2, COUNT_VALID_ANDed, STDDEV1, STDDEV2)
+  // TODO shuffle.
+
+  auto const& values_0 = values.child(0);
+  auto const& values_1 = values.child(1);
+  // TODO fix caching of child sum, count_valid, mean, variance, stddev. [unsupported due to
+  // result_cache design]
+  auto [mean0, stddev0] =
+    group_mean_stddev(values_0, group_offsets, group_labels, num_groups, stream, mr);
+  auto [mean1, stddev1] =
+    group_mean_stddev(values_1, group_offsets, group_labels, num_groups, stream, mr);
+
+  auto mean0_ptr   = mean0->mutable_view().begin<result_type>();
+  auto mean1_ptr   = mean1->mutable_view().begin<result_type>();
+  auto stddev0_ptr = stddev0->mutable_view().begin<result_type>();
+  auto stddev1_ptr = stddev1->mutable_view().begin<result_type>();
+
+  // TODO replace with ANDed bitmask. (values, stddev); for now only values_0 decides the count.
+  auto count1 = values_0.nullable()
+                  ? detail::group_count_valid(values_0, group_labels, num_groups, stream, mr)
+                  : detail::group_count_all(group_offsets, num_groups, stream, mr);
+
+  auto d_values_0 = column_device_view::create(values_0, stream);
+  auto d_values_1 = column_device_view::create(values_1, stream);
+  corr_transform<result_type> corr_transform_op{*d_values_0,
+                                                *d_values_1,
+                                                mean0_ptr,
+                                                mean1_ptr,
+                                                stddev0_ptr,
+                                                stddev1_ptr,
+                                                count1->view().data<size_type>(),
+                                                group_labels.begin()};
+
+  // result. NOTE(review): with nulls the mask is allocated UNINITIALIZED but never set below.
+  auto const any_nulls = std::any_of(
+    values.child_begin(), values.child_end(), [](auto const& c) { return c.has_nulls(); });
+  auto mask_type = any_nulls ? mask_state::UNINITIALIZED : mask_state::UNALLOCATED;
+
+  auto result =
+    make_numeric_column(data_type(type_to_id<result_type>()), num_groups, mask_type, stream, mr);
+  auto d_result = result->mutable_view().begin<result_type>();
+
+  auto corr_iter =
+    thrust::make_transform_iterator(thrust::make_counting_iterator(0), corr_transform_op);
+
+  thrust::reduce_by_key(rmm::exec_policy(stream),
+                        group_labels.begin(),
+                        group_labels.end(),
+                        corr_iter,
+                        thrust::make_discard_iterator(),
+                        d_result);
+  return result;  // NOTE(review): everything below this line is unreachable
+
+  // auto result_M2s = make_numeric_column(
+  //   data_type(type_to_id<result_type>()), num_groups, mask_state::UNALLOCATED, stream, mr);
+  // auto validities = rmm::device_uvector<int8_t>(num_groups, stream);
+
+  // // Perform merging for all the aggregations. Their output (and their validity data) are written
+  // // out concurrently through an output zip iterator.
+  // using iterator_tuple  = thrust::tuple<size_type*, result_type*, result_type*, int8_t*>;
+  // using output_iterator = thrust::zip_iterator<iterator_tuple>;
+  // auto const out_iter =
+  //   output_iterator{thrust::make_tuple(result_counts->mutable_view().template data<size_type>(),
+  //                                      result_means->mutable_view().template data<result_type>(),
+  //                                      result_M2s->mutable_view().template data<result_type>(),
+  //                                      validities.begin())};
+
+  // auto const count_valid = values.child(0);
+  // auto const mean_values = values.child(1);
+  // auto const M2_values   = values.child(2);
+  // auto const iter        = thrust::make_counting_iterator<size_type>(0);
+
+  // auto const fn = merge_fn<result_type>{group_offsets.begin(),
+  //                                       count_valid.template begin<size_type>(),
+  //                                       mean_values.template begin<result_type>(),
+  //                                       M2_values.template begin<result_type>()};
+  // thrust::transform(rmm::exec_policy(stream), iter, iter + num_groups, out_iter, fn);
+
+  // // Generate bitmask for the output.
+  // // Only mean and M2 values can be nullable. Count column must be non-nullable.
+  // auto [null_mask, null_count] = cudf::detail::valid_if(
+  //   validities.begin(), validities.end(), thrust::identity<int8_t>{}, stream, mr);
+  // if (null_count > 0) {
+  //   result_means->set_null_mask(null_mask, null_count);           // copy null_mask
+  //   result_M2s->set_null_mask(std::move(null_mask), null_count);  // take over null_mask
+  // }
+
+  // NOTE(review): the commented-out code above appears to be left over from group_merge_m2.
+
+  return result;
+}
+
+}  // namespace detail
+}  // namespace groupby
+}  // namespace cudf

From 015795cf875b75f0088be30f5e38c11bcacb6363 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 31 Aug 2021 23:38:57 +0530
Subject: [PATCH 03/79] add unit test temporarily

---
 cpp/tests/groupby/mean_tests.cpp | 59 ++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/cpp/tests/groupby/mean_tests.cpp b/cpp/tests/groupby/mean_tests.cpp
index 613e1555b79..9bceebfb241 100644
--- a/cpp/tests/groupby/mean_tests.cpp
+++ b/cpp/tests/groupby/mean_tests.cpp
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <cmath>
 #include <cudf/detail/aggregation/aggregation.hpp>
 #include <cudf/utilities/traits.hpp>
 
@@ -160,5 +161,63 @@ TEST_F(groupby_dictionary_mean_test, basic)
     keys, vals, expect_keys, expect_vals, cudf::make_mean_aggregation<groupby_aggregation>());
 }
 
+struct groupby_corr_test : public cudf::test::BaseFixture {
+};
+template <typename T>
+using fwcw    = fixed_width_column_wrapper<T>;
+using structs = structs_column_wrapper;
+
+TEST_F(groupby_corr_test, basic)
+{
+  using K  = int32_t;
+  using M0 = uint8_t;
+  using M1 = int16_t;
+  using R  = cudf::detail::target_type_t<M0, aggregation::CORR>;
+
+  // clang-format off
+  auto keys     = fwcw<K>  { 1,    2,    3,    1,    2,    2,    1,    3,    3,    2  };
+  auto member_0 = fwcw<M0>{{ 1,    1,    1,    2,    2,    3,    3,    1,    1,    4  }};//, null_at(1)};
+  auto member_1 = fwcw<M1>{{ 1,    1,    1,    2,   -2,    3,    3,    1,    1,   -4 }};//, null_at(7)};
+  auto values   = structs{{member_0, member_1}};//, null_at(4)};
+  // clang-format on
+
+  fixed_width_column_wrapper<K> expect_keys({1, 2, 3});
+  fixed_width_column_wrapper<R, double> expect_vals{
+    {1.000000, -0.41522739926869984, std::numeric_limits<double>::quiet_NaN()}};  //, null_at(2)};
+  // clang-format on
+
+  auto agg = cudf::make_corr_aggregation<groupby_aggregation>();
+  std::vector<groupby::aggregation_request> requests;
+  requests.emplace_back(groupby::aggregation_request());
+  requests[0].values = values;
+
+  requests[0].aggregations.push_back(std::move(agg));
+  requests.emplace_back(groupby::aggregation_request());
+  // WAR to force groupby to use sort implementation
+  requests[0].aggregations.push_back(make_nth_element_aggregation<groupby_aggregation>(0));
+
+  requests[1].values = column_view(values).child(0);
+  requests[1].aggregations.push_back(cudf::make_mean_aggregation<groupby_aggregation>());
+  requests[1].aggregations.push_back(cudf::make_std_aggregation<groupby_aggregation>());
+  requests.emplace_back(groupby::aggregation_request());
+  requests[2].values = column_view(values).child(1);
+  requests[2].aggregations.push_back(cudf::make_mean_aggregation<groupby_aggregation>());
+  requests[2].aggregations.push_back(cudf::make_std_aggregation<groupby_aggregation>());
+
+  groupby::groupby gb_obj(table_view({keys}));
+  auto result = gb_obj.aggregate(requests);
+
+  cudf::test::print(*result.second[0].results[0]);
+  cudf::test::print(*result.second[1].results[0]);
+  cudf::test::print(*result.second[1].results[1]);
+  cudf::test::print(*result.second[2].results[0]);
+  cudf::test::print(*result.second[2].results[1]);
+
+  CUDF_TEST_EXPECT_TABLES_EQUAL(table_view({expect_keys}), result.first->view());
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
+    expect_vals, *result.second[0].results[0], debug_output_level::ALL_ERRORS);
+  // test_single_agg(keys, values, expect_keys, expect_vals, std::move(agg));
+}
+
 }  // namespace test
 }  // namespace cudf

From ba6e50af611def7e73789be74c57971068f1fe7e Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 1 Sep 2021 18:02:13 -0700
Subject: [PATCH 04/79] create new PR for pearson groupby correlation

---
 python/cudf/cudf/_lib/cpp/aggregation.pxd | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/_lib/cpp/aggregation.pxd b/python/cudf/cudf/_lib/cpp/aggregation.pxd
index 13bfa49057c..19605a60d8d 100644
--- a/python/cudf/cudf/_lib/cpp/aggregation.pxd
+++ b/python/cudf/cudf/_lib/cpp/aggregation.pxd
@@ -38,17 +38,13 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
             COLLECT_SET 'cudf::aggregation::COLLECT_SET'
             PTX 'cudf::aggregation::PTX'
             CUDA 'cudf::aggregation::CUDA'
+            CORRELATION 'cudf::aggregation::CORRELATION'
+
         Kind kind
 
     cdef cppclass rolling_aggregation:
         aggregation.Kind kind
 
-    cdef cppclass groupby_aggregation:
-        aggregation.Kind kind
-
-    cdef cppclass groupby_scan_aggregation:
-        aggregation.Kind kind
-
     ctypedef enum udf_type:
         CUDA 'cudf::udf_type::CUDA'
         PTX 'cudf::udf_type::PTX'

From b198a5158f70461ba2d8361dfe9a93f8baa55a26 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Thu, 2 Sep 2021 16:32:26 -0700
Subject: [PATCH 05/79] adding corr. func in python

---
 python/cudf/cudf/_lib/cpp/aggregation.pxd | 2 ++
 python/cudf/cudf/core/groupby/groupby.py  | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/python/cudf/cudf/_lib/cpp/aggregation.pxd b/python/cudf/cudf/_lib/cpp/aggregation.pxd
index 19605a60d8d..1bd500facac 100644
--- a/python/cudf/cudf/_lib/cpp/aggregation.pxd
+++ b/python/cudf/cudf/_lib/cpp/aggregation.pxd
@@ -102,3 +102,5 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
         udf_type type,
         string user_defined_aggregator,
         data_type output_type) except +
+
+    cdef unique_ptr[T] make_correlation_aggregation[T]() except +
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index fd425d9de76..b8de9c7b8cd 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -1021,6 +1021,10 @@ def _mimic_pandas_order(
         result.index = self.obj.index
         return result
 
+    def correlation(self):
+        """
+        """
+
 
 class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin):
     """

From 3d0030739742859c50a645a3d3d6e5cdab05263e Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Mon, 6 Sep 2021 09:40:27 +0530
Subject: [PATCH 06/79] Revert "create new PR for pearson groupby correlation"

This reverts commit ba6e50af611def7e73789be74c57971068f1fe7e.
---
 python/cudf/cudf/_lib/cpp/aggregation.pxd | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/_lib/cpp/aggregation.pxd b/python/cudf/cudf/_lib/cpp/aggregation.pxd
index 1bd500facac..b19f526d539 100644
--- a/python/cudf/cudf/_lib/cpp/aggregation.pxd
+++ b/python/cudf/cudf/_lib/cpp/aggregation.pxd
@@ -38,13 +38,17 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
             COLLECT_SET 'cudf::aggregation::COLLECT_SET'
             PTX 'cudf::aggregation::PTX'
             CUDA 'cudf::aggregation::CUDA'
-            CORRELATION 'cudf::aggregation::CORRELATION'
-
         Kind kind
 
     cdef cppclass rolling_aggregation:
         aggregation.Kind kind
 
+    cdef cppclass groupby_aggregation:
+        aggregation.Kind kind
+
+    cdef cppclass groupby_scan_aggregation:
+        aggregation.Kind kind
+
     ctypedef enum udf_type:
         CUDA 'cudf::udf_type::CUDA'
         PTX 'cudf::udf_type::PTX'

From 120043744c2b4da68cf35a4d7a2efa48f8631cbd Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Mon, 6 Sep 2021 09:40:45 +0530
Subject: [PATCH 07/79] Revert "adding corr. func in python"

This reverts commit b198a5158f70461ba2d8361dfe9a93f8baa55a26.
---
 python/cudf/cudf/_lib/cpp/aggregation.pxd | 2 --
 python/cudf/cudf/core/groupby/groupby.py  | 4 ----
 2 files changed, 6 deletions(-)

diff --git a/python/cudf/cudf/_lib/cpp/aggregation.pxd b/python/cudf/cudf/_lib/cpp/aggregation.pxd
index b19f526d539..13bfa49057c 100644
--- a/python/cudf/cudf/_lib/cpp/aggregation.pxd
+++ b/python/cudf/cudf/_lib/cpp/aggregation.pxd
@@ -106,5 +106,3 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
         udf_type type,
         string user_defined_aggregator,
         data_type output_type) except +
-
-    cdef unique_ptr[T] make_correlation_aggregation[T]() except +
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 045863c8892..d98a78efb18 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -1058,10 +1058,6 @@ def _mimic_pandas_order(
         result.index = self.obj.index
         return result
 
-    def correlation(self):
-        """
-        """
-
 
 class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin):
     """

From 60293cc679f172183a4a8a22e49d8de5bc9896d7 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Mon, 6 Sep 2021 12:22:50 +0530
Subject: [PATCH 08/79] rename CORR to CORRELATION, added correlation_type as
 arg

---
 cpp/include/cudf/aggregation.hpp              | 10 +++---
 .../cudf/detail/aggregation/aggregation.hpp   | 33 +++++++++++++------
 cpp/src/aggregation/aggregation.cpp           | 14 ++++----
 cpp/src/groupby/hash/groupby.cu               | 11 +++++++
 cpp/src/groupby/sort/aggregate.cpp            | 10 ++----
 cpp/src/groupby/sort/group_corr.cu            |  3 +-
 cpp/tests/groupby/mean_tests.cpp              |  5 +--
 7 files changed, 56 insertions(+), 30 deletions(-)

diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp
index 1d03fb613df..fdc4e966748 100644
--- a/cpp/include/cudf/aggregation.hpp
+++ b/cpp/include/cudf/aggregation.hpp
@@ -88,7 +88,7 @@ class aggregation {
     MERGE_LISTS,     ///< merge multiple lists values into one list
     MERGE_SETS,      ///< merge multiple lists values into one list then drop duplicate entries
     MERGE_M2,        ///< merge partial values of M2 aggregation,
-    CORR,            ///< correlation among multiple columns
+    CORRELATION,     ///< correlation between two sets of elements
   };
 
   aggregation() = delete;
@@ -144,6 +144,7 @@ class groupby_scan_aggregation : public virtual aggregation {
 };
 
 enum class udf_type : bool { CUDA, PTX };
+enum class correlation_type : int32_t { PEARSON, KENDALL, SPEARMAN };
 
 /// Factory to create a SUM aggregation
 template <typename Base = aggregation>
@@ -490,14 +491,15 @@ template <typename Base = aggregation>
 std::unique_ptr<Base> make_merge_m2_aggregation();
 
 /**
- * @brief Factory to create a CORR aggregation
+ * @brief Factory to create a CORRELATION aggregation
  *
- * Compute correlation matrix amond the input columns.
+ * Compute correlation coefficient between two columns.
  * The input columns are child columns of a non-nullable struct columns.
  *
+ * @param type The type of correlation coefficient to compute (see `correlation_type`)
  */
 template <typename Base = aggregation>
-std::unique_ptr<Base> make_corr_aggregation();
+std::unique_ptr<Base> make_correlation_aggregation(correlation_type type);
 
 /** @} */  // end of group
 }  // namespace cudf
diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp
index e2177e64ba0..2f7dbd73cb8 100644
--- a/cpp/include/cudf/detail/aggregation/aggregation.hpp
+++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp
@@ -92,7 +92,7 @@ class simple_aggregations_collector {  // Declares the interface for the simple
   virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
                                                           class merge_m2_aggregation const& agg);
   virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
-                                                          class corr_aggregation const& agg);
+                                                          class correlation_aggregation const& agg);
 };
 
 class aggregation_finalizer {  // Declares the interface for the finalizer
@@ -127,7 +127,7 @@ class aggregation_finalizer {  // Declares the interface for the finalizer
   virtual void visit(class merge_lists_aggregation const& agg);
   virtual void visit(class merge_sets_aggregation const& agg);
   virtual void visit(class merge_m2_aggregation const& agg);
-  virtual void visit(class corr_aggregation const& agg);
+  virtual void visit(class correlation_aggregation const& agg);
 };
 
 /**
@@ -888,15 +888,25 @@ class merge_m2_aggregation final : public groupby_aggregation {
 };
 
 /**
- * @brief Derived aggregation class for specifying CORR aggregation
+ * @brief Derived aggregation class for specifying CORRELATION aggregation
  */
-class corr_aggregation final : public groupby_aggregation {
+class correlation_aggregation final : public groupby_aggregation {
  public:
-  explicit corr_aggregation() : aggregation{CORR} {}
+  explicit correlation_aggregation(correlation_type type) : aggregation{CORRELATION}, _type{type} {}
+  correlation_type _type;
+
+  bool is_equal(aggregation const& _other) const override
+  {
+    if (!this->aggregation::is_equal(_other)) { return false; }
+    auto const& other = dynamic_cast<correlation_aggregation const&>(_other);
+    return (_type == other._type);
+  }
+
+  size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); }
 
   std::unique_ptr<aggregation> clone() const override
   {
-    return std::make_unique<corr_aggregation>(*this);
+    return std::make_unique<correlation_aggregation>(*this);
   }
   std::vector<std::unique_ptr<aggregation>> get_simple_aggregations(
     data_type col_type, simple_aggregations_collector& collector) const override
@@ -904,6 +914,9 @@ class corr_aggregation final : public groupby_aggregation {
     return collector.visit(col_type, *this);
   }
   void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
+
+ protected:
+  size_t hash_impl() const { return std::hash<int>{}(static_cast<int>(_type)); }
 };
 
 /**
@@ -1140,9 +1153,9 @@ struct target_type_impl<SourceType, aggregation::MERGE_M2> {
   using type = struct_view;
 };
 
-// Always use struct for CORR
+// Always use double for CORRELATION
 template <typename SourceType>
-struct target_type_impl<SourceType, aggregation::CORR> {
+struct target_type_impl<SourceType, aggregation::CORRELATION> {
   using type = double;
 };
 
@@ -1250,8 +1263,8 @@ CUDA_HOST_DEVICE_CALLABLE decltype(auto) aggregation_dispatcher(aggregation::Kin
       return f.template operator()<aggregation::MERGE_SETS>(std::forward<Ts>(args)...);
     case aggregation::MERGE_M2:
       return f.template operator()<aggregation::MERGE_M2>(std::forward<Ts>(args)...);
-    case aggregation::CORR:
-      return f.template operator()<aggregation::CORR>(std::forward<Ts>(args)...);
+    case aggregation::CORRELATION:
+      return f.template operator()<aggregation::CORRELATION>(std::forward<Ts>(args)...);
     default: {
 #ifndef __CUDA_ARCH__
       CUDF_FAIL("Unsupported aggregation.");
diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp
index 07883be1491..175b480ce92 100644
--- a/cpp/src/aggregation/aggregation.cpp
+++ b/cpp/src/aggregation/aggregation.cpp
@@ -203,7 +203,7 @@ std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
 }
 
 std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
-  data_type col_type, corr_aggregation const& agg)
+  data_type col_type, correlation_aggregation const& agg)
 {
   return visit(col_type, static_cast<aggregation const&>(agg));
 }
@@ -352,7 +352,7 @@ void aggregation_finalizer::visit(merge_m2_aggregation const& agg)
   visit(static_cast<aggregation const&>(agg));
 }
 
-void aggregation_finalizer::visit(corr_aggregation const& agg)
+void aggregation_finalizer::visit(correlation_aggregation const& agg)
 {
   visit(static_cast<aggregation const&>(agg));
 }
@@ -677,12 +677,14 @@ template std::unique_ptr<groupby_aggregation> make_merge_m2_aggregation<groupby_
 
 /// Factory to create a CORR aggregation
 template <typename Base>
-std::unique_ptr<Base> make_corr_aggregation()
+std::unique_ptr<Base> make_correlation_aggregation(correlation_type type)
 {
-  return std::make_unique<detail::corr_aggregation>();
+  return std::make_unique<detail::correlation_aggregation>(type);
 }
-template std::unique_ptr<aggregation> make_corr_aggregation<aggregation>();
-template std::unique_ptr<groupby_aggregation> make_corr_aggregation<groupby_aggregation>();
+template std::unique_ptr<aggregation> make_correlation_aggregation<aggregation>(
+  correlation_type type);
+template std::unique_ptr<groupby_aggregation> make_correlation_aggregation<groupby_aggregation>(
+  correlation_type type);
 
 namespace detail {
 namespace {
diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu
index 87f83c6edd6..a9c64efc5db 100644
--- a/cpp/src/groupby/hash/groupby.cu
+++ b/cpp/src/groupby/hash/groupby.cu
@@ -157,6 +157,17 @@ class groupby_simple_aggregations_collector final
 
     return aggs;
   }
+
+  std::vector<std::unique_ptr<aggregation>> visit(
+    data_type, cudf::detail::correlation_aggregation const&) override
+  {
+    std::vector<std::unique_ptr<aggregation>> aggs;
+    aggs.push_back(make_sum_aggregation());
+    // COUNT_VALID
+    aggs.push_back(make_count_aggregation());
+
+    return aggs;
+  }
 };
 
 template <typename Map>
diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp
index 718fd191db1..aa7d9ac01c9 100644
--- a/cpp/src/groupby/sort/aggregate.cpp
+++ b/cpp/src/groupby/sort/aggregate.cpp
@@ -524,17 +524,13 @@ void aggregate_result_functor::operator()<aggregation::MERGE_M2>(aggregation con
     detail::group_merge_m2(
       get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr));
 };
+
 /**
- * @brief Perform correlation among child columns of non-nullable struct column.
- *
- * The output of this aggregation is also a non-nullable struct column. The child columns of the
- * output struct column are the corresponding correlation of each input child column.
- *
- * The correlation is done for each group of the input struct column.
+ * @brief Perform correlation between two child columns of a non-nullable struct column.
  *
  */
 template <>
-void aggregate_result_functor::operator()<aggregation::CORR>(aggregation const& agg)
+void aggregate_result_functor::operator()<aggregation::CORRELATION>(aggregation const& agg)
 {
   if (cache.has_result(col_idx, agg)) { return; }
 
diff --git a/cpp/src/groupby/sort/group_corr.cu b/cpp/src/groupby/sort/group_corr.cu
index dbe64f0d54c..35f29a1bb59 100644
--- a/cpp/src/groupby/sort/group_corr.cu
+++ b/cpp/src/groupby/sort/group_corr.cu
@@ -165,7 +165,8 @@ std::unique_ptr<column> group_corr(column_view const& values,
 
   using result_type = id_to_type<type_id::FLOAT64>;
   static_assert(
-    std::is_same_v<cudf::detail::target_type_t<result_type, aggregation::Kind::CORR>, result_type>);
+    std::is_same_v<cudf::detail::target_type_t<result_type, aggregation::Kind::CORRELATION>,
+                   result_type>);
 
   // check if each child type can be converted to float64.
   bool const is_convertible =
diff --git a/cpp/tests/groupby/mean_tests.cpp b/cpp/tests/groupby/mean_tests.cpp
index 9bceebfb241..9cbeca8163f 100644
--- a/cpp/tests/groupby/mean_tests.cpp
+++ b/cpp/tests/groupby/mean_tests.cpp
@@ -172,7 +172,7 @@ TEST_F(groupby_corr_test, basic)
   using K  = int32_t;
   using M0 = uint8_t;
   using M1 = int16_t;
-  using R  = cudf::detail::target_type_t<M0, aggregation::CORR>;
+  using R  = cudf::detail::target_type_t<M0, aggregation::CORRELATION>;
 
   // clang-format off
   auto keys     = fwcw<K>  { 1,    2,    3,    1,    2,    2,    1,    3,    3,    2  };
@@ -186,7 +186,8 @@ TEST_F(groupby_corr_test, basic)
     {1.000000, -0.41522739926869984, std::numeric_limits<double>::quiet_NaN()}};  //, null_at(2)};
   // clang-format on
 
-  auto agg = cudf::make_corr_aggregation<groupby_aggregation>();
+  auto agg =
+    cudf::make_correlation_aggregation<groupby_aggregation>(cudf::correlation_type::PEARSON);
   std::vector<groupby::aggregation_request> requests;
   requests.emplace_back(groupby::aggregation_request());
   requests[0].values = values;

From d421d6d818dd676c1b0717c694d89c11c5a9f835 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Wed, 8 Sep 2021 00:03:48 +0530
Subject: [PATCH 09/79] add shallow_hash(column_view)

---
 cpp/include/cudf/column/column_view.hpp | 20 ++++++++++++++++++++
 cpp/include/cudf/types.hpp              | 12 ++++++++++++
 cpp/src/column/column_view.cpp          | 19 +++++++++++++++++++
 3 files changed, 51 insertions(+)

diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp
index 7feaeafbad0..43386e926d2 100644
--- a/cpp/include/cudf/column/column_view.hpp
+++ b/cpp/include/cudf/column/column_view.hpp
@@ -633,4 +633,24 @@ column_view bit_cast(column_view const& input, data_type type);
  */
 mutable_column_view bit_cast(mutable_column_view const& input, data_type type);
 
+namespace detail {
+/**
+ * @brief Computes a hash value on the specified column view based on the shallow state of the
+ * column view.
+ *
+ * Only the shallow states (i.e pointers instead of data pointed by the pointer) of the column view
+ * are used in the hash computation. The hash value is computed  recursively on the children of the
+ * column view.
+ * The states used for the hash computation are: type, size, data pointer, null_mask pointer,
+ * offset, and the hash value of the children. Note that `null_count` is not used.
+ *
+ * Note: This hash function may result in different hash for a copy of the same column with exactly
+ * same contents. It is guaranteed to give same hash value for same column_view only, even if the
+ * underlying data changes.
+ *
+ * @param input The `column_view` to compute hash
+ * @return The hash value
+ */
+size_t shallow_hash(column_view const& input);
+}  // namespace detail
 }  // namespace cudf
diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp
index e1037efb5c8..37c5a4b424c 100644
--- a/cpp/include/cudf/types.hpp
+++ b/cpp/include/cudf/types.hpp
@@ -345,3 +345,15 @@ static constexpr uint32_t DEFAULT_HASH_SEED = 0;
 
 /** @} */
 }  // namespace cudf
+
+// specialization of std::hash for cudf::data_type
+namespace std {
+template <>
+struct hash<cudf::data_type> {
+  std::size_t operator()(cudf::data_type const& type) const noexcept
+  {
+    return std::hash<int32_t>{}(static_cast<int32_t>(type.id())) * 127 +
+           std::hash<int32_t>{}(type.scale());
+  }
+};
+}  // namespace std
diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp
index 186669ae697..d1202108ae5 100644
--- a/cpp/src/column/column_view.cpp
+++ b/cpp/src/column/column_view.cpp
@@ -22,6 +22,7 @@
 
 #include <thrust/iterator/transform_iterator.h>
 
+#include <algorithm>
 #include <exception>
 #include <numeric>
 #include <vector>
@@ -76,6 +77,24 @@ size_type column_view_base::null_count(size_type begin, size_type end) const
            ? 0
            : cudf::count_unset_bits(null_mask(), offset() + begin, offset() + end);
 }
+
+// simple prime number multiplication algorithm.
+// Adapted from http://myeyesareblind.com/2017/02/06/Combine-hash-values/#apachecommons
+constexpr void combine_hash(size_t& h1, size_t h2) { h1 = h1 * 127 + h2; }
+
+size_t shallow_hash(column_view const& input)
+{
+  size_t hash = 0;
+  combine_hash(hash, std::hash<data_type>{}(input.type()));
+  combine_hash(hash, std::hash<size_type>{}(input.size()));
+  combine_hash(hash, std::hash<void const*>{}(input.head()));
+  combine_hash(hash, std::hash<void const*>{}(input.null_mask()));
+  combine_hash(hash, std::hash<size_type>{}(input.offset()));
+  std::for_each(input.child_begin(), input.child_end(), [&hash](auto const& child) {
+    combine_hash(hash, shallow_hash(child));
+  });
+  return hash;
+}
 }  // namespace detail
 
 // Immutable view constructor

From 9c4a9f315338545c9616914ac8e2a73b38596cb5 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Wed, 8 Sep 2021 00:13:43 +0530
Subject: [PATCH 10/79] add CompoundTypes to type_lists

---
 cpp/include/cudf_test/type_lists.hpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/cpp/include/cudf_test/type_lists.hpp b/cpp/include/cudf_test/type_lists.hpp
index 5c1b0c6c458..49550e55e33 100644
--- a/cpp/include/cudf_test/type_lists.hpp
+++ b/cpp/include/cudf_test/type_lists.hpp
@@ -303,6 +303,18 @@ using FixedWidthTypesWithoutFixedPoint = Concat<NumericTypes, ChronoTypes>;
  */
 using ComparableTypes = Concat<NumericTypes, ChronoTypes, StringTypes>;
 
+/**
+ * @brief Provides a list of all compound types for use in GTest typed tests.
+ *
+ * Example:
+ * ```
+ * // Invokes all typed fixture tests for all compound types in libcudf
+ * TYPED_TEST_CASE(MyTypedFixture, cudf::test::CompoundTypes);
+ * ```
+ */
+using CompoundTypes =
+  cudf::test::Types<cudf::string_view, cudf::dictionary32, cudf::list_view, cudf::struct_view>;
+
 /**
  * @brief Provides a list of all types supported in libcudf for use in a GTest
  * typed test.

From a3dd235a48307b779d999644fa4a55679b759a40 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Wed, 8 Sep 2021 00:14:04 +0530
Subject: [PATCH 11/79] add shallow_hash tests

---
 cpp/tests/CMakeLists.txt                      |   1 +
 cpp/tests/column/column_view_shallow_test.cpp | 211 ++++++++++++++++++
 2 files changed, 212 insertions(+)
 create mode 100644 cpp/tests/column/column_view_shallow_test.cpp

diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index d9553d463ab..8b608c2bfd0 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -33,6 +33,7 @@ endfunction()
 # - column tests ----------------------------------------------------------------------------------
 ConfigureTest(COLUMN_TEST
     column/bit_cast_test.cpp
+    column/column_view_shallow_test.cpp
     column/column_test.cu
     column/column_device_view_test.cu
     column/compound_test.cu)
diff --git a/cpp/tests/column/column_view_shallow_test.cpp b/cpp/tests/column/column_view_shallow_test.cpp
new file mode 100644
index 00000000000..09e7e4eb689
--- /dev/null
+++ b/cpp/tests/column/column_view_shallow_test.cpp
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/column/column_view.hpp>
+#include <cudf/null_mask.hpp>
+#include <cudf/types.hpp>
+#include <cudf/utilities/traits.hpp>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/cudf_gtest.hpp>
+#include <cudf_test/type_lists.hpp>
+
+#include <thrust/iterator/counting_iterator.h>
+
+#include <type_traits>
+
+template <typename T>
+std::unique_ptr<cudf::column> example_column()
+{
+  // fixed_width, dict, string, list, struct
+  if constexpr (cudf::is_fixed_width<T>()) {
+    auto begin = thrust::make_counting_iterator(1);
+    auto end   = thrust::make_counting_iterator(16);
+    return cudf::test::fixed_width_column_wrapper<T>(begin, end).release();
+  } else if constexpr (cudf::is_dictionary<T>()) {
+    return cudf::test::dictionary_column_wrapper<std::string>(
+             {"fff", "aaa", "ddd", "bbb", "ccc", "ccc", "ccc", "", ""}, {1, 1, 1, 1, 1, 1, 1, 1, 0})
+      .release();
+  } else if constexpr (std::is_same_v<T, std::string> or std::is_same_v<T, cudf::string_view>) {
+    return cudf::test::strings_column_wrapper(
+             {"fff", "aaa", "ddd", "bbb", "ccc", "ccc", "ccc", "", ""})
+      .release();
+  } else if constexpr (std::is_same_v<T, cudf::list_view>) {
+    return cudf::test::lists_column_wrapper<int>({{1, 2, 3}, {4, 5}, {}, {6, 7, 8}}).release();
+  } else if constexpr (std::is_same_v<T, cudf::struct_view>) {
+    auto begin    = thrust::make_counting_iterator(1);
+    auto end      = thrust::make_counting_iterator(16);
+    auto member_0 = cudf::test::fixed_width_column_wrapper<int32_t>(begin, end);
+    auto member_1 = cudf::test::fixed_width_column_wrapper<int32_t>(begin + 10, end + 10);
+    return cudf::test::structs_column_wrapper({member_0, member_1}).release();
+  }
+  return {};
+}
+
+template <typename T>
+struct ColumnViewShallowTests : public cudf::test::BaseFixture {
+};
+
+using AllTypes = cudf::test::Concat<cudf::test::AllTypes, cudf::test::CompoundTypes>;
+TYPED_TEST_CASE(ColumnViewShallowTests, AllTypes);
+
+// Test for fixed_width, dict, string, list, struct
+// column_view, column_view = same hash.
+// column_view, make a copy = same hash.
+// column_view old, update data + new column_view     = same hash.
+// column_view old, add null_mask + new column_view   = diff hash.
+// column_view old, update nulls + new column_view    = same hash.
+// column_view old, set_null_count + new column_view  = same hash.
+//
+// column_view, diff column     = diff hash.
+// column_view, sliced[0, size) = same hash (for split too)
+// column_view, sliced[n:)      = diff hash (for split too)
+// column_view, bit_cast        = diff hash
+//
+// mutable_column_view, column_view = same hash
+// mutable_column_view, modified mutable_column_view = same hash
+//
+// update the children column data  = same hash
+// update the children column_views = diff hash
+
+TYPED_TEST(ColumnViewShallowTests, shallow_hash)
+{
+  using namespace cudf::detail;
+  auto col      = example_column<TypeParam>();
+  auto col_view = cudf::column_view{*col};
+  // same = same hash
+  {
+    EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view));
+  }
+  // copy column_view = same hash
+  {
+    auto col_view_copy = col_view;
+    EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_copy));
+  }
+  // new column_view from column = same hash
+  {
+    auto col_view_new = cudf::column_view{*col};
+    EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_new));
+  }
+  // update data + new column_view = same hash.
+  {
+    // update data by modifying some bits: fixed_width, string, dict, list, struct
+    if constexpr (cudf::is_fixed_width<TypeParam>()) {
+      // Update data
+      auto data = reinterpret_cast<cudf::bitmask_type*>(col->mutable_view().head());
+      cudf::set_null_mask(data, 2, 64, true);
+    } else {
+      // Update child(0).data
+      auto data = reinterpret_cast<cudf::bitmask_type*>(col->child(0).mutable_view().head());
+      cudf::set_null_mask(data, 2, 64, true);
+    }
+    auto col_view_new = cudf::column_view{*col};
+    EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_new));
+  }
+  // add null_mask + new column_view = diff hash.
+  {
+    col->set_null_mask(cudf::create_null_mask(col->size(), cudf::mask_state::ALL_VALID));
+    auto col_view_new = cudf::column_view{*col};
+    EXPECT_NE(shallow_hash(col_view), shallow_hash(col_view_new));
+    col_view_new.null_count();
+    EXPECT_NE(shallow_hash(col_view), shallow_hash(col_view_new));
+    auto col_view_new2 = cudf::column_view{*col};
+    EXPECT_EQ(shallow_hash(col_view_new), shallow_hash(col_view_new2));
+  }
+  col_view = cudf::column_view{*col};  // updating after adding null_mask
+  // update nulls + new column_view = same hash.
+  {
+    cudf::set_null_mask(col->mutable_view().null_mask(), 2, 4, false);
+    auto col_view_new = cudf::column_view{*col};
+    EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_new));
+  }
+  // set_null_count + new column_view = same hash. set_null_count(UNKNOWN_NULL_COUNT)
+  {
+    col->set_null_count(cudf::UNKNOWN_NULL_COUNT);
+    auto col_view_new = cudf::column_view{*col};
+    EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_new));
+    col->set_null_count(col->size());
+    auto col_view_new2 = cudf::column_view{*col};
+    EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_new2));
+  }
+
+  // column_view, diff column = diff hash.
+  {
+    auto col_diff      = example_column<TypeParam>();
+    auto col_view_diff = cudf::column_view{*col_diff};
+    EXPECT_NE(shallow_hash(col_view), shallow_hash(col_view_diff));
+  }
+  // column_view, sliced[0, size]  = same hash (for split too)
+  {
+    auto col_sliced = cudf::slice(col_view, {0, col_view.size()});
+    EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_sliced[0]));
+    auto col_split = cudf::split(col_view, {0});
+    EXPECT_NE(shallow_hash(col_view), shallow_hash(col_split[0]));
+    EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_split[1]));
+  }
+  // column_view, sliced[n:]       = diff hash (for split too)
+  {
+    auto col_sliced = cudf::slice(col_view, {1, col_view.size()});
+    EXPECT_NE(shallow_hash(col_view), shallow_hash(col_sliced[0]));
+    auto col_split = cudf::split(col_view, {1});
+    EXPECT_NE(shallow_hash(col_view), shallow_hash(col_split[0]));
+    EXPECT_NE(shallow_hash(col_view), shallow_hash(col_split[1]));
+  }
+  // column_view, bit_cast         = diff hash
+  {
+    if constexpr (std::is_integral_v<TypeParam> and not std::is_same_v<TypeParam, bool>) {
+      using newType    = std::conditional_t<std::is_signed_v<TypeParam>,
+                                         std::make_unsigned_t<TypeParam>,
+                                         std::make_signed_t<TypeParam>>;
+      auto new_type    = cudf::data_type(cudf::type_to_id<newType>());
+      auto col_bitcast = cudf::bit_cast(col_view, new_type);
+      EXPECT_NE(shallow_hash(col_view), shallow_hash(col_bitcast));
+    }
+  }
+  // mutable_column_view, column_view = same hash
+  {
+    auto col_mutable = cudf::mutable_column_view{*col};
+    EXPECT_EQ(shallow_hash(col_mutable), shallow_hash(col_view));
+  }
+  // mutable_column_view, modified mutable_column_view = same hash
+  // update the children column data = same hash
+  {
+    auto col_mutable = cudf::mutable_column_view{*col};
+    if constexpr (cudf::is_fixed_width<TypeParam>()) {
+      // Update data
+      auto data = reinterpret_cast<cudf::bitmask_type*>(col->mutable_view().head());
+      cudf::set_null_mask(data, 1, 32, false);
+    } else {
+      // Update child(0).data
+      auto data = reinterpret_cast<cudf::bitmask_type*>(col->child(0).mutable_view().head());
+      cudf::set_null_mask(data, 1, 32, false);
+    }
+    EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_mutable));
+    auto col_mutable_new = cudf::mutable_column_view{*col};
+    EXPECT_EQ(shallow_hash(col_mutable), shallow_hash(col_mutable_new));
+  }
+  // update the children column_views = diff hash
+  {
+    if constexpr (cudf::is_nested<TypeParam>()) {
+      col->child(0).set_null_mask(
+        cudf::create_null_mask(col->child(0).size(), cudf::mask_state::ALL_NULL));
+      auto col_child_updated = cudf::mutable_column_view{*col};
+      EXPECT_NE(shallow_hash(col_view), shallow_hash(col_child_updated));
+    }
+  }
+}

From 2365d07960dfefc2ec5f22fb05c6b471de022945 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Wed, 8 Sep 2021 01:29:27 +0530
Subject: [PATCH 12/79] add column copy test

---
 cpp/tests/column/column_view_shallow_test.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/cpp/tests/column/column_view_shallow_test.cpp b/cpp/tests/column/column_view_shallow_test.cpp
index 09e7e4eb689..b0f6eeac450 100644
--- a/cpp/tests/column/column_view_shallow_test.cpp
+++ b/cpp/tests/column/column_view_shallow_test.cpp
@@ -27,6 +27,7 @@
 
 #include <thrust/iterator/counting_iterator.h>
 
+#include <memory>
 #include <type_traits>
 
 template <typename T>
@@ -67,6 +68,7 @@ TYPED_TEST_CASE(ColumnViewShallowTests, AllTypes);
 // Test for fixed_width, dict, string, list, struct
 // column_view, column_view = same hash.
 // column_view, make a copy = same hash.
+// column_view, copy column = diff hash
 // column_view old, update data + new column_view     = same hash.
 // column_view old, add null_mask + new column_view   = diff hash.
 // column_view old, update nulls + new column_view    = same hash.
@@ -97,6 +99,12 @@ TYPED_TEST(ColumnViewShallowTests, shallow_hash)
     auto col_view_copy = col_view;
     EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_copy));
   }
+  // copy column = diff hash
+  {
+    auto col_new       = std::make_unique<cudf::column>(*col);
+    auto col_view_copy = col_new->view();
+    EXPECT_NE(shallow_hash(col_view), shallow_hash(col_view_copy));
+  }
   // new column_view from column = same hash
   {
     auto col_view_new = cudf::column_view{*col};

From 88726a451fc267c462ce7f9d1486f74bde2782ab Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Wed, 8 Sep 2021 01:34:07 +0530
Subject: [PATCH 13/79] add shallow_equal(column_view) and tests

---
 cpp/include/cudf/column/column_view.hpp       |  17 +++
 cpp/src/column/column_view.cpp                |  14 ++
 cpp/tests/column/column_view_shallow_test.cpp | 133 ++++++++++++++++++
 3 files changed, 164 insertions(+)

diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp
index 43386e926d2..a77351fe731 100644
--- a/cpp/include/cudf/column/column_view.hpp
+++ b/cpp/include/cudf/column/column_view.hpp
@@ -652,5 +652,22 @@ namespace detail {
  * @return The hash value
  */
 size_t shallow_hash(column_view const& input);
+
+/**
+ * @brief Equality operator for column views based on the shallow state of the column view.
+ *
+ * The shallow states used for the comparison are: type, size, data pointer, null_mask
+ * pointer, offset and the column_view of the children recursively. Note that `null_count` is not
+ * used.
+ *
+ * Note: This equality function will consider a column not equal to a copy of the same column with
+ * exactly same contents. It is guaranteed to return true for same column_view only, even if the
+ * underlying data changes.
+ *
+ * @param lhs The left `column_view` to compare
+ * @param rhs The right `column_view` to compare
+ * @return true if the shallow states of the two column views are equal
+ */
+bool shallow_equal(column_view const& lhs, column_view const& rhs);
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp
index d1202108ae5..7e0bde86b74 100644
--- a/cpp/src/column/column_view.cpp
+++ b/cpp/src/column/column_view.cpp
@@ -95,6 +95,20 @@ size_t shallow_hash(column_view const& input)
   });
   return hash;
 }
+
+bool is_shallow_equal(column_view const& lhs, column_view const& rhs)
+{
+  return (lhs.type() == rhs.type()) and (lhs.size() == rhs.size()) and
+         (lhs.head() == rhs.head()) and (lhs.null_mask() == rhs.null_mask()) and
+         (lhs.offset() == rhs.offset()) and
+         std::equal(lhs.child_begin(),
+                    lhs.child_end(),
+                    rhs.child_begin(),
+                    rhs.child_end(),
+                    [](auto const& lhs_child, auto const& rhs_child) {
+                      return is_shallow_equal(lhs_child, rhs_child);
+                    });
+}
 }  // namespace detail
 
 // Immutable view constructor
diff --git a/cpp/tests/column/column_view_shallow_test.cpp b/cpp/tests/column/column_view_shallow_test.cpp
index b0f6eeac450..25af9b968e6 100644
--- a/cpp/tests/column/column_view_shallow_test.cpp
+++ b/cpp/tests/column/column_view_shallow_test.cpp
@@ -217,3 +217,136 @@ TYPED_TEST(ColumnViewShallowTests, shallow_hash)
     }
   }
 }
+
+TYPED_TEST(ColumnViewShallowTests, shallow_equal)
+{
+  using namespace cudf::detail;
+  auto col      = example_column<TypeParam>();
+  auto col_view = cudf::column_view{*col};
+  // same column_view = equal
+  {
+    EXPECT_TRUE(shallow_equal(col_view, col_view));
+  }
+  // copy column_view = equal
+  {
+    auto col_view_copy = col_view;
+    EXPECT_TRUE(shallow_equal(col_view, col_view_copy));
+  }
+  // copy column = not equal
+  {
+    auto col_new       = std::make_unique<cudf::column>(*col);
+    auto col_view_copy = col_new->view();
+    EXPECT_FALSE(shallow_equal(col_view, col_view_copy));
+  }
+  // new column_view from column = equal
+  {
+    auto col_view_new = cudf::column_view{*col};
+    EXPECT_TRUE(shallow_equal(col_view, col_view_new));
+  }
+  // update data + new column_view = equal.
+  {
+    // update data by modifying some bits: fixed_width, string, dict, list, struct
+    if constexpr (cudf::is_fixed_width<TypeParam>()) {
+      // Update data
+      auto data = reinterpret_cast<cudf::bitmask_type*>(col->mutable_view().head());
+      cudf::set_null_mask(data, 2, 64, true);
+    } else {
+      // Update child(0).data
+      auto data = reinterpret_cast<cudf::bitmask_type*>(col->child(0).mutable_view().head());
+      cudf::set_null_mask(data, 2, 64, true);
+    }
+    auto col_view_new = cudf::column_view{*col};
+    EXPECT_TRUE(shallow_equal(col_view, col_view_new));
+  }
+  // add null_mask + new column_view = not equal.
+  {
+    col->set_null_mask(cudf::create_null_mask(col->size(), cudf::mask_state::ALL_VALID));
+    auto col_view_new = cudf::column_view{*col};
+    EXPECT_FALSE(shallow_equal(col_view, col_view_new));
+    col_view_new.null_count();
+    EXPECT_FALSE(shallow_equal(col_view, col_view_new));
+    auto col_view_new2 = cudf::column_view{*col};
+    EXPECT_TRUE(shallow_equal(col_view_new, col_view_new2));
+  }
+  col_view = cudf::column_view{*col};  // updating after adding null_mask
+  // update nulls + new column_view = equal.
+  {
+    cudf::set_null_mask(col->mutable_view().null_mask(), 2, 4, false);
+    auto col_view_new = cudf::column_view{*col};
+    EXPECT_TRUE(shallow_equal(col_view, col_view_new));
+  }
+  // set_null_count + new column_view = equal. set_null_count(UNKNOWN_NULL_COUNT)
+  {
+    col->set_null_count(cudf::UNKNOWN_NULL_COUNT);
+    auto col_view_new = cudf::column_view{*col};
+    EXPECT_TRUE(shallow_equal(col_view, col_view_new));
+    col->set_null_count(col->size());
+    auto col_view_new2 = cudf::column_view{*col};
+    EXPECT_TRUE(shallow_equal(col_view, col_view_new2));
+  }
+
+  // column_view, diff column = not equal.
+  {
+    auto col_diff      = example_column<TypeParam>();
+    auto col_view_diff = cudf::column_view{*col_diff};
+    EXPECT_FALSE(shallow_equal(col_view, col_view_diff));
+  }
+  // column_view, sliced[0, size]  = equal (for split too)
+  {
+    auto col_sliced = cudf::slice(col_view, {0, col_view.size()});
+    EXPECT_TRUE(shallow_equal(col_view, col_sliced[0]));
+    auto col_split = cudf::split(col_view, {0});
+    EXPECT_FALSE(shallow_equal(col_view, col_split[0]));
+    EXPECT_TRUE(shallow_equal(col_view, col_split[1]));
+  }
+  // column_view, sliced[n:]       = not equal (for split too)
+  {
+    auto col_sliced = cudf::slice(col_view, {1, col_view.size()});
+    EXPECT_FALSE(shallow_equal(col_view, col_sliced[0]));
+    auto col_split = cudf::split(col_view, {1});
+    EXPECT_FALSE(shallow_equal(col_view, col_split[0]));
+    EXPECT_FALSE(shallow_equal(col_view, col_split[1]));
+  }
+  // column_view, bit_cast         = not equal
+  {
+    if constexpr (std::is_integral_v<TypeParam> and not std::is_same_v<TypeParam, bool>) {
+      using newType    = std::conditional_t<std::is_signed_v<TypeParam>,
+                                         std::make_unsigned_t<TypeParam>,
+                                         std::make_signed_t<TypeParam>>;
+      auto new_type    = cudf::data_type(cudf::type_to_id<newType>());
+      auto col_bitcast = cudf::bit_cast(col_view, new_type);
+      EXPECT_FALSE(shallow_equal(col_view, col_bitcast));
+    }
+  }
+  // mutable_column_view, column_view = equal
+  {
+    auto col_mutable = cudf::mutable_column_view{*col};
+    EXPECT_TRUE(shallow_equal(col_mutable, col_view));
+  }
+  // mutable_column_view, modified mutable_column_view = equal
+  // update the children column data = equal
+  {
+    auto col_mutable = cudf::mutable_column_view{*col};
+    if constexpr (cudf::is_fixed_width<TypeParam>()) {
+      // Update data
+      auto data = reinterpret_cast<cudf::bitmask_type*>(col->mutable_view().head());
+      cudf::set_null_mask(data, 1, 32, false);
+    } else {
+      // Update child(0).data
+      auto data = reinterpret_cast<cudf::bitmask_type*>(col->child(0).mutable_view().head());
+      cudf::set_null_mask(data, 1, 32, false);
+    }
+    EXPECT_TRUE(shallow_equal(col_view, col_mutable));
+    auto col_mutable_new = cudf::mutable_column_view{*col};
+    EXPECT_TRUE(shallow_equal(col_mutable, col_mutable_new));
+  }
+  // update the children column_views = not equal
+  {
+    if constexpr (cudf::is_nested<TypeParam>()) {
+      col->child(0).set_null_mask(
+        cudf::create_null_mask(col->child(0).size(), cudf::mask_state::ALL_NULL));
+      auto col_child_updated = cudf::mutable_column_view{*col};
+      EXPECT_FALSE(shallow_equal(col_view, col_child_updated));
+    }
+  }
+}

From d52509de3f69b02378f268b4540e0565a7c4589e Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Wed, 8 Sep 2021 12:23:26 +0530
Subject: [PATCH 14/79] update result_cache to use shallow_hash, shallow_equal

---
 .../cudf/detail/aggregation/result_cache.hpp  |  30 +++--
 cpp/src/aggregation/result_cache.cpp          |  33 +++--
 cpp/src/groupby/common/utils.hpp              |   2 +-
 cpp/src/groupby/hash/groupby.cu               | 106 +++++++---------
 cpp/src/groupby/sort/aggregate.cpp            | 115 +++++++++---------
 cpp/src/groupby/sort/functors.hpp             |   6 +-
 cpp/src/groupby/sort/scan.cpp                 |  31 +++--
 7 files changed, 152 insertions(+), 171 deletions(-)

diff --git a/cpp/include/cudf/detail/aggregation/result_cache.hpp b/cpp/include/cudf/detail/aggregation/result_cache.hpp
index ebb1ea784e5..a15e15d7d01 100644
--- a/cpp/include/cudf/detail/aggregation/result_cache.hpp
+++ b/cpp/include/cudf/detail/aggregation/result_cache.hpp
@@ -23,15 +23,19 @@
 
 namespace cudf {
 namespace detail {
-struct aggregation_equality {
-  bool operator()(aggregation const& lhs, aggregation const& rhs) const
+struct pair_column_aggregation_equal_to {
+  bool operator()(std::pair<column_view, aggregation const&> const& lhs,
+                  std::pair<column_view, aggregation const&> const& rhs) const
   {
-    return lhs.is_equal(rhs);
+    return is_shallow_equal(lhs.first, rhs.first) and lhs.second.is_equal(rhs.second);
   }
 };
 
-struct aggregation_hash {
-  size_t operator()(aggregation const& key) const noexcept { return key.do_hash(); }
+struct pair_column_aggregation_hash {
+  size_t operator()(std::pair<column_view, aggregation const&> const& key) const noexcept
+  {
+    return shallow_hash(key.first) * 127 + key.second.do_hash();
+  }
 };
 
 class result_cache {
@@ -43,19 +47,19 @@ class result_cache {
 
   result_cache(size_t num_columns) : _cache(num_columns) {}
 
-  bool has_result(size_t col_idx, aggregation const& agg) const;
+  bool has_result(column_view const& input, aggregation const& agg) const;
 
-  void add_result(size_t col_idx, aggregation const& agg, std::unique_ptr<column>&& col);
+  void add_result(column_view const& input, aggregation const& agg, std::unique_ptr<column>&& col);
 
-  column_view get_result(size_t col_idx, aggregation const& agg) const;
+  column_view get_result(column_view const& input, aggregation const& agg) const;
 
-  std::unique_ptr<column> release_result(size_t col_idx, aggregation const& agg);
+  std::unique_ptr<column> release_result(column_view const& input, aggregation const& agg);
 
  private:
-  std::vector<std::unordered_map<std::reference_wrapper<aggregation const>,
-                                 std::pair<std::unique_ptr<aggregation>, std::unique_ptr<column>>,
-                                 aggregation_hash,
-                                 aggregation_equality>>
+  std::unordered_map<std::pair<column_view, std::reference_wrapper<aggregation const>>,
+                     std::pair<std::unique_ptr<aggregation>, std::unique_ptr<column>>,
+                     pair_column_aggregation_hash,
+                     pair_column_aggregation_equal_to>
     _cache;
 };
 
diff --git a/cpp/src/aggregation/result_cache.cpp b/cpp/src/aggregation/result_cache.cpp
index 36668af5355..04750f7fa98 100644
--- a/cpp/src/aggregation/result_cache.cpp
+++ b/cpp/src/aggregation/result_cache.cpp
@@ -19,38 +19,37 @@
 namespace cudf {
 namespace detail {
 
-bool result_cache::has_result(size_t col_idx, aggregation const& agg) const
+bool result_cache::has_result(column_view const& input, aggregation const& agg) const
 {
-  if (col_idx > _cache.size()) return false;
-
-  auto result_it = _cache[col_idx].find(agg);
-
-  return (result_it != _cache[col_idx].end());
+  return _cache.count({input, agg});
 }
 
-void result_cache::add_result(size_t col_idx, aggregation const& agg, std::unique_ptr<column>&& col)
+void result_cache::add_result(column_view const& input,
+                              aggregation const& agg,
+                              std::unique_ptr<column>&& col)
 {
   // We can't guarantee that agg will outlive the cache, so we need to take ownership of a copy.
   // To allow lookup by reference, make the key a reference and keep the owner in the value pair.
-  auto owned_agg  = agg.clone();
-  auto const& key = *owned_agg;
-  auto value      = std::make_pair(std::move(owned_agg), std::move(col));
-  _cache[col_idx].emplace(key, std::move(value));
+  auto owned_agg       = agg.clone();
+  auto const& key      = *owned_agg;
+  auto value           = std::make_pair(std::move(owned_agg), std::move(col));
+  _cache[{input, key}] = std::move(value);
 }
 
-column_view result_cache::get_result(size_t col_idx, aggregation const& agg) const
+column_view result_cache::get_result(column_view const& input, aggregation const& agg) const
 {
-  CUDF_EXPECTS(has_result(col_idx, agg), "Result does not exist in cache");
+  CUDF_EXPECTS(has_result(input, agg), "Result does not exist in cache");
 
-  auto result_it = _cache[col_idx].find(agg);
+  auto result_it = _cache.find({input, agg});
   return result_it->second.second->view();
 }
 
-std::unique_ptr<column> result_cache::release_result(size_t col_idx, aggregation const& agg)
+std::unique_ptr<column> result_cache::release_result(column_view const& input,
+                                                     aggregation const& agg)
 {
-  CUDF_EXPECTS(has_result(col_idx, agg), "Result does not exist in cache");
+  CUDF_EXPECTS(has_result(input, agg), "Result does not exist in cache");
 
-  auto result_it = _cache[col_idx].extract(agg);
+  auto result_it = _cache.extract({input, agg});
   return std::move(result_it.mapped().second);
 }
 
diff --git a/cpp/src/groupby/common/utils.hpp b/cpp/src/groupby/common/utils.hpp
index 3da20fb9af3..129351c3d38 100644
--- a/cpp/src/groupby/common/utils.hpp
+++ b/cpp/src/groupby/common/utils.hpp
@@ -33,7 +33,7 @@ inline std::vector<aggregation_result> extract_results(host_span<RequestType con
 
   for (size_t i = 0; i < requests.size(); i++) {
     for (auto&& agg : requests[i].aggregations) {
-      results[i].results.emplace_back(cache.release_result(i, *agg));
+      results[i].results.emplace_back(cache.release_result(requests[i].values, *agg));
     }
   }
   return results;
diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu
index 87f83c6edd6..5c9aeefe524 100644
--- a/cpp/src/groupby/hash/groupby.cu
+++ b/cpp/src/groupby/hash/groupby.cu
@@ -161,7 +161,6 @@ class groupby_simple_aggregations_collector final
 
 template <typename Map>
 class hash_compound_agg_finalizer final : public cudf::detail::aggregation_finalizer {
-  size_t col_idx;
   column_view col;
   data_type result_type;
   cudf::detail::result_cache* sparse_results;
@@ -170,14 +169,13 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final
   size_type const map_size;
   Map const& map;
   bitmask_type const* __restrict__ row_bitmask;
-  rmm::mr::device_memory_resource* mr;
   rmm::cuda_stream_view stream;
+  rmm::mr::device_memory_resource* mr;
 
  public:
   using cudf::detail::aggregation_finalizer::visit;
 
-  hash_compound_agg_finalizer(size_t col_idx,
-                              column_view col,
+  hash_compound_agg_finalizer(column_view col,
                               cudf::detail::result_cache* sparse_results,
                               cudf::detail::result_cache* dense_results,
                               device_span<size_type const> gather_map,
@@ -186,8 +184,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final
                               bitmask_type const* row_bitmask,
                               rmm::cuda_stream_view stream,
                               rmm::mr::device_memory_resource* mr)
-    : col_idx(col_idx),
-      col(col),
+    : col(col),
       sparse_results(sparse_results),
       dense_results(dense_results),
       gather_map(gather_map),
@@ -203,7 +200,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final
 
   auto to_dense_agg_result(cudf::aggregation const& agg)
   {
-    auto s                  = sparse_results->get_result(col_idx, agg);
+    auto s                  = sparse_results->get_result(col, agg);
     auto dense_result_table = cudf::detail::gather(table_view({std::move(s)}),
                                                    gather_map.begin(),
                                                    gather_map.begin() + map_size,
@@ -240,43 +237,43 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final
   // Declare overloads for each kind of aggregation to dispatch
   void visit(cudf::aggregation const& agg) override
   {
-    if (dense_results->has_result(col_idx, agg)) return;
-    dense_results->add_result(col_idx, agg, to_dense_agg_result(agg));
+    if (dense_results->has_result(col, agg)) return;
+    dense_results->add_result(col, agg, to_dense_agg_result(agg));
   }
 
   void visit(cudf::detail::min_aggregation const& agg) override
   {
-    if (dense_results->has_result(col_idx, agg)) return;
+    if (dense_results->has_result(col, agg)) return;
     if (result_type.id() == type_id::STRING) {
       auto transformed_agg = make_argmin_aggregation();
-      dense_results->add_result(col_idx, agg, gather_argminmax(*transformed_agg));
+      dense_results->add_result(col, agg, gather_argminmax(*transformed_agg));
     } else {
-      dense_results->add_result(col_idx, agg, to_dense_agg_result(agg));
+      dense_results->add_result(col, agg, to_dense_agg_result(agg));
     }
   }
 
   void visit(cudf::detail::max_aggregation const& agg) override
   {
-    if (dense_results->has_result(col_idx, agg)) return;
+    if (dense_results->has_result(col, agg)) return;
 
     if (result_type.id() == type_id::STRING) {
       auto transformed_agg = make_argmax_aggregation();
-      dense_results->add_result(col_idx, agg, gather_argminmax(*transformed_agg));
+      dense_results->add_result(col, agg, gather_argminmax(*transformed_agg));
     } else {
-      dense_results->add_result(col_idx, agg, to_dense_agg_result(agg));
+      dense_results->add_result(col, agg, to_dense_agg_result(agg));
     }
   }
 
   void visit(cudf::detail::mean_aggregation const& agg) override
   {
-    if (dense_results->has_result(col_idx, agg)) return;
+    if (dense_results->has_result(col, agg)) return;
 
     auto sum_agg   = make_sum_aggregation();
     auto count_agg = make_count_aggregation();
     this->visit(*sum_agg);
     this->visit(*count_agg);
-    column_view sum_result   = dense_results->get_result(col_idx, *sum_agg);
-    column_view count_result = dense_results->get_result(col_idx, *count_agg);
+    column_view sum_result   = dense_results->get_result(col, *sum_agg);
+    column_view count_result = dense_results->get_result(col, *count_agg);
 
     auto result =
       cudf::detail::binary_operation(sum_result,
@@ -285,19 +282,19 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final
                                      cudf::detail::target_type(result_type, aggregation::MEAN),
                                      stream,
                                      mr);
-    dense_results->add_result(col_idx, agg, std::move(result));
+    dense_results->add_result(col, agg, std::move(result));
   }
 
   void visit(cudf::detail::var_aggregation const& agg) override
   {
-    if (dense_results->has_result(col_idx, agg)) return;
+    if (dense_results->has_result(col, agg)) return;
 
     auto sum_agg   = make_sum_aggregation();
     auto count_agg = make_count_aggregation();
     this->visit(*sum_agg);
     this->visit(*count_agg);
-    column_view sum_result   = sparse_results->get_result(col_idx, *sum_agg);
-    column_view count_result = sparse_results->get_result(col_idx, *count_agg);
+    column_view sum_result   = sparse_results->get_result(col, *sum_agg);
+    column_view count_result = sparse_results->get_result(col, *count_agg);
 
     auto values_view = column_device_view::create(col);
     auto sum_view    = column_device_view::create(sum_result);
@@ -315,47 +312,40 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final
       col.size(),
       ::cudf::detail::var_hash_functor<Map>{
         map, row_bitmask, *var_result_view, *values_view, *sum_view, *count_view, agg._ddof});
-    sparse_results->add_result(col_idx, agg, std::move(var_result));
-    dense_results->add_result(col_idx, agg, to_dense_agg_result(agg));
+    sparse_results->add_result(col, agg, std::move(var_result));
+    dense_results->add_result(col, agg, to_dense_agg_result(agg));
   }
 
   void visit(cudf::detail::std_aggregation const& agg) override
   {
-    if (dense_results->has_result(col_idx, agg)) return;
+    if (dense_results->has_result(col, agg)) return;
     auto var_agg = make_variance_aggregation(agg._ddof);
     this->visit(*dynamic_cast<cudf::detail::var_aggregation*>(var_agg.get()));
-    column_view variance = dense_results->get_result(col_idx, *var_agg);
+    column_view variance = dense_results->get_result(col, *var_agg);
 
     auto result = cudf::detail::unary_operation(variance, unary_operator::SQRT, stream, mr);
-    dense_results->add_result(col_idx, agg, std::move(result));
+    dense_results->add_result(col, agg, std::move(result));
   }
 };
 // flatten aggs to filter in single pass aggs
-std::tuple<table_view,
-           std::vector<aggregation::Kind>,
-           std::vector<std::unique_ptr<aggregation>>,
-           std::vector<size_t>>
+std::tuple<table_view, std::vector<aggregation::Kind>, std::vector<std::unique_ptr<aggregation>>>
 flatten_single_pass_aggs(host_span<aggregation_request const> requests)
 {
   std::vector<column_view> columns;
   std::vector<std::unique_ptr<aggregation>> aggs;
   std::vector<aggregation::Kind> agg_kinds;
-  std::vector<size_t> col_ids;
 
-  for (size_t i = 0; i < requests.size(); i++) {
-    auto const& request = requests[i];
-    auto const& agg_v   = request.aggregations;
+  for (auto const& request : requests) {
+    auto const& agg_v = request.aggregations;
 
     std::unordered_set<aggregation::Kind> agg_kinds_set;
-    auto insert_agg =
-      [&](size_t i, column_view const& request_values, std::unique_ptr<aggregation>&& agg) {
-        if (agg_kinds_set.insert(agg->kind).second) {
-          agg_kinds.push_back(agg->kind);
-          aggs.push_back(std::move(agg));
-          columns.push_back(request_values);
-          col_ids.push_back(i);
-        }
-      };
+    auto insert_agg = [&](column_view const& request_values, std::unique_ptr<aggregation>&& agg) {
+      if (agg_kinds_set.insert(agg->kind).second) {
+        agg_kinds.push_back(agg->kind);
+        aggs.push_back(std::move(agg));
+        columns.push_back(request_values);
+      }
+    };
 
     auto values_type = cudf::is_dictionary(request.values.type())
                          ? cudf::dictionary_column_view(request.values).keys().type()
@@ -364,13 +354,12 @@ flatten_single_pass_aggs(host_span<aggregation_request const> requests)
       groupby_simple_aggregations_collector collector;
 
       for (auto& agg_s : agg->get_simple_aggregations(values_type, collector)) {
-        insert_agg(i, request.values, std::move(agg_s));
+        insert_agg(request.values, std::move(agg_s));
       }
     }
   }
 
-  return std::make_tuple(
-    table_view(columns), std::move(agg_kinds), std::move(aggs), std::move(col_ids));
+  return std::make_tuple(table_view(columns), std::move(agg_kinds), std::move(aggs));
 }
 
 /**
@@ -397,22 +386,14 @@ void sparse_to_dense_results(table_view const& keys,
   bitmask_type const* row_bitmask_ptr =
     skip_key_rows_with_nulls ? static_cast<bitmask_type*>(row_bitmask.data()) : nullptr;
 
-  for (size_t i = 0; i < requests.size(); i++) {
-    auto const& agg_v = requests[i].aggregations;
-    auto const& col   = requests[i].values;
+  for (auto const& request : requests) {
+    auto const& agg_v = request.aggregations;
+    auto const& col   = request.values;
 
     // Given an aggregation, this will get the result from sparse_results and
     // convert and return dense, compacted result
-    auto finalizer = hash_compound_agg_finalizer<Map>(i,
-                                                      col,
-                                                      sparse_results,
-                                                      dense_results,
-                                                      gather_map,
-                                                      map_size,
-                                                      map,
-                                                      row_bitmask_ptr,
-                                                      stream,
-                                                      mr);
+    auto finalizer = hash_compound_agg_finalizer<Map>(
+      col, sparse_results, dense_results, gather_map, map_size, map, row_bitmask_ptr, stream, mr);
     for (auto&& agg : agg_v) {
       agg->finalize(finalizer);
     }
@@ -500,7 +481,7 @@ void compute_single_pass_aggs(table_view const& keys,
                               rmm::cuda_stream_view stream)
 {
   // flatten the aggs to a table that can be operated on by aggregate_row
-  auto const [flattened_values, agg_kinds, aggs, col_ids] = flatten_single_pass_aggs(requests);
+  auto const [flattened_values, agg_kinds, aggs] = flatten_single_pass_aggs(requests);
 
   // make table that will hold sparse results
   table sparse_table = create_sparse_results_table(flattened_values, agg_kinds, stream);
@@ -528,7 +509,8 @@ void compute_single_pass_aggs(table_view const& keys,
   auto sparse_result_cols = sparse_table.release();
   for (size_t i = 0; i < aggs.size(); i++) {
     // Note that the cache will make a copy of this temporary aggregation
-    sparse_results->add_result(col_ids[i], *aggs[i], std::move(sparse_result_cols[i]));
+    sparse_results->add_result(
+      flattened_values.column(i), *aggs[i], std::move(sparse_result_cols[i]));
   }
 }
 
diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp
index 726b51b7702..b4143de7a86 100644
--- a/cpp/src/groupby/sort/aggregate.cpp
+++ b/cpp/src/groupby/sort/aggregate.cpp
@@ -64,10 +64,10 @@ struct aggregate_result_functor final : store_result_functor {
 template <>
 void aggregate_result_functor::operator()<aggregation::COUNT_VALID>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   cache.add_result(
-    col_idx,
+    values,
     agg,
     get_grouped_values().nullable()
       ? detail::group_count_valid(
@@ -79,10 +79,10 @@ void aggregate_result_functor::operator()<aggregation::COUNT_VALID>(aggregation
 template <>
 void aggregate_result_functor::operator()<aggregation::COUNT_ALL>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   cache.add_result(
-    col_idx,
+    values,
     agg,
     detail::group_count_all(helper.group_offsets(stream), helper.num_groups(stream), stream, mr));
 }
@@ -90,10 +90,10 @@ void aggregate_result_functor::operator()<aggregation::COUNT_ALL>(aggregation co
 template <>
 void aggregate_result_functor::operator()<aggregation::SUM>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   cache.add_result(
-    col_idx,
+    values,
     agg,
     detail::group_sum(
       get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr));
@@ -102,10 +102,10 @@ void aggregate_result_functor::operator()<aggregation::SUM>(aggregation const& a
 template <>
 void aggregate_result_functor::operator()<aggregation::PRODUCT>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   cache.add_result(
-    col_idx,
+    values,
     agg,
     detail::group_product(
       get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr));
@@ -114,9 +114,9 @@ void aggregate_result_functor::operator()<aggregation::PRODUCT>(aggregation cons
 template <>
 void aggregate_result_functor::operator()<aggregation::ARGMAX>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
-  cache.add_result(col_idx,
+  cache.add_result(values,
                    agg,
                    detail::group_argmax(get_grouped_values(),
                                         helper.num_groups(stream),
@@ -129,9 +129,9 @@ void aggregate_result_functor::operator()<aggregation::ARGMAX>(aggregation const
 template <>
 void aggregate_result_functor::operator()<aggregation::ARGMIN>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
-  cache.add_result(col_idx,
+  cache.add_result(values,
                    agg,
                    detail::group_argmin(get_grouped_values(),
                                         helper.num_groups(stream),
@@ -144,7 +144,7 @@ void aggregate_result_functor::operator()<aggregation::ARGMIN>(aggregation const
 template <>
 void aggregate_result_functor::operator()<aggregation::MIN>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   auto result = [&]() {
     auto values_type = cudf::is_dictionary(values.type())
@@ -156,7 +156,7 @@ void aggregate_result_functor::operator()<aggregation::MIN>(aggregation const& a
     } else {
       auto argmin_agg = make_argmin_aggregation();
       operator()<aggregation::ARGMIN>(*argmin_agg);
-      column_view argmin_result = cache.get_result(col_idx, *argmin_agg);
+      column_view argmin_result = cache.get_result(values, *argmin_agg);
 
       // We make a view of ARGMIN result without a null mask and gather using
       // this mask. The values in data buffer of ARGMIN result corresponding
@@ -178,13 +178,13 @@ void aggregate_result_functor::operator()<aggregation::MIN>(aggregation const& a
     }
   }();
 
-  cache.add_result(col_idx, agg, std::move(result));
+  cache.add_result(values, agg, std::move(result));
 };
 
 template <>
 void aggregate_result_functor::operator()<aggregation::MAX>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   auto result = [&]() {
     auto values_type = cudf::is_dictionary(values.type())
@@ -196,7 +196,7 @@ void aggregate_result_functor::operator()<aggregation::MAX>(aggregation const& a
     } else {
       auto argmax_agg = make_argmax_aggregation();
       operator()<aggregation::ARGMAX>(*argmax_agg);
-      column_view argmax_result = cache.get_result(col_idx, *argmax_agg);
+      column_view argmax_result = cache.get_result(values, *argmax_agg);
 
       // We make a view of ARGMAX result without a null mask and gather using
       // this mask. The values in data buffer of ARGMAX result corresponding
@@ -218,20 +218,20 @@ void aggregate_result_functor::operator()<aggregation::MAX>(aggregation const& a
     }
   }();
 
-  cache.add_result(col_idx, agg, std::move(result));
+  cache.add_result(values, agg, std::move(result));
 };
 
 template <>
 void aggregate_result_functor::operator()<aggregation::MEAN>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   auto sum_agg   = make_sum_aggregation();
   auto count_agg = make_count_aggregation();
   operator()<aggregation::SUM>(*sum_agg);
   operator()<aggregation::COUNT_VALID>(*count_agg);
-  column_view sum_result   = cache.get_result(col_idx, *sum_agg);
-  column_view count_result = cache.get_result(col_idx, *count_agg);
+  column_view sum_result   = cache.get_result(values, *sum_agg);
+  column_view count_result = cache.get_result(values, *count_agg);
 
   // TODO (dm): Special case for timestamp. Add target_type_impl for it.
   //            Blocked until we support operator+ on timestamps
@@ -242,20 +242,20 @@ void aggregate_result_functor::operator()<aggregation::MEAN>(aggregation const&
                                    cudf::detail::target_type(values.type(), aggregation::MEAN),
                                    stream,
                                    mr);
-  cache.add_result(col_idx, agg, std::move(result));
+  cache.add_result(values, agg, std::move(result));
 };
 
 template <>
 void aggregate_result_functor::operator()<aggregation::M2>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   auto const mean_agg = make_mean_aggregation();
   operator()<aggregation::MEAN>(*mean_agg);
-  auto const mean_result = cache.get_result(col_idx, *mean_agg);
+  auto const mean_result = cache.get_result(values, *mean_agg);
 
   cache.add_result(
-    col_idx,
+    values,
     agg,
     detail::group_m2(get_grouped_values(), mean_result, helper.group_labels(stream), stream, mr));
 };
@@ -263,15 +263,15 @@ void aggregate_result_functor::operator()<aggregation::M2>(aggregation const& ag
 template <>
 void aggregate_result_functor::operator()<aggregation::VARIANCE>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   auto& var_agg  = dynamic_cast<cudf::detail::var_aggregation const&>(agg);
   auto mean_agg  = make_mean_aggregation();
   auto count_agg = make_count_aggregation();
   operator()<aggregation::MEAN>(*mean_agg);
   operator()<aggregation::COUNT_VALID>(*count_agg);
-  column_view mean_result = cache.get_result(col_idx, *mean_agg);
-  column_view group_sizes = cache.get_result(col_idx, *count_agg);
+  column_view mean_result = cache.get_result(values, *mean_agg);
+  column_view group_sizes = cache.get_result(values, *count_agg);
 
   auto result = detail::group_var(get_grouped_values(),
                                   mean_result,
@@ -280,31 +280,31 @@ void aggregate_result_functor::operator()<aggregation::VARIANCE>(aggregation con
                                   var_agg._ddof,
                                   stream,
                                   mr);
-  cache.add_result(col_idx, agg, std::move(result));
+  cache.add_result(values, agg, std::move(result));
 };
 
 template <>
 void aggregate_result_functor::operator()<aggregation::STD>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   auto& std_agg = dynamic_cast<cudf::detail::std_aggregation const&>(agg);
   auto var_agg  = make_variance_aggregation(std_agg._ddof);
   operator()<aggregation::VARIANCE>(*var_agg);
-  column_view var_result = cache.get_result(col_idx, *var_agg);
+  column_view var_result = cache.get_result(values, *var_agg);
 
   auto result = cudf::detail::unary_operation(var_result, unary_operator::SQRT, stream, mr);
-  cache.add_result(col_idx, agg, std::move(result));
+  cache.add_result(values, agg, std::move(result));
 };
 
 template <>
 void aggregate_result_functor::operator()<aggregation::QUANTILE>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   auto count_agg = make_count_aggregation();
   operator()<aggregation::COUNT_VALID>(*count_agg);
-  column_view group_sizes = cache.get_result(col_idx, *count_agg);
+  column_view group_sizes = cache.get_result(values, *count_agg);
   auto& quantile_agg      = dynamic_cast<cudf::detail::quantile_aggregation const&>(agg);
 
   auto result = detail::group_quantiles(get_sorted_values(),
@@ -315,17 +315,17 @@ void aggregate_result_functor::operator()<aggregation::QUANTILE>(aggregation con
                                         quantile_agg._interpolation,
                                         stream,
                                         mr);
-  cache.add_result(col_idx, agg, std::move(result));
+  cache.add_result(values, agg, std::move(result));
 };
 
 template <>
 void aggregate_result_functor::operator()<aggregation::MEDIAN>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   auto count_agg = make_count_aggregation();
   operator()<aggregation::COUNT_VALID>(*count_agg);
-  column_view group_sizes = cache.get_result(col_idx, *count_agg);
+  column_view group_sizes = cache.get_result(values, *count_agg);
 
   auto result = detail::group_quantiles(get_sorted_values(),
                                         group_sizes,
@@ -335,13 +335,13 @@ void aggregate_result_functor::operator()<aggregation::MEDIAN>(aggregation const
                                         interpolation::LINEAR,
                                         stream,
                                         mr);
-  cache.add_result(col_idx, agg, std::move(result));
+  cache.add_result(values, agg, std::move(result));
 };
 
 template <>
 void aggregate_result_functor::operator()<aggregation::NUNIQUE>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   auto& nunique_agg = dynamic_cast<cudf::detail::nunique_aggregation const&>(agg);
 
@@ -352,13 +352,13 @@ void aggregate_result_functor::operator()<aggregation::NUNIQUE>(aggregation cons
                                       nunique_agg._null_handling,
                                       stream,
                                       mr);
-  cache.add_result(col_idx, agg, std::move(result));
+  cache.add_result(values, agg, std::move(result));
 };
 
 template <>
 void aggregate_result_functor::operator()<aggregation::NTH_ELEMENT>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   auto& nth_element_agg = dynamic_cast<cudf::detail::nth_element_aggregation const&>(agg);
 
@@ -370,9 +370,9 @@ void aggregate_result_functor::operator()<aggregation::NTH_ELEMENT>(aggregation
   } else {
     CUDF_FAIL("Wrong count aggregation kind");
   }
-  column_view group_sizes = cache.get_result(col_idx, *count_agg);
+  column_view group_sizes = cache.get_result(values, *count_agg);
 
-  cache.add_result(col_idx,
+  cache.add_result(values,
                    agg,
                    detail::group_nth_element(get_grouped_values(),
                                              group_sizes,
@@ -388,7 +388,7 @@ void aggregate_result_functor::operator()<aggregation::NTH_ELEMENT>(aggregation
 template <>
 void aggregate_result_functor::operator()<aggregation::COLLECT_LIST>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) { return; }
+  if (cache.has_result(values, agg)) { return; }
 
   auto const null_handling =
     dynamic_cast<cudf::detail::collect_list_aggregation const&>(agg)._null_handling;
@@ -398,13 +398,13 @@ void aggregate_result_functor::operator()<aggregation::COLLECT_LIST>(aggregation
                                       null_handling,
                                       stream,
                                       mr);
-  cache.add_result(col_idx, agg, std::move(result));
+  cache.add_result(values, agg, std::move(result));
 };
 
 template <>
 void aggregate_result_functor::operator()<aggregation::COLLECT_SET>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) { return; }
+  if (cache.has_result(values, agg)) { return; }
 
   auto const null_handling =
     dynamic_cast<cudf::detail::collect_set_aggregation const&>(agg)._null_handling;
@@ -419,7 +419,7 @@ void aggregate_result_functor::operator()<aggregation::COLLECT_SET>(aggregation
   auto const nans_equal =
     dynamic_cast<cudf::detail::collect_set_aggregation const&>(agg)._nans_equal;
   cache.add_result(
-    col_idx,
+    values,
     agg,
     lists::detail::drop_list_duplicates(
       lists_column_view(collect_result->view()), nulls_equal, nans_equal, stream, mr));
@@ -443,10 +443,10 @@ void aggregate_result_functor::operator()<aggregation::COLLECT_SET>(aggregation
 template <>
 void aggregate_result_functor::operator()<aggregation::MERGE_LISTS>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) { return; }
+  if (cache.has_result(values, agg)) { return; }
 
   cache.add_result(
-    col_idx,
+    values,
     agg,
     detail::group_merge_lists(
       get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr));
@@ -479,7 +479,7 @@ void aggregate_result_functor::operator()<aggregation::MERGE_LISTS>(aggregation
 template <>
 void aggregate_result_functor::operator()<aggregation::MERGE_SETS>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) { return; }
+  if (cache.has_result(values, agg)) { return; }
 
   auto const merged_result   = detail::group_merge_lists(get_grouped_values(),
                                                        helper.group_offsets(stream),
@@ -487,7 +487,7 @@ void aggregate_result_functor::operator()<aggregation::MERGE_SETS>(aggregation c
                                                        stream,
                                                        rmm::mr::get_current_device_resource());
   auto const& merge_sets_agg = dynamic_cast<cudf::detail::merge_sets_aggregation const&>(agg);
-  cache.add_result(col_idx,
+  cache.add_result(values,
                    agg,
                    lists::detail::drop_list_duplicates(lists_column_view(merged_result->view()),
                                                        merge_sets_agg._nulls_equal,
@@ -516,10 +516,10 @@ void aggregate_result_functor::operator()<aggregation::MERGE_SETS>(aggregation c
 template <>
 void aggregate_result_functor::operator()<aggregation::MERGE_M2>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) { return; }
+  if (cache.has_result(values, agg)) { return; }
 
   cache.add_result(
-    col_idx,
+    values,
     agg,
     detail::group_merge_m2(
       get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr));
@@ -538,13 +538,12 @@ std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::sort
   // sum and count. std depends on mean and count
   cudf::detail::result_cache cache(requests.size());
 
-  for (size_t i = 0; i < requests.size(); i++) {
+  for (auto const& request : requests) {
     auto store_functor =
-      detail::aggregate_result_functor(i, requests[i].values, helper(), cache, stream, mr);
-    for (size_t j = 0; j < requests[i].aggregations.size(); j++) {
+      detail::aggregate_result_functor(request.values, helper(), cache, stream, mr);
+    for (auto const& agg : request.aggregations) {
       // TODO (dm): single pass compute all supported reductions
-      cudf::detail::aggregation_dispatcher(
-        requests[i].aggregations[j]->kind, store_functor, *requests[i].aggregations[j]);
+      cudf::detail::aggregation_dispatcher(agg->kind, store_functor, *agg);
     }
   }
 
diff --git a/cpp/src/groupby/sort/functors.hpp b/cpp/src/groupby/sort/functors.hpp
index afb92f8e141..cbe5f08639a 100644
--- a/cpp/src/groupby/sort/functors.hpp
+++ b/cpp/src/groupby/sort/functors.hpp
@@ -36,13 +36,12 @@ namespace detail {
  * of these values.
  */
 struct store_result_functor {
-  store_result_functor(size_type col_idx,
-                       column_view const& values,
+  store_result_functor(column_view const& values,
                        sort::sort_groupby_helper& helper,
                        cudf::detail::result_cache& cache,
                        rmm::cuda_stream_view stream,
                        rmm::mr::device_memory_resource* mr)
-    : col_idx(col_idx), helper(helper), cache(cache), values(values), stream(stream), mr(mr)
+    : helper(helper), cache(cache), values(values), stream(stream), mr(mr)
   {
   }
 
@@ -80,7 +79,6 @@ struct store_result_functor {
   };
 
  protected:
-  size_type col_idx;                  ///< Index of column in requests being operated on
   sort::sort_groupby_helper& helper;  ///< Sort helper
   cudf::detail::result_cache& cache;  ///< cache of results to store into
   column_view const& values;          ///< Column of values to group and aggregate
diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp
index c43df77bb5e..3e260dee8c4 100644
--- a/cpp/src/groupby/sort/scan.cpp
+++ b/cpp/src/groupby/sort/scan.cpp
@@ -66,10 +66,10 @@ struct scan_result_functor final : store_result_functor {
 template <>
 void scan_result_functor::operator()<aggregation::SUM>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   cache.add_result(
-    col_idx,
+    values,
     agg,
     detail::sum_scan(
       get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr));
@@ -78,10 +78,10 @@ void scan_result_functor::operator()<aggregation::SUM>(aggregation const& agg)
 template <>
 void scan_result_functor::operator()<aggregation::MIN>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   cache.add_result(
-    col_idx,
+    values,
     agg,
     detail::min_scan(
       get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr));
@@ -90,10 +90,10 @@ void scan_result_functor::operator()<aggregation::MIN>(aggregation const& agg)
 template <>
 void scan_result_functor::operator()<aggregation::MAX>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
   cache.add_result(
-    col_idx,
+    values,
     agg,
     detail::max_scan(
       get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr));
@@ -102,15 +102,15 @@ void scan_result_functor::operator()<aggregation::MAX>(aggregation const& agg)
 template <>
 void scan_result_functor::operator()<aggregation::COUNT_ALL>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
 
-  cache.add_result(col_idx, agg, detail::count_scan(helper.group_labels(stream), stream, mr));
+  cache.add_result(values, agg, detail::count_scan(helper.group_labels(stream), stream, mr));
 }
 
 template <>
 void scan_result_functor::operator()<aggregation::RANK>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
   CUDF_EXPECTS(helper.is_presorted(),
                "Rank aggregate in groupby scan requires the keys to be presorted");
   auto const order_by = get_grouped_values();
@@ -122,7 +122,7 @@ void scan_result_functor::operator()<aggregation::RANK>(aggregation const& agg)
                "Unsupported nested columns in grouped rank scan.");
 
   cache.add_result(
-    col_idx,
+    values,
     agg,
     detail::rank_scan(
       order_by, helper.group_labels(stream), helper.group_offsets(stream), stream, mr));
@@ -131,7 +131,7 @@ void scan_result_functor::operator()<aggregation::RANK>(aggregation const& agg)
 template <>
 void scan_result_functor::operator()<aggregation::DENSE_RANK>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) return;
+  if (cache.has_result(values, agg)) return;
   CUDF_EXPECTS(helper.is_presorted(),
                "Dense rank aggregate in groupby scan requires the keys to be presorted");
   auto const order_by = get_grouped_values();
@@ -143,7 +143,7 @@ void scan_result_functor::operator()<aggregation::DENSE_RANK>(aggregation const&
                "Unsupported nested columns in grouped dense_rank scan.");
 
   cache.add_result(
-    col_idx,
+    values,
     agg,
     detail::dense_rank_scan(
       order_by, helper.group_labels(stream), helper.group_offsets(stream), stream, mr));
@@ -161,10 +161,9 @@ std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::sort
   // sum and count. std depends on mean and count
   cudf::detail::result_cache cache(requests.size());
 
-  for (size_t i = 0; i < requests.size(); i++) {
-    auto store_functor =
-      detail::scan_result_functor(i, requests[i].values, helper(), cache, stream, mr);
-    for (auto const& aggregation : requests[i].aggregations) {
+  for (auto const& request : requests) {
+    auto store_functor = detail::scan_result_functor(request.values, helper(), cache, stream, mr);
+    for (auto const& aggregation : request.aggregations) {
       // TODO (dm): single pass compute all supported reductions
       cudf::detail::aggregation_dispatcher(aggregation->kind, store_functor, *aggregation);
     }

From d9a8bd77261d6bc57f82249bf318729ce85506af Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Wed, 8 Sep 2021 12:42:24 +0530
Subject: [PATCH 15/79] Update cpp/include/cudf/column/column_view.hpp

Co-authored-by: Jake Hemstad <jhemstad@nvidia.com>
---
 cpp/include/cudf/column/column_view.hpp | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp
index 43386e926d2..03e3c201a4b 100644
--- a/cpp/include/cudf/column/column_view.hpp
+++ b/cpp/include/cudf/column/column_view.hpp
@@ -635,18 +635,13 @@ mutable_column_view bit_cast(mutable_column_view const& input, data_type type);
 
 namespace detail {
 /**
- * @brief Computes a hash value on the specified column view based on the shallow state of the
- * column view.
+ * @brief Computes a hash value from the shallow state of the specified column
  *
- * Only the shallow states (i.e pointers instead of data pointed by the pointer) of the column view
- * are used in the hash computation. The hash value is computed  recursively on the children of the
- * column view.
- * The states used for the hash computation are: type, size, data pointer, null_mask pointer,
- * offset, and the hash value of the children. Note that `null_count` is not used.
+ * Two `column_view`s, `c1` and `c2`, that view the exact same physical column will produce equal `shallow_hash()` values, i.e., `is_shallow_equal(c0, c1)` implies `shallow_hash(c0) == shallow_hash(c1)`. 
  *
- * Note: This hash function may result in different hash for a copy of the same column with exactly
- * same contents. It is guarenteed to give same hash value for same column_view only, even if the
- * underlying data changes.
+ * The complexity of computing the hash value of `input` is `O( count_descendants(input) )`, i.e., it is independent of the number of elements in the column. 
+ *
+ * This function does _not_ inspect the elements of `input` nor access any device memory or launch any kernels. 
  *
  * @param input The `column_view` to compute hash
  * @return The hash value

From d96f870309d4c1cb0e7b93f4b32cfdfff543313c Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 8 Sep 2021 10:23:53 -0700
Subject: [PATCH 16/79] added definition of correlation() in cython

---
 Untitled.ipynb                            | 33 +++++++++++++++++++++++
 python/cudf/cudf/_lib/aggregation.pyx     | 15 +++++++++++
 python/cudf/cudf/_lib/cpp/aggregation.pxd |  4 +++
 3 files changed, 52 insertions(+)
 create mode 100644 Untitled.ipynb

diff --git a/Untitled.ipynb b/Untitled.ipynb
new file mode 100644
index 00000000000..e38548d42a9
--- /dev/null
+++ b/Untitled.ipynb
@@ -0,0 +1,33 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "06d9628c-d48e-40cb-a90b-ab83ce92af3b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx
index 097018fe3c0..1ee329a545e 100644
--- a/python/cudf/cudf/_lib/aggregation.pyx
+++ b/python/cudf/cudf/_lib/aggregation.pyx
@@ -321,6 +321,13 @@ cdef class Aggregation:
             ))
         return agg
 
+    @classmethod
+    def correlation(cls):
+        cdef Aggregation agg = cls()
+        agg.c_obj = move(
+            libcudf_aggregation.make_correlation_aggregation[aggregation]())
+        return agg
+
 cdef class RollingAggregation:
     """A Cython wrapper for rolling window aggregations.
 
@@ -674,6 +681,14 @@ cdef class GroupbyAggregation:
         )
         return agg
 
+    @classmethod
+    def correlation(cls):
+        cdef GroupbyAggregation agg = cls()
+        agg.c_obj = move(
+            libcudf_aggregation.
+            make_correlation_aggregation[groupby_aggregation]())
+        return agg
+
 cdef class GroupbyScanAggregation:
     """A Cython wrapper for groupby scan aggregations.
 
diff --git a/python/cudf/cudf/_lib/cpp/aggregation.pxd b/python/cudf/cudf/_lib/cpp/aggregation.pxd
index 13bfa49057c..db4c5f023a6 100644
--- a/python/cudf/cudf/_lib/cpp/aggregation.pxd
+++ b/python/cudf/cudf/_lib/cpp/aggregation.pxd
@@ -38,6 +38,8 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
             COLLECT_SET 'cudf::aggregation::COLLECT_SET'
             PTX 'cudf::aggregation::PTX'
             CUDA 'cudf::aggregation::CUDA'
+            CORRELATION 'cudf::aggregation::CORRELATION'
+
         Kind kind
 
     cdef cppclass rolling_aggregation:
@@ -106,3 +108,5 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
         udf_type type,
         string user_defined_aggregator,
         data_type output_type) except +
+
+    cdef unique_ptr[T] make_correlation_aggregation[T]() except +

From 7e7f250d5f85b06a81a26c536f24d9e69c5b8831 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Mon, 13 Sep 2021 14:47:24 +0530
Subject: [PATCH 17/79] ignore data, nullmask, offset if parent size is empty

---
 cpp/include/cudf/column/column_view.hpp       |  12 +-
 cpp/src/column/column_view.cpp                |  39 ++++--
 cpp/tests/column/column_view_shallow_test.cpp | 126 +++++++++++++-----
 3 files changed, 123 insertions(+), 54 deletions(-)

diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp
index 03e3c201a4b..b7cf833d063 100644
--- a/cpp/include/cudf/column/column_view.hpp
+++ b/cpp/include/cudf/column/column_view.hpp
@@ -637,15 +637,19 @@ namespace detail {
 /**
  * @brief Computes a hash value from the shallow state of the specified column
  *
- * Two `column_view`s, `c1` and `c2`, that view the exact same physical column will produce equal `shallow_hash()` values, i.e., `is_shallow_equal(c0, c1)` implies `shallow_hash(c0) == shallow_hash(c1)`. 
+ * Two `column_view`s, `c0` and `c1`, that view the exact same physical column will produce equal
+ * `shallow_hash()` values, i.e., `is_shallow_equal(c0, c1)` implies `shallow_hash(c0) ==
+ * shallow_hash(c1)`.
  *
- * The complexity of computing the hash value of `input` is `O( count_descendants(input) )`, i.e., it is independent of the number of elements in the column. 
+ * The complexity of computing the hash value of `input` is `O( count_descendants(input) )`, i.e.,
+ * it is independent of the number of elements in the column.
  *
- * This function does _not_ inspect the elements of `input` nor access any device memory or launch any kernels. 
+ * This function does _not_ inspect the elements of `input` nor access any device memory or launch
+ * any kernels.
  *
  * @param input The `column_view` to compute hash
  * @return The hash value
  */
-size_t shallow_hash(column_view const& input);
+std::size_t shallow_hash(column_view const& input);
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp
index d1202108ae5..c0c1f9d4acd 100644
--- a/cpp/src/column/column_view.cpp
+++ b/cpp/src/column/column_view.cpp
@@ -80,21 +80,32 @@ size_type column_view_base::null_count(size_type begin, size_type end) const
 
 // simple prime number multiplication algorithm.
 // Adapted from http://myeyesareblind.com/2017/02/06/Combine-hash-values/#apachecommons
-constexpr void combine_hash(size_t& h1, size_t h2) { h1 = h1 * 127 + h2; }
+constexpr void combine_hash(std::size_t& h1, std::size_t h2) { h1 = h1 * 127 + h2; }
+
+struct shallow_hash_impl {
+  std::size_t operator()(column_view const& input, bool is_parent_empty = false)
+  {
+    std::size_t hash = 0;
+    combine_hash(hash, std::hash<data_type>{}(input.type()));
+    combine_hash(hash, std::hash<size_type>{}(input.size()));
+    if (not(input.is_empty() or is_parent_empty)) {
+      combine_hash(hash, std::hash<void const*>{}(input.head()));
+      combine_hash(hash, std::hash<void const*>{}(input.null_mask()));
+      combine_hash(hash, std::hash<size_type>{}(input.offset()));
+    }
+    hash = std::accumulate(
+      input.child_begin(),
+      input.child_end(),
+      hash,
+      [&input, is_parent_empty](std::size_t hash, auto const& child) {
+        combine_hash(hash, shallow_hash_impl{}(child, input.is_empty() or is_parent_empty));
+        return hash;
+      });
+    return hash;
+  }
+};
 
-size_t shallow_hash(column_view const& input)
-{
-  size_t hash = 0;
-  combine_hash(hash, std::hash<data_type>{}(input.type()));
-  combine_hash(hash, std::hash<size_type>{}(input.size()));
-  combine_hash(hash, std::hash<void const*>{}(input.head()));
-  combine_hash(hash, std::hash<void const*>{}(input.null_mask()));
-  combine_hash(hash, std::hash<size_type>{}(input.offset()));
-  std::for_each(input.child_begin(), input.child_end(), [&hash](auto const& child) {
-    combine_hash(hash, shallow_hash(child));
-  });
-  return hash;
-}
+std::size_t shallow_hash(column_view const& input) { return shallow_hash_impl{}(input); }
 }  // namespace detail
 
 // Immutable view constructor
diff --git a/cpp/tests/column/column_view_shallow_test.cpp b/cpp/tests/column/column_view_shallow_test.cpp
index b0f6eeac450..2d881a3c872 100644
--- a/cpp/tests/column/column_view_shallow_test.cpp
+++ b/cpp/tests/column/column_view_shallow_test.cpp
@@ -30,32 +30,48 @@
 #include <memory>
 #include <type_traits>
 
-template <typename T>
+// fixed_width, dict, string, list, struct
+template <typename T, std::enable_if_t<cudf::is_fixed_width<T>()>* = nullptr>
+std::unique_ptr<cudf::column> example_column()
+{
+  auto begin = thrust::make_counting_iterator(1);
+  auto end   = thrust::make_counting_iterator(16);
+  return cudf::test::fixed_width_column_wrapper<T>(begin, end).release();
+}
+
+template <typename T, std::enable_if_t<cudf::is_dictionary<T>()>* = nullptr>
+std::unique_ptr<cudf::column> example_column()
+{
+  return cudf::test::dictionary_column_wrapper<std::string>(
+           {"fff", "aaa", "ddd", "bbb", "ccc", "ccc", "ccc", "", ""}, {1, 1, 1, 1, 1, 1, 1, 1, 0})
+    .release();
+}
+
+template <typename T,
+          std::enable_if_t<std::is_same_v<T, std::string> or
+                           std::is_same_v<T, cudf::string_view>>* = nullptr>
+std::unique_ptr<cudf::column> example_column()
+
+{
+  return cudf::test::strings_column_wrapper(
+           {"fff", "aaa", "ddd", "bbb", "ccc", "ccc", "ccc", "", ""})
+    .release();
+}
+
+template <typename T, std::enable_if_t<std::is_same_v<T, cudf::list_view>>* = nullptr>
+std::unique_ptr<cudf::column> example_column()
+{
+  return cudf::test::lists_column_wrapper<int>({{1, 2, 3}, {4, 5}, {}, {6, 7, 8}}).release();
+}
+
+template <typename T, std::enable_if_t<std::is_same_v<T, cudf::struct_view>>* = nullptr>
 std::unique_ptr<cudf::column> example_column()
 {
-  // fixed_width, dict, string, list, struct
-  if constexpr (cudf::is_fixed_width<T>()) {
-    auto begin = thrust::make_counting_iterator(1);
-    auto end   = thrust::make_counting_iterator(16);
-    return cudf::test::fixed_width_column_wrapper<T>(begin, end).release();
-  } else if constexpr (cudf::is_dictionary<T>()) {
-    return cudf::test::dictionary_column_wrapper<std::string>(
-             {"fff", "aaa", "ddd", "bbb", "ccc", "ccc", "ccc", "", ""}, {1, 1, 1, 1, 1, 1, 1, 1, 0})
-      .release();
-  } else if constexpr (std::is_same_v<T, std::string> or std::is_same_v<T, cudf::string_view>) {
-    return cudf::test::strings_column_wrapper(
-             {"fff", "aaa", "ddd", "bbb", "ccc", "ccc", "ccc", "", ""})
-      .release();
-  } else if constexpr (std::is_same_v<T, cudf::list_view>) {
-    return cudf::test::lists_column_wrapper<int>({{1, 2, 3}, {4, 5}, {}, {6, 7, 8}}).release();
-  } else if constexpr (std::is_same_v<T, cudf::struct_view>) {
-    auto begin    = thrust::make_counting_iterator(1);
-    auto end      = thrust::make_counting_iterator(16);
-    auto member_0 = cudf::test::fixed_width_column_wrapper<int32_t>(begin, end);
-    auto member_1 = cudf::test::fixed_width_column_wrapper<int32_t>(begin + 10, end + 10);
-    return cudf::test::structs_column_wrapper({member_0, member_1}).release();
-  }
-  return {};
+  auto begin    = thrust::make_counting_iterator(1);
+  auto end      = thrust::make_counting_iterator(16);
+  auto member_0 = cudf::test::fixed_width_column_wrapper<int32_t>(begin, end);
+  auto member_1 = cudf::test::fixed_width_column_wrapper<int32_t>(begin + 10, end + 10);
+  return cudf::test::structs_column_wrapper({member_0, member_1}).release();
 }
 
 template <typename T>
@@ -68,13 +84,15 @@ TYPED_TEST_CASE(ColumnViewShallowTests, AllTypes);
 // Test for fixed_width, dict, string, list, struct
 // column_view, column_view = same hash.
 // column_view, make a copy = same hash.
+// new column_view from column = same hash
 // column_view, copy column = diff hash
+// column_view, diff column = diff hash.
+//
 // column_view old, update data + new column_view     = same hash.
 // column_view old, add null_mask + new column_view   = diff hash.
 // column_view old, update nulls + new column_view    = same hash.
 // column_view old, set_null_count + new column_view  = same hash.
 //
-// column_view, diff column     = diff hash.
 // column_view, sliced[0, size) = same hash (for split too)
 // column_view, sliced[n:)      = diff hash (for split too)
 // column_view, bit_cast        = diff hash
@@ -85,7 +103,7 @@ TYPED_TEST_CASE(ColumnViewShallowTests, AllTypes);
 // update the children column data  = same hash
 // update the children column_views = diff hash
 
-TYPED_TEST(ColumnViewShallowTests, shallow_hash)
+TYPED_TEST(ColumnViewShallowTests, shallow_hash_basic)
 {
   using namespace cudf::detail;
   auto col      = example_column<TypeParam>();
@@ -99,17 +117,32 @@ TYPED_TEST(ColumnViewShallowTests, shallow_hash)
     auto col_view_copy = col_view;
     EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_copy));
   }
+
+  // new column_view from column = same hash
+  {
+    auto col_view_new = cudf::column_view{*col};
+    EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_new));
+  }
+
   // copy column = diff hash
   {
     auto col_new       = std::make_unique<cudf::column>(*col);
     auto col_view_copy = col_new->view();
     EXPECT_NE(shallow_hash(col_view), shallow_hash(col_view_copy));
   }
-  // new column_view from column = same hash
+
+  // column_view, diff column = diff hash.
   {
-    auto col_view_new = cudf::column_view{*col};
-    EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_new));
+    auto col_diff      = example_column<TypeParam>();
+    auto col_view_diff = cudf::column_view{*col_diff};
+    EXPECT_NE(shallow_hash(col_view), shallow_hash(col_view_diff));
   }
+}
+TYPED_TEST(ColumnViewShallowTests, shallow_hash_update_data)
+{
+  using namespace cudf::detail;
+  auto col      = example_column<TypeParam>();
+  auto col_view = cudf::column_view{*col};
   // update data + new column_view = same hash.
   {
     // update data by modifying some bits: fixed_width, string, dict, list, struct
@@ -151,14 +184,14 @@ TYPED_TEST(ColumnViewShallowTests, shallow_hash)
     auto col_view_new2 = cudf::column_view{*col};
     EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_view_new2));
   }
+}
 
-  // column_view, diff column = diff hash.
-  {
-    auto col_diff      = example_column<TypeParam>();
-    auto col_view_diff = cudf::column_view{*col_diff};
-    EXPECT_NE(shallow_hash(col_view), shallow_hash(col_view_diff));
-  }
-  // column_view, sliced[0, size]  = same hash (for split too)
+TYPED_TEST(ColumnViewShallowTests, shallow_hash_slice)
+{
+  using namespace cudf::detail;
+  auto col      = example_column<TypeParam>();
+  auto col_view = cudf::column_view{*col};
+  // column_view, sliced[0, size)  = same hash (for split too)
   {
     auto col_sliced = cudf::slice(col_view, {0, col_view.size()});
     EXPECT_EQ(shallow_hash(col_view), shallow_hash(col_sliced[0]));
@@ -174,6 +207,20 @@ TYPED_TEST(ColumnViewShallowTests, shallow_hash)
     EXPECT_NE(shallow_hash(col_view), shallow_hash(col_split[0]));
     EXPECT_NE(shallow_hash(col_view), shallow_hash(col_split[1]));
   }
+  // column_view, col copy sliced[0, 0)  = same hash (empty column)
+  {
+    auto col_new        = std::make_unique<cudf::column>(*col);
+    auto col_new_view   = col_new->view();
+    auto col_sliced     = cudf::slice(col_view, {0, 0, 1, 1, col_view.size(), col_view.size()});
+    auto col_new_sliced = cudf::slice(col_new_view, {0, 0, 1, 1, col_view.size(), col_view.size()});
+
+    EXPECT_EQ(shallow_hash(col_sliced[0]), shallow_hash(col_sliced[1]));
+    EXPECT_EQ(shallow_hash(col_sliced[1]), shallow_hash(col_sliced[2]));
+    EXPECT_EQ(shallow_hash(col_sliced[0]), shallow_hash(col_new_sliced[0]));
+    EXPECT_EQ(shallow_hash(col_sliced[1]), shallow_hash(col_new_sliced[1]));
+    EXPECT_EQ(shallow_hash(col_sliced[2]), shallow_hash(col_new_sliced[2]));
+  }
+
   // column_view, bit_cast         = diff hash
   {
     if constexpr (std::is_integral_v<TypeParam> and not std::is_same_v<TypeParam, bool>) {
@@ -185,6 +232,13 @@ TYPED_TEST(ColumnViewShallowTests, shallow_hash)
       EXPECT_NE(shallow_hash(col_view), shallow_hash(col_bitcast));
     }
   }
+}
+
+TYPED_TEST(ColumnViewShallowTests, shallow_hash_mutable)
+{
+  using namespace cudf::detail;
+  auto col      = example_column<TypeParam>();
+  auto col_view = cudf::column_view{*col};
   // mutable_column_view, column_view = same hash
   {
     auto col_mutable = cudf::mutable_column_view{*col};

From 00051540682e8340ad4b04e59c7c56bdf43bbcbb Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 14 Sep 2021 02:25:04 +0530
Subject: [PATCH 18/79] is_shallow_equal: for empty columns, ignore children's
 state (but still compare children's types)

---
 cpp/include/cudf/column/column_view.hpp       |   2 +-
 cpp/src/column/column_view.cpp                |  22 ++++
 cpp/tests/column/column_view_shallow_test.cpp | 100 ++++++++++++------
 3 files changed, 91 insertions(+), 33 deletions(-)

diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp
index b490eae44d3..25d13d04207 100644
--- a/cpp/include/cudf/column/column_view.hpp
+++ b/cpp/include/cudf/column/column_view.hpp
@@ -667,6 +667,6 @@ std::size_t shallow_hash(column_view const& input);
  * @param rhs The right `column_view` to compare
  * @return true if the shallow states of the two column views are equal
  */
-bool shallow_equal(column_view const& lhs, column_view const& rhs);
+bool is_shallow_equal(column_view const& lhs, column_view const& rhs);
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp
index c0c1f9d4acd..4447273d7ff 100644
--- a/cpp/src/column/column_view.cpp
+++ b/cpp/src/column/column_view.cpp
@@ -106,6 +106,28 @@ struct shallow_hash_impl {
 };
 
 std::size_t shallow_hash(column_view const& input) { return shallow_hash_impl{}(input); }
+
+struct shallow_equal_impl {
+  bool operator()(column_view const& lhs, column_view const& rhs, bool is_parent_empty = false)
+  {
+    bool const is_empty = (lhs.is_empty() and rhs.is_empty()) or is_parent_empty;
+    return (lhs.type() == rhs.type()) and
+           (is_empty or
+            ((lhs.size() == rhs.size()) and (lhs.head() == rhs.head()) and
+             (lhs.null_mask() == rhs.null_mask()) and (lhs.offset() == rhs.offset()))) and
+           std::equal(lhs.child_begin(),
+                      lhs.child_end(),
+                      rhs.child_begin(),
+                      rhs.child_end(),
+                      [is_empty](auto const& lhs_child, auto const& rhs_child) {
+                        return shallow_equal_impl{}(lhs_child, rhs_child, is_empty);
+                      });
+  }
+};
+bool is_shallow_equal(column_view const& lhs, column_view const& rhs)
+{
+  return shallow_equal_impl{}(lhs, rhs);
+}
 }  // namespace detail
 
 // Immutable view constructor
diff --git a/cpp/tests/column/column_view_shallow_test.cpp b/cpp/tests/column/column_view_shallow_test.cpp
index 58fa28397a8..6858911c54f 100644
--- a/cpp/tests/column/column_view_shallow_test.cpp
+++ b/cpp/tests/column/column_view_shallow_test.cpp
@@ -272,31 +272,46 @@ TYPED_TEST(ColumnViewShallowTests, shallow_hash_mutable)
   }
 }
 
-TYPED_TEST(ColumnViewShallowTests, shallow_equal)
+TYPED_TEST(ColumnViewShallowTests, is_shallow_equal_basic)
 {
   using namespace cudf::detail;
   auto col      = example_column<TypeParam>();
   auto col_view = cudf::column_view{*col};
   // same = same hash
   {
-    EXPECT_TRUE(shallow_equal(col_view, col_view));
+    EXPECT_TRUE(is_shallow_equal(col_view, col_view));
   }
   // copy column_view = same hash
   {
     auto col_view_copy = col_view;
-    EXPECT_TRUE(shallow_equal(col_view, col_view_copy));
+    EXPECT_TRUE(is_shallow_equal(col_view, col_view_copy));
   }
+
+  // new column_view from column = same hash
+  {
+    auto col_view_new = cudf::column_view{*col};
+    EXPECT_TRUE(is_shallow_equal(col_view, col_view_new));
+  }
+
   // copy column = diff hash
   {
     auto col_new       = std::make_unique<cudf::column>(*col);
     auto col_view_copy = col_new->view();
-    EXPECT_FALSE(shallow_equal(col_view, col_view_copy));
+    EXPECT_FALSE(is_shallow_equal(col_view, col_view_copy));
   }
-  // new column_view from column = same hash
+
+  // column_view, diff column = diff hash.
   {
-    auto col_view_new = cudf::column_view{*col};
-    EXPECT_TRUE(shallow_equal(col_view, col_view_new));
+    auto col_diff      = example_column<TypeParam>();
+    auto col_view_diff = cudf::column_view{*col_diff};
+    EXPECT_FALSE(is_shallow_equal(col_view, col_view_diff));
   }
+}
+TYPED_TEST(ColumnViewShallowTests, is_shallow_equal_update_data)
+{
+  using namespace cudf::detail;
+  auto col      = example_column<TypeParam>();
+  auto col_view = cudf::column_view{*col};
   // update data + new column_view = same hash.
   {
     // update data by modifying some bits: fixed_width, string, dict, list, struct
@@ -310,57 +325,71 @@ TYPED_TEST(ColumnViewShallowTests, shallow_equal)
       cudf::set_null_mask(data, 2, 64, true);
     }
     auto col_view_new = cudf::column_view{*col};
-    EXPECT_TRUE(shallow_equal(col_view, col_view_new));
+    EXPECT_TRUE(is_shallow_equal(col_view, col_view_new));
   }
   // add null_mask + new column_view = diff hash.
   {
     col->set_null_mask(cudf::create_null_mask(col->size(), cudf::mask_state::ALL_VALID));
     auto col_view_new = cudf::column_view{*col};
-    EXPECT_FALSE(shallow_equal(col_view, col_view_new));
+    EXPECT_FALSE(is_shallow_equal(col_view, col_view_new));
     col_view_new.null_count();
-    EXPECT_FALSE(shallow_equal(col_view, col_view_new));
+    EXPECT_FALSE(is_shallow_equal(col_view, col_view_new));
     auto col_view_new2 = cudf::column_view{*col};
-    EXPECT_TRUE(shallow_equal(col_view_new, col_view_new2));
+    EXPECT_TRUE(is_shallow_equal(col_view_new, col_view_new2));
   }
   col_view = cudf::column_view{*col};  // updating after adding null_mask
   // update nulls + new column_view = same hash.
   {
     cudf::set_null_mask(col->mutable_view().null_mask(), 2, 4, false);
     auto col_view_new = cudf::column_view{*col};
-    EXPECT_TRUE(shallow_equal(col_view, col_view_new));
+    EXPECT_TRUE(is_shallow_equal(col_view, col_view_new));
   }
   // set_null_count + new column_view = same hash. set_null_count(UNKNOWN_NULL_COUNT)
   {
     col->set_null_count(cudf::UNKNOWN_NULL_COUNT);
     auto col_view_new = cudf::column_view{*col};
-    EXPECT_TRUE(shallow_equal(col_view, col_view_new));
+    EXPECT_TRUE(is_shallow_equal(col_view, col_view_new));
     col->set_null_count(col->size());
     auto col_view_new2 = cudf::column_view{*col};
-    EXPECT_TRUE(shallow_equal(col_view, col_view_new2));
+    EXPECT_TRUE(is_shallow_equal(col_view, col_view_new2));
   }
+}
 
-  // column_view, diff column = diff hash.
-  {
-    auto col_diff      = example_column<TypeParam>();
-    auto col_view_diff = cudf::column_view{*col_diff};
-    EXPECT_FALSE(shallow_equal(col_view, col_view_diff));
-  }
-  // column_view, sliced[0, size]  = same hash (for split too)
+TYPED_TEST(ColumnViewShallowTests, is_shallow_equal_slice)
+{
+  using namespace cudf::detail;
+  auto col      = example_column<TypeParam>();
+  auto col_view = cudf::column_view{*col};
+  // column_view, sliced[0, size)  = same hash (for split too)
   {
     auto col_sliced = cudf::slice(col_view, {0, col_view.size()});
-    EXPECT_TRUE(shallow_equal(col_view, col_sliced[0]));
+    EXPECT_TRUE(is_shallow_equal(col_view, col_sliced[0]));
     auto col_split = cudf::split(col_view, {0});
-    EXPECT_FALSE(shallow_equal(col_view, col_split[0]));
-    EXPECT_TRUE(shallow_equal(col_view, col_split[1]));
+    EXPECT_FALSE(is_shallow_equal(col_view, col_split[0]));
+    EXPECT_TRUE(is_shallow_equal(col_view, col_split[1]));
   }
   // column_view, sliced[n:]       = diff hash (for split too)
   {
     auto col_sliced = cudf::slice(col_view, {1, col_view.size()});
-    EXPECT_FALSE(shallow_equal(col_view, col_sliced[0]));
+    EXPECT_FALSE(is_shallow_equal(col_view, col_sliced[0]));
     auto col_split = cudf::split(col_view, {1});
-    EXPECT_FALSE(shallow_equal(col_view, col_split[0]));
-    EXPECT_FALSE(shallow_equal(col_view, col_split[1]));
+    EXPECT_FALSE(is_shallow_equal(col_view, col_split[0]));
+    EXPECT_FALSE(is_shallow_equal(col_view, col_split[1]));
   }
+  // column_view, col copy sliced[0, 0)  = same hash (empty column)
+  {
+    auto col_new        = std::make_unique<cudf::column>(*col);
+    auto col_new_view   = col_new->view();
+    auto col_sliced     = cudf::slice(col_view, {0, 0, 1, 1, col_view.size(), col_view.size()});
+    auto col_new_sliced = cudf::slice(col_new_view, {0, 0, 1, 1, col_view.size(), col_view.size()});
+
+    EXPECT_TRUE(is_shallow_equal(col_sliced[0], col_sliced[1]));
+    EXPECT_TRUE(is_shallow_equal(col_sliced[1], col_sliced[2]));
+    EXPECT_TRUE(is_shallow_equal(col_sliced[0], col_new_sliced[0]));
+    EXPECT_TRUE(is_shallow_equal(col_sliced[1], col_new_sliced[1]));
+    EXPECT_TRUE(is_shallow_equal(col_sliced[2], col_new_sliced[2]));
+  }
+
   // column_view, bit_cast         = diff hash
   {
     if constexpr (std::is_integral_v<TypeParam> and not std::is_same_v<TypeParam, bool>) {
@@ -369,13 +398,20 @@ TYPED_TEST(ColumnViewShallowTests, shallow_equal)
                                          std::make_signed_t<TypeParam>>;
       auto new_type    = cudf::data_type(cudf::type_to_id<newType>());
       auto col_bitcast = cudf::bit_cast(col_view, new_type);
-      EXPECT_FALSE(shallow_equal(col_view, col_bitcast));
+      EXPECT_FALSE(is_shallow_equal(col_view, col_bitcast));
     }
   }
+}
+
+TYPED_TEST(ColumnViewShallowTests, is_shallow_equal_mutable)
+{
+  using namespace cudf::detail;
+  auto col      = example_column<TypeParam>();
+  auto col_view = cudf::column_view{*col};
   // mutable_column_view, column_view = same hash
   {
     auto col_mutable = cudf::mutable_column_view{*col};
-    EXPECT_TRUE(shallow_equal(col_mutable, col_view));
+    EXPECT_TRUE(is_shallow_equal(col_mutable, col_view));
   }
   // mutable_column_view, modified mutable_column_view = same hash
   // update the children column data = same hash
@@ -390,9 +426,9 @@ TYPED_TEST(ColumnViewShallowTests, shallow_equal)
       auto data = reinterpret_cast<cudf::bitmask_type*>(col->child(0).mutable_view().head());
       cudf::set_null_mask(data, 1, 32, false);
     }
-    EXPECT_TRUE(shallow_equal(col_view, col_mutable));
+    EXPECT_TRUE(is_shallow_equal(col_view, col_mutable));
     auto col_mutable_new = cudf::mutable_column_view{*col};
-    EXPECT_TRUE(shallow_equal(col_mutable, col_mutable_new));
+    EXPECT_TRUE(is_shallow_equal(col_mutable, col_mutable_new));
   }
   // update the children column_views = diff hash
   {
@@ -400,7 +436,7 @@ TYPED_TEST(ColumnViewShallowTests, shallow_equal)
       col->child(0).set_null_mask(
         cudf::create_null_mask(col->child(0).size(), cudf::mask_state::ALL_NULL));
       auto col_child_updated = cudf::mutable_column_view{*col};
-      EXPECT_FALSE(shallow_equal(col_view, col_child_updated));
+      EXPECT_FALSE(is_shallow_equal(col_view, col_child_updated));
     }
   }
 }

From 82b5a26b6b6ca537c0eee35168721a3cd2747464 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Tue, 14 Sep 2021 14:46:30 -0700
Subject: [PATCH 19/79] set STRUCT_AGGS to CORRELATION

---
 python/cudf/cudf/_lib/groupby.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx
index 153b116cd33..a6ea631de82 100644
--- a/python/cudf/cudf/_lib/groupby.pyx
+++ b/python/cudf/cudf/_lib/groupby.pyx
@@ -54,7 +54,7 @@ _CATEGORICAL_AGGS = {"COUNT", "SIZE", "NUNIQUE", "UNIQUE"}
 _STRING_AGGS = {"COUNT", "SIZE", "MAX", "MIN", "NUNIQUE", "NTH", "COLLECT",
                 "UNIQUE"}
 _LIST_AGGS = {"COLLECT"}
-_STRUCT_AGGS = set()
+_STRUCT_AGGS = {'CORRELATION'}
 _INTERVAL_AGGS = set()
 _DECIMAL_AGGS = {"COUNT", "SUM", "ARGMIN", "ARGMAX", "MIN", "MAX", "NUNIQUE",
                  "NTH", "COLLECT"}

From e692053241c68893d20f97e6abaca4639747dfb2 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Wed, 15 Sep 2021 04:40:38 +0530
Subject: [PATCH 20/79] for empty column, ignore child pointers in shallow_hash

---
 cpp/src/column/column_view.cpp | 54 ++++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 19 deletions(-)

diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp
index 4447273d7ff..b0363000213 100644
--- a/cpp/src/column/column_view.cpp
+++ b/cpp/src/column/column_view.cpp
@@ -80,28 +80,44 @@ size_type column_view_base::null_count(size_type begin, size_type end) const
 
 // simple prime number multiplication algorithm.
 // Adapted from http://myeyesareblind.com/2017/02/06/Combine-hash-values/#apachecommons
-constexpr void combine_hash(std::size_t& h1, std::size_t h2) { h1 = h1 * 127 + h2; }
+constexpr std::size_t combine_hash(std::size_t h1, std::size_t h2) { return h1 * 127 + h2; }
+// 32/64-bit boost hash_combine https://stackoverflow.com/a/4948967/1550940
+constexpr std::size_t hash_combine(std::size_t lhs, std::size_t rhs)
+{
+  constexpr std::size_t const magic = sizeof(std::size_t) == 8 ? 0x9e3779b97f4a7c15 : 0x9e3779b9;
+  lhs ^= rhs + magic + (lhs << 6) + (lhs >> 2);
+  return lhs;
+}
+
+// Struct to use custom combine hash and fold expression
+struct HashValue {
+  std::size_t hash;
+  HashValue(std::size_t h) : hash{h} {}
+  HashValue operator^(HashValue const& other) const
+  {
+    return HashValue{combine_hash(hash, other.hash)};
+  }
+};
+
+template <typename... Ts>
+constexpr auto hash(Ts&&... ts)
+{
+  return (... ^ HashValue(std::hash<Ts>{}(ts))).hash;
+}
 
 struct shallow_hash_impl {
-  std::size_t operator()(column_view const& input, bool is_parent_empty = false)
+  std::size_t operator()(column_view const& c, bool is_parent_empty = false)
   {
-    std::size_t hash = 0;
-    combine_hash(hash, std::hash<data_type>{}(input.type()));
-    combine_hash(hash, std::hash<size_type>{}(input.size()));
-    if (not(input.is_empty() or is_parent_empty)) {
-      combine_hash(hash, std::hash<void const*>{}(input.head()));
-      combine_hash(hash, std::hash<void const*>{}(input.null_mask()));
-      combine_hash(hash, std::hash<size_type>{}(input.offset()));
-    }
-    hash = std::accumulate(
-      input.child_begin(),
-      input.child_end(),
-      hash,
-      [&input, is_parent_empty](std::size_t hash, auto const& child) {
-        combine_hash(hash, shallow_hash_impl{}(child, input.is_empty() or is_parent_empty));
-        return hash;
-      });
-    return hash;
+    std::size_t const init = (c.is_empty() or is_parent_empty)
+                               ? hash(c.type(), c.size())
+                               : hash(c.type(), c.size(), c.head(), c.null_mask(), c.offset());
+    return std::accumulate(c.child_begin(),
+                           c.child_end(),
+                           init,
+                           [&c, is_parent_empty](std::size_t hash, auto const& child) {
+                             return combine_hash(
+                               hash, shallow_hash_impl{}(child, c.is_empty() or is_parent_empty));
+                           });
   }
 };
 

From 44372bcb35d27e4fff3c1fef58e8d2b4fed10feb Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Wed, 15 Sep 2021 04:43:11 +0530
Subject: [PATCH 21/79] rename is_shallow_equal to is_shallow_equivalent

---
 cpp/include/cudf/column/column_view.hpp       |  4 +-
 cpp/src/column/column_view.cpp                |  2 +-
 cpp/tests/column/column_view_shallow_test.cpp | 64 +++++++++----------
 3 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp
index 25d13d04207..546f91a30a3 100644
--- a/cpp/include/cudf/column/column_view.hpp
+++ b/cpp/include/cudf/column/column_view.hpp
@@ -638,7 +638,7 @@ namespace detail {
  * @brief Computes a hash value from the shallow state of the specified column
  *
  * Two `column_view`s, `c1` and `c2`, that view the exact same physical column will produce equal
- * `shallow_hash()` values, i.e., `is_shallow_equal(c0, c1)` implies `shallow_hash(c0) ==
+ * `shallow_hash()` values, i.e., `is_shallow_equivalent(c0, c1)` implies `shallow_hash(c0) ==
  * shallow_hash(c1)`.
  *
  * The complexity of computing the hash value of `input` is `O( count_descendants(input) )`, i.e.,
@@ -667,6 +667,6 @@ std::size_t shallow_hash(column_view const& input);
  * @param rhs The right `column_view` to compare
  * @return true if the shallow states of the two column views are equal
  */
-bool is_shallow_equal(column_view const& lhs, column_view const& rhs);
+bool is_shallow_equivalent(column_view const& lhs, column_view const& rhs);
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp
index b0363000213..2464a9eeee6 100644
--- a/cpp/src/column/column_view.cpp
+++ b/cpp/src/column/column_view.cpp
@@ -140,7 +140,7 @@ struct shallow_equal_impl {
                       });
   }
 };
-bool is_shallow_equal(column_view const& lhs, column_view const& rhs)
+bool is_shallow_equivalent(column_view const& lhs, column_view const& rhs)
 {
   return shallow_equal_impl{}(lhs, rhs);
 }
diff --git a/cpp/tests/column/column_view_shallow_test.cpp b/cpp/tests/column/column_view_shallow_test.cpp
index 6858911c54f..f76f682bb2f 100644
--- a/cpp/tests/column/column_view_shallow_test.cpp
+++ b/cpp/tests/column/column_view_shallow_test.cpp
@@ -272,42 +272,42 @@ TYPED_TEST(ColumnViewShallowTests, shallow_hash_mutable)
   }
 }
 
-TYPED_TEST(ColumnViewShallowTests, is_shallow_equal_basic)
+TYPED_TEST(ColumnViewShallowTests, is_shallow_equivalent_basic)
 {
   using namespace cudf::detail;
   auto col      = example_column<TypeParam>();
   auto col_view = cudf::column_view{*col};
   // same = same hash
   {
-    EXPECT_TRUE(is_shallow_equal(col_view, col_view));
+    EXPECT_TRUE(is_shallow_equivalent(col_view, col_view));
   }
   // copy column_view = same hash
   {
     auto col_view_copy = col_view;
-    EXPECT_TRUE(is_shallow_equal(col_view, col_view_copy));
+    EXPECT_TRUE(is_shallow_equivalent(col_view, col_view_copy));
   }
 
   // new column_view from column = same hash
   {
     auto col_view_new = cudf::column_view{*col};
-    EXPECT_TRUE(is_shallow_equal(col_view, col_view_new));
+    EXPECT_TRUE(is_shallow_equivalent(col_view, col_view_new));
   }
 
   // copy column = diff hash
   {
     auto col_new       = std::make_unique<cudf::column>(*col);
     auto col_view_copy = col_new->view();
-    EXPECT_FALSE(is_shallow_equal(col_view, col_view_copy));
+    EXPECT_FALSE(is_shallow_equivalent(col_view, col_view_copy));
   }
 
   // column_view, diff column = diff hash.
   {
     auto col_diff      = example_column<TypeParam>();
     auto col_view_diff = cudf::column_view{*col_diff};
-    EXPECT_FALSE(is_shallow_equal(col_view, col_view_diff));
+    EXPECT_FALSE(is_shallow_equivalent(col_view, col_view_diff));
   }
 }
-TYPED_TEST(ColumnViewShallowTests, is_shallow_equal_update_data)
+TYPED_TEST(ColumnViewShallowTests, is_shallow_equivalent_update_data)
 {
   using namespace cudf::detail;
   auto col      = example_column<TypeParam>();
@@ -325,37 +325,37 @@ TYPED_TEST(ColumnViewShallowTests, is_shallow_equal_update_data)
       cudf::set_null_mask(data, 2, 64, true);
     }
     auto col_view_new = cudf::column_view{*col};
-    EXPECT_TRUE(is_shallow_equal(col_view, col_view_new));
+    EXPECT_TRUE(is_shallow_equivalent(col_view, col_view_new));
   }
   // add null_mask + new column_view = diff hash.
   {
     col->set_null_mask(cudf::create_null_mask(col->size(), cudf::mask_state::ALL_VALID));
     auto col_view_new = cudf::column_view{*col};
-    EXPECT_FALSE(is_shallow_equal(col_view, col_view_new));
+    EXPECT_FALSE(is_shallow_equivalent(col_view, col_view_new));
     col_view_new.null_count();
-    EXPECT_FALSE(is_shallow_equal(col_view, col_view_new));
+    EXPECT_FALSE(is_shallow_equivalent(col_view, col_view_new));
     auto col_view_new2 = cudf::column_view{*col};
-    EXPECT_TRUE(is_shallow_equal(col_view_new, col_view_new2));
+    EXPECT_TRUE(is_shallow_equivalent(col_view_new, col_view_new2));
   }
   col_view = cudf::column_view{*col};  // updating after adding null_mask
   // update nulls + new column_view = same hash.
   {
     cudf::set_null_mask(col->mutable_view().null_mask(), 2, 4, false);
     auto col_view_new = cudf::column_view{*col};
-    EXPECT_TRUE(is_shallow_equal(col_view, col_view_new));
+    EXPECT_TRUE(is_shallow_equivalent(col_view, col_view_new));
   }
   // set_null_count + new column_view = same hash. set_null_count(UNKNOWN_NULL_COUNT)
   {
     col->set_null_count(cudf::UNKNOWN_NULL_COUNT);
     auto col_view_new = cudf::column_view{*col};
-    EXPECT_TRUE(is_shallow_equal(col_view, col_view_new));
+    EXPECT_TRUE(is_shallow_equivalent(col_view, col_view_new));
     col->set_null_count(col->size());
     auto col_view_new2 = cudf::column_view{*col};
-    EXPECT_TRUE(is_shallow_equal(col_view, col_view_new2));
+    EXPECT_TRUE(is_shallow_equivalent(col_view, col_view_new2));
   }
 }
 
-TYPED_TEST(ColumnViewShallowTests, is_shallow_equal_slice)
+TYPED_TEST(ColumnViewShallowTests, is_shallow_equivalent_slice)
 {
   using namespace cudf::detail;
   auto col      = example_column<TypeParam>();
@@ -363,18 +363,18 @@ TYPED_TEST(ColumnViewShallowTests, is_shallow_equal_slice)
   // column_view, sliced[0, size)  = same hash (for split too)
   {
     auto col_sliced = cudf::slice(col_view, {0, col_view.size()});
-    EXPECT_TRUE(is_shallow_equal(col_view, col_sliced[0]));
+    EXPECT_TRUE(is_shallow_equivalent(col_view, col_sliced[0]));
     auto col_split = cudf::split(col_view, {0});
-    EXPECT_FALSE(is_shallow_equal(col_view, col_split[0]));
-    EXPECT_TRUE(is_shallow_equal(col_view, col_split[1]));
+    EXPECT_FALSE(is_shallow_equivalent(col_view, col_split[0]));
+    EXPECT_TRUE(is_shallow_equivalent(col_view, col_split[1]));
   }
   // column_view, sliced[n:]       = diff hash (for split too)
   {
     auto col_sliced = cudf::slice(col_view, {1, col_view.size()});
-    EXPECT_FALSE(is_shallow_equal(col_view, col_sliced[0]));
+    EXPECT_FALSE(is_shallow_equivalent(col_view, col_sliced[0]));
     auto col_split = cudf::split(col_view, {1});
-    EXPECT_FALSE(is_shallow_equal(col_view, col_split[0]));
-    EXPECT_FALSE(is_shallow_equal(col_view, col_split[1]));
+    EXPECT_FALSE(is_shallow_equivalent(col_view, col_split[0]));
+    EXPECT_FALSE(is_shallow_equivalent(col_view, col_split[1]));
   }
   // column_view, col copy sliced[0, 0)  = same hash (empty column)
   {
@@ -383,11 +383,11 @@ TYPED_TEST(ColumnViewShallowTests, is_shallow_equal_slice)
     auto col_sliced     = cudf::slice(col_view, {0, 0, 1, 1, col_view.size(), col_view.size()});
     auto col_new_sliced = cudf::slice(col_new_view, {0, 0, 1, 1, col_view.size(), col_view.size()});
 
-    EXPECT_TRUE(is_shallow_equal(col_sliced[0], col_sliced[1]));
-    EXPECT_TRUE(is_shallow_equal(col_sliced[1], col_sliced[2]));
-    EXPECT_TRUE(is_shallow_equal(col_sliced[0], col_new_sliced[0]));
-    EXPECT_TRUE(is_shallow_equal(col_sliced[1], col_new_sliced[1]));
-    EXPECT_TRUE(is_shallow_equal(col_sliced[2], col_new_sliced[2]));
+    EXPECT_TRUE(is_shallow_equivalent(col_sliced[0], col_sliced[1]));
+    EXPECT_TRUE(is_shallow_equivalent(col_sliced[1], col_sliced[2]));
+    EXPECT_TRUE(is_shallow_equivalent(col_sliced[0], col_new_sliced[0]));
+    EXPECT_TRUE(is_shallow_equivalent(col_sliced[1], col_new_sliced[1]));
+    EXPECT_TRUE(is_shallow_equivalent(col_sliced[2], col_new_sliced[2]));
   }
 
   // column_view, bit_cast         = diff hash
@@ -398,12 +398,12 @@ TYPED_TEST(ColumnViewShallowTests, is_shallow_equal_slice)
                                          std::make_signed_t<TypeParam>>;
       auto new_type    = cudf::data_type(cudf::type_to_id<newType>());
       auto col_bitcast = cudf::bit_cast(col_view, new_type);
-      EXPECT_FALSE(is_shallow_equal(col_view, col_bitcast));
+      EXPECT_FALSE(is_shallow_equivalent(col_view, col_bitcast));
     }
   }
 }
 
-TYPED_TEST(ColumnViewShallowTests, is_shallow_equal_mutable)
+TYPED_TEST(ColumnViewShallowTests, is_shallow_equivalent_mutable)
 {
   using namespace cudf::detail;
   auto col      = example_column<TypeParam>();
@@ -411,7 +411,7 @@ TYPED_TEST(ColumnViewShallowTests, is_shallow_equal_mutable)
   // mutable_column_view, column_view = same hash
   {
     auto col_mutable = cudf::mutable_column_view{*col};
-    EXPECT_TRUE(is_shallow_equal(col_mutable, col_view));
+    EXPECT_TRUE(is_shallow_equivalent(col_mutable, col_view));
   }
   // mutable_column_view, modified mutable_column_view = same hash
   // update the children column data = same hash
@@ -426,9 +426,9 @@ TYPED_TEST(ColumnViewShallowTests, is_shallow_equal_mutable)
       auto data = reinterpret_cast<cudf::bitmask_type*>(col->child(0).mutable_view().head());
       cudf::set_null_mask(data, 1, 32, false);
     }
-    EXPECT_TRUE(is_shallow_equal(col_view, col_mutable));
+    EXPECT_TRUE(is_shallow_equivalent(col_view, col_mutable));
     auto col_mutable_new = cudf::mutable_column_view{*col};
-    EXPECT_TRUE(is_shallow_equal(col_mutable, col_mutable_new));
+    EXPECT_TRUE(is_shallow_equivalent(col_mutable, col_mutable_new));
   }
   // update the children column_views = diff hash
   {
@@ -436,7 +436,7 @@ TYPED_TEST(ColumnViewShallowTests, is_shallow_equal_mutable)
       col->child(0).set_null_mask(
         cudf::create_null_mask(col->child(0).size(), cudf::mask_state::ALL_NULL));
       auto col_child_updated = cudf::mutable_column_view{*col};
-      EXPECT_FALSE(is_shallow_equal(col_view, col_child_updated));
+      EXPECT_FALSE(is_shallow_equivalent(col_view, col_child_updated));
     }
   }
 }

From 3aab04fd20a78ba543265e572d4c9507ee27b59e Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 15 Sep 2021 16:08:52 -0700
Subject: [PATCH 22/79] added ctypedef correlation_type. need to add tests

---
 python/cudf/cudf/_lib/aggregation.pyx     | 10 +++++++---
 python/cudf/cudf/_lib/cpp/aggregation.pxd |  8 +++++++-
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx
index 3557b505e81..0c594d302f8 100644
--- a/python/cudf/cudf/_lib/aggregation.pyx
+++ b/python/cudf/cudf/_lib/aggregation.pyx
@@ -57,7 +57,7 @@ class AggregationKind(Enum):
     UNIQUE = libcudf_aggregation.aggregation.Kind.COLLECT_SET
     PTX = libcudf_aggregation.aggregation.Kind.PTX
     CUDA = libcudf_aggregation.aggregation.Kind.CUDA
-
+    CORRELATION = libcudf_aggregation.aggregation.Kind.CORRELATION
 
 cdef class Aggregation:
     """A Cython wrapper for aggregations.
@@ -325,7 +325,9 @@ cdef class Aggregation:
     def correlation(cls):
         cdef Aggregation agg = cls()
         agg.c_obj = move(
-            libcudf_aggregation.make_correlation_aggregation[aggregation]())
+            libcudf_aggregation.make_correlation_aggregation[aggregation](
+                libcudf_aggregation.correlation_type.PEARSON
+            ))
         return agg
 
 cdef class RollingAggregation:
@@ -704,7 +706,9 @@ cdef class GroupbyAggregation:
         cdef GroupbyAggregation agg = cls()
         agg.c_obj = move(
             libcudf_aggregation.
-            make_correlation_aggregation[groupby_aggregation]())
+            make_correlation_aggregation[groupby_aggregation](
+                libcudf_aggregation.correlation_type.PEARSON
+            ))
         return agg
 
 cdef class GroupbyScanAggregation:
diff --git a/python/cudf/cudf/_lib/cpp/aggregation.pxd b/python/cudf/cudf/_lib/cpp/aggregation.pxd
index db4c5f023a6..04deeb877d1 100644
--- a/python/cudf/cudf/_lib/cpp/aggregation.pxd
+++ b/python/cudf/cudf/_lib/cpp/aggregation.pxd
@@ -55,6 +55,11 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
         CUDA 'cudf::udf_type::CUDA'
         PTX 'cudf::udf_type::PTX'
 
+    ctypedef enum correlation_type:
+        PEARSON 'cudf::correlation_type::PEARSON'
+        KENDALL 'cudf::correlation_type::KENDALL'
+        SPEARMAN 'cudf::correlation_type::SPEARMAN'
+
     cdef unique_ptr[T] make_sum_aggregation[T]() except +
 
     cdef unique_ptr[T] make_product_aggregation[T]() except +
@@ -109,4 +114,5 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
         string user_defined_aggregator,
         data_type output_type) except +
 
-    cdef unique_ptr[T] make_correlation_aggregation[T]() except +
+    cdef unique_ptr[T] make_correlation_aggregation[T](
+        correlation_type type) except +

From ecc3a7d436c5b054674d3f81054db61a5c0fbbbf Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Thu, 16 Sep 2021 18:04:41 +0530
Subject: [PATCH 23/79] use hash_combine for shallow hash

---
 cpp/include/cudf/types.hpp     | 21 ++++++++++++++++++---
 cpp/src/column/column_view.cpp | 18 ++++++------------
 2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp
index b417148b1a4..2afc220162c 100644
--- a/cpp/include/cudf/types.hpp
+++ b/cpp/include/cudf/types.hpp
@@ -27,8 +27,8 @@
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
-#include <iterator>
 #include <functional>
+#include <iterator>
 
 /**
  * @file
@@ -328,6 +328,21 @@ inline bool operator!=(data_type const& lhs, data_type const& rhs) { return !(lh
  */
 std::size_t size_of(data_type t);
 
+/**
+ * @brief Combines two hashed values into a single hashed value.
+ *
+ * Adapted from boost hash_combine, modified for 32/64-bit
+ * https://stackoverflow.com/a/4948967/1550940
+ * @param lhs The first hashed value
+ * @param rhs The second hashed value
+ * @return Combined hash value
+ */
+constexpr std::size_t hash_combine(std::size_t lhs, std::size_t rhs)
+{
+  constexpr std::size_t const magic = sizeof(std::size_t) == 8 ? 0x9e3779b97f4a7c15 : 0x9e3779b9;
+  lhs ^= rhs + magic + (lhs << 6) + (lhs >> 2);
+  return lhs;
+}
 /**
  *  @brief Identifies the hash function to be used
  */
@@ -353,8 +368,8 @@ template <>
 struct hash<cudf::data_type> {
   std::size_t operator()(cudf::data_type const& type) const noexcept
   {
-    return std::hash<int32_t>{}(static_cast<int32_t>(type.id())) * 127 +
-           std::hash<int32_t>{}(type.scale());
+    return cudf::hash_combine(std::hash<int32_t>{}(static_cast<int32_t>(type.id())),
+                              std::hash<int32_t>{}(type.scale()));
   }
 };
 }  // namespace std
diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp
index 2464a9eeee6..67aad27f951 100644
--- a/cpp/src/column/column_view.cpp
+++ b/cpp/src/column/column_view.cpp
@@ -78,24 +78,18 @@ size_type column_view_base::null_count(size_type begin, size_type end) const
            : cudf::count_unset_bits(null_mask(), offset() + begin, offset() + end);
 }
 
+// Alternative fast hash functions
 // simple prime number multiplication algorithm.
 // Adapted from http://myeyesareblind.com/2017/02/06/Combine-hash-values/#apachecommons
-constexpr std::size_t combine_hash(std::size_t h1, std::size_t h2) { return h1 * 127 + h2; }
-// 32/64-bit boost hash_combine https://stackoverflow.com/a/4948967/1550940
-constexpr std::size_t hash_combine(std::size_t lhs, std::size_t rhs)
-{
-  constexpr std::size_t const magic = sizeof(std::size_t) == 8 ? 0x9e3779b97f4a7c15 : 0x9e3779b9;
-  lhs ^= rhs + magic + (lhs << 6) + (lhs >> 2);
-  return lhs;
-}
+// constexpr std::size_t combine_hash(std::size_t h1, std::size_t h2) { return h1 * 127 + h2; }
 
-// Struct to use custom combine hash and fold expression
+// Struct to use custom hash combine and fold expression
 struct HashValue {
   std::size_t hash;
-  HashValue(std::size_t h) : hash{h} {}
+  explicit HashValue(std::size_t h) : hash{h} {}
   HashValue operator^(HashValue const& other) const
   {
-    return HashValue{combine_hash(hash, other.hash)};
+    return HashValue{hash_combine(hash, other.hash)};
   }
 };
 
@@ -115,7 +109,7 @@ struct shallow_hash_impl {
                            c.child_end(),
                            init,
                            [&c, is_parent_empty](std::size_t hash, auto const& child) {
-                             return combine_hash(
+                             return hash_combine(
                                hash, shallow_hash_impl{}(child, c.is_empty() or is_parent_empty));
                            });
   }

From d2cd4681975f5d87b67ba8325c2af038d0b27a40 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Fri, 17 Sep 2021 04:08:02 +0530
Subject: [PATCH 24/79] Apply suggestions from code review (jake)

Co-authored-by: Jake Hemstad <jhemstad@nvidia.com>
---
 cpp/include/cudf/column/column_view.hpp | 27 +++++++++++++------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp
index 546f91a30a3..c6ba9fed1cc 100644
--- a/cpp/include/cudf/column/column_view.hpp
+++ b/cpp/include/cudf/column/column_view.hpp
@@ -637,9 +637,7 @@ namespace detail {
 /**
  * @brief Computes a hash value from the shallow state of the specified column
  *
- * Two `column_view`s, `c1` and `c2`, that view the exact same physical column will produce equal
- * `shallow_hash()` values, i.e., `is_shallow_equivalent(c0, c1)` implies `shallow_hash(c0) ==
- * shallow_hash(c1)`.
+ * For any two columns, if `is_shallow_equivalent(c0,c1)` then `shallow_hash(c0) == shallow_hash(c1)`. 
  *
  * The complexity of computing the hash value of `input` is `O( count_descendants(input) )`, i.e.,
  * it is independent of the number of elements in the column.
@@ -648,24 +646,27 @@ namespace detail {
  * any kernels.
  *
  * @param input The `column_view` to compute hash
- * @return The hash value
+ * @return The hash value derived from the shallow state of `input`. 
  */
 std::size_t shallow_hash(column_view const& input);
 
 /**
- * @brief Equality operator for column views based on the shallow state of the column view.
+ * @brief Uses only shallow state to determine if two `column_view`s view equivalent columns 
  *
- * Only shallow states used for the hash computation are: type, size, data pointer, null_mask
- * pointer, offset and the column_view of the children recursively. Note that `null_count` is not
- * used.
- *
- * Note: This equality function will consider a column not equal to a copy of the same column with
- * exactly same contents. It is guarenteed to return true for same column_view only, even if the
- * underlying data changes.
+ *  Two columns are equivalent if for any operation `F` then:
+ *   ```
+ *    is_shallow_equivalent(c0, c1) ==> is_shallow_equivalent(F(c0),F(c1))
+ *   ```
+ * For any two non-empty columns, `is_shallow_equivalent(c0,c1)` is true only if they view the exact same physical column. In other words, two physically independent columns may have exactly equivalent elements but their shallow state would not be equivalent. 
+ * 
+ * The complexity of this function is `O( min(count_descendants(lhs), count_descendants(rhs)) )`, i.e., it is independent of the number of elements in either column.
  *
+ * This function does _not_ inspect the elements of `lhs` or `rhs` nor access any device memory nor launch
+ * any kernels.
+ * 
  * @param lhs The left `column_view` to compare
  * @param rhs The right `column_view` to compare
- * @return true if the shallow states of the two column views are equal
+ * @return If `lhs` and `rhs` have equivalent shallow state
  */
 bool is_shallow_equivalent(column_view const& lhs, column_view const& rhs);
 }  // namespace detail

From fa40847cd36c79fdb1fc4799ebd300e78b520438 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Fri, 17 Sep 2021 10:40:42 +0530
Subject: [PATCH 25/79] address review comments

---
 cpp/include/cudf/column/column_view.hpp | 22 +++++----
 cpp/include/cudf/types.hpp              |  7 +--
 cpp/src/column/column_view.cpp          | 66 ++++++++++++-------------
 3 files changed, 49 insertions(+), 46 deletions(-)

diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp
index c6ba9fed1cc..f4ca4404430 100644
--- a/cpp/include/cudf/column/column_view.hpp
+++ b/cpp/include/cudf/column/column_view.hpp
@@ -637,7 +637,8 @@ namespace detail {
 /**
  * @brief Computes a hash value from the shallow state of the specified column
  *
- * For any two columns, if `is_shallow_equivalent(c0,c1)` then `shallow_hash(c0) == shallow_hash(c1)`. 
+ * For any two columns, if `is_shallow_equivalent(c0,c1)` then `shallow_hash(c0) ==
+ * shallow_hash(c1)`.
  *
  * The complexity of computing the hash value of `input` is `O( count_descendants(input) )`, i.e.,
  * it is independent of the number of elements in the column.
@@ -646,24 +647,27 @@ namespace detail {
  * any kernels.
  *
  * @param input The `column_view` to compute hash
- * @return The hash value derived from the shallow state of `input`. 
+ * @return The hash value derived from the shallow state of `input`.
  */
 std::size_t shallow_hash(column_view const& input);
 
 /**
- * @brief Uses only shallow state to determine if two `column_view`s view equivalent columns 
+ * @brief Uses only shallow state to determine if two `column_view`s view equivalent columns
  *
  *  Two columns are equivalent if for any operation `F` then:
  *   ```
  *    is_shallow_equivalent(c0, c1) ==> is_shallow_equivalent(F(c0),F(c1))
  *   ```
- * For any two non-empty columns, `is_shallow_equivalent(c0,c1)` is true only if they view the exact same physical column. In other words, two physically independent columns may have exactly equivalent elements but their shallow state would not be equivalent. 
- * 
- * The complexity of this function is `O( min(count_descendants(lhs), count_descendants(rhs)) )`, i.e., it is independent of the number of elements in either column.
+ * For any two non-empty columns, `is_shallow_equivalent(c0,c1)` is true only if they view the exact
+ * same physical column. In other words, two physically independent columns may have exactly
+ * equivalent elements but their shallow state would not be equivalent.
+ *
+ * The complexity of this function is `O( min(count_descendants(lhs), count_descendants(rhs)) )`,
+ * i.e., it is independent of the number of elements in either column.
+ *
+ * This function does _not_ inspect the elements of `lhs` or `rhs` nor access any device memory nor
+ * launch any kernels.
  *
- * This function does _not_ inspect the elements of `lhs` or `rhs` nor access any device memory nor launch
- * any kernels.
- * 
  * @param lhs The left `column_view` to compare
  * @param rhs The right `column_view` to compare
  * @return If `lhs` and `rhs` have equivalent shallow state
diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp
index 2afc220162c..6926683b401 100644
--- a/cpp/include/cudf/types.hpp
+++ b/cpp/include/cudf/types.hpp
@@ -331,16 +331,17 @@ std::size_t size_of(data_type t);
 /**
  * @brief Combines two hashed values into a single hashed value.
  *
- * Adapted from boost hash_combine, modified for 32/64-bit
+ * Adapted from Boost hash_combine function, modified for 64-bit
+ * https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
  * https://stackoverflow.com/a/4948967/1550940
+ *
  * @param lhs The first hashed value
  * @param rhs The second hashed value
  * @return Combined hash value
  */
 constexpr std::size_t hash_combine(std::size_t lhs, std::size_t rhs)
 {
-  constexpr std::size_t const magic = sizeof(std::size_t) == 8 ? 0x9e3779b97f4a7c15 : 0x9e3779b9;
-  lhs ^= rhs + magic + (lhs << 6) + (lhs >> 2);
+  lhs ^= rhs + 0x9e3779b97f4a7c15 + (lhs << 6) + (lhs >> 2);
   return lhs;
 }
 /**
diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp
index 67aad27f951..525da2afe73 100644
--- a/cpp/src/column/column_view.cpp
+++ b/cpp/src/column/column_view.cpp
@@ -23,6 +23,7 @@
 #include <thrust/iterator/transform_iterator.h>
 
 #include <algorithm>
+#include <cstddef>
 #include <exception>
 #include <numeric>
 #include <vector>
@@ -99,44 +100,41 @@ constexpr auto hash(Ts&&... ts)
   return (... ^ HashValue(std::hash<Ts>{}(ts))).hash;
 }
 
-struct shallow_hash_impl {
-  std::size_t operator()(column_view const& c, bool is_parent_empty = false)
-  {
-    std::size_t const init = (c.is_empty() or is_parent_empty)
-                               ? hash(c.type(), c.size())
-                               : hash(c.type(), c.size(), c.head(), c.null_mask(), c.offset());
-    return std::accumulate(c.child_begin(),
-                           c.child_end(),
-                           init,
-                           [&c, is_parent_empty](std::size_t hash, auto const& child) {
-                             return hash_combine(
-                               hash, shallow_hash_impl{}(child, c.is_empty() or is_parent_empty));
-                           });
-  }
-};
+std::size_t shallow_hash_impl(column_view const& c, bool is_parent_empty = false)
+{
+  std::size_t const init = (c.is_empty() or is_parent_empty)
+                             ? hash(c.type(), c.size())
+                             : hash(c.type(), c.size(), c.head(), c.null_mask(), c.offset());
+  return std::accumulate(c.child_begin(),
+                         c.child_end(),
+                         init,
+                         [&c, is_parent_empty](std::size_t hash, auto const& child) {
+                           return hash_combine(
+                             hash, shallow_hash_impl(child, c.is_empty() or is_parent_empty));
+                         });
+}
 
-std::size_t shallow_hash(column_view const& input) { return shallow_hash_impl{}(input); }
+std::size_t shallow_hash(column_view const& input) { return shallow_hash_impl(input); }
 
-struct shallow_equal_impl {
-  bool operator()(column_view const& lhs, column_view const& rhs, bool is_parent_empty = false)
-  {
-    bool const is_empty = (lhs.is_empty() and rhs.is_empty()) or is_parent_empty;
-    return (lhs.type() == rhs.type()) and
-           (is_empty or
-            ((lhs.size() == rhs.size()) and (lhs.head() == rhs.head()) and
-             (lhs.null_mask() == rhs.null_mask()) and (lhs.offset() == rhs.offset()))) and
-           std::equal(lhs.child_begin(),
-                      lhs.child_end(),
-                      rhs.child_begin(),
-                      rhs.child_end(),
-                      [is_empty](auto const& lhs_child, auto const& rhs_child) {
-                        return shallow_equal_impl{}(lhs_child, rhs_child, is_empty);
-                      });
-  }
-};
+bool shallow_equal_impl(column_view const& lhs,
+                        column_view const& rhs,
+                        bool is_parent_empty = false)
+{
+  bool const is_empty = (lhs.is_empty() and rhs.is_empty()) or is_parent_empty;
+  return (lhs.type() == rhs.type()) and
+         (is_empty or ((lhs.size() == rhs.size()) and (lhs.head() == rhs.head()) and
+                       (lhs.null_mask() == rhs.null_mask()) and (lhs.offset() == rhs.offset()))) and
+         std::equal(lhs.child_begin(),
+                    lhs.child_end(),
+                    rhs.child_begin(),
+                    rhs.child_end(),
+                    [is_empty](auto const& lhs_child, auto const& rhs_child) {
+                      return shallow_equal_impl(lhs_child, rhs_child, is_empty);
+                    });
+}
 bool is_shallow_equivalent(column_view const& lhs, column_view const& rhs)
 {
-  return shallow_equal_impl{}(lhs, rhs);
+  return shallow_equal_impl(lhs, rhs);
 }
 }  // namespace detail
 

From 6ac572522d48545851f2869ee3ec7cd5094a5226 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Fri, 17 Sep 2021 10:58:01 +0530
Subject: [PATCH 26/79] update after PR #9185 updates

---
 cpp/include/cudf/detail/aggregation/result_cache.hpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/cpp/include/cudf/detail/aggregation/result_cache.hpp b/cpp/include/cudf/detail/aggregation/result_cache.hpp
index a15e15d7d01..6ada327d107 100644
--- a/cpp/include/cudf/detail/aggregation/result_cache.hpp
+++ b/cpp/include/cudf/detail/aggregation/result_cache.hpp
@@ -18,6 +18,7 @@
 
 #include <cudf/column/column.hpp>
 #include <cudf/detail/aggregation/aggregation.hpp>
+#include <cudf/types.hpp>
 
 #include <unordered_map>
 
@@ -27,14 +28,14 @@ struct pair_column_aggregation_equal_to {
   bool operator()(std::pair<column_view, aggregation const&> const& lhs,
                   std::pair<column_view, aggregation const&> const& rhs) const
   {
-    return is_shallow_equal(lhs.first, rhs.first) and lhs.second.is_equal(rhs.second);
+    return is_shallow_equivalent(lhs.first, rhs.first) and lhs.second.is_equal(rhs.second);
   }
 };
 
 struct pair_column_aggregation_hash {
   size_t operator()(std::pair<column_view, aggregation const&> const& key) const noexcept
   {
-    return shallow_hash(key.first) * 127 + key.second.do_hash();
+    return hash_combine(shallow_hash(key.first), key.second.do_hash());
   }
 };
 

From e36b834dd93c3184a7174c40b30020767d9bfa2f Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Sat, 18 Sep 2021 13:50:59 +0530
Subject: [PATCH 27/79] add boost license for hash_combine, move to detail/hashing.hpp

---
 cpp/include/cudf/detail/hashing.hpp           | 35 +++++++++++++++++++
 .../cudf/detail/utilities/hash_functions.cuh  | 12 +++++++
 cpp/include/cudf/types.hpp                    | 29 ---------------
 cpp/src/column/column_view.cpp                |  6 +---
 4 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp
index 83d6be14709..22acf15fbf8 100644
--- a/cpp/include/cudf/detail/hashing.hpp
+++ b/cpp/include/cudf/detail/hashing.hpp
@@ -19,6 +19,8 @@
 
 #include <rmm/cuda_stream_view.hpp>
 
+#include <functional>
+
 namespace cudf {
 namespace detail {
 
@@ -53,5 +55,38 @@ std::unique_ptr<column> serial_murmur_hash3_32(
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/* Copyright 2005-2014 Daniel James.
+ *
+ * Use, modification and distribution is subject to the Boost Software
+ * License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt)
+ */
+/**
+ * @brief Combines two hashed values into a single hashed value.
+ *
+ * Adapted from Boost hash_combine function, modified for 64-bit
+ * https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
+ *
+ * @param lhs The first hashed value
+ * @param rhs The second hashed value
+ * @return Combined hash value
+ */
+constexpr std::size_t hash_combine(std::size_t lhs, std::size_t rhs)
+{
+  lhs ^= rhs + 0x9e3779b97f4a7c15 + (lhs << 6) + (lhs >> 2);
+  return lhs;
+}
 }  // namespace detail
 }  // namespace cudf
+
+// specialization of std::hash for cudf::data_type
+namespace std {
+template <>
+struct hash<cudf::data_type> {
+  std::size_t operator()(cudf::data_type const& type) const noexcept
+  {
+    return cudf::detail::hash_combine(std::hash<int32_t>{}(static_cast<int32_t>(type.id())),
+                                      std::hash<int32_t>{}(type.scale()));
+  }
+};
+}  // namespace std
diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh
index 6eab13ae9af..65deadd6cd0 100644
--- a/cpp/include/cudf/detail/utilities/hash_functions.cuh
+++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -395,6 +395,12 @@ struct MurmurHash3_32 {
     return h;
   }
 
+  /* Copyright 2005-2014 Daniel James.
+   *
+   * Use, modification and distribution is subject to the Boost Software
+   * License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
+   * http://www.boost.org/LICENSE_1_0.txt)
+   */
   /**
    * @brief  Combines two hash values into a new single hash value. Called
    * repeatedly to create a hash value from several variables.
@@ -795,6 +801,12 @@ struct IdentityHash {
   IdentityHash()    = default;
   constexpr IdentityHash(uint32_t seed) : m_seed(seed) {}
 
+  /* Copyright 2005-2014 Daniel James.
+   *
+   * Use, modification and distribution is subject to the Boost Software
+   * License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
+   * http://www.boost.org/LICENSE_1_0.txt)
+   */
   /**
    * @brief  Combines two hash values into a new single hash value. Called
    * repeatedly to create a hash value from several variables.
diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp
index 6926683b401..e1037efb5c8 100644
--- a/cpp/include/cudf/types.hpp
+++ b/cpp/include/cudf/types.hpp
@@ -27,7 +27,6 @@
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
-#include <functional>
 #include <iterator>
 
 /**
@@ -328,22 +327,6 @@ inline bool operator!=(data_type const& lhs, data_type const& rhs) { return !(lh
  */
 std::size_t size_of(data_type t);
 
-/**
- * @brief Combines two hashed values into a single hashed value.
- *
- * Adapted from Boost hash_combine function, modified for 64-bit
- * https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
- * https://stackoverflow.com/a/4948967/1550940
- *
- * @param lhs The first hashed value
- * @param rhs The second hashed value
- * @return Combined hash value
- */
-constexpr std::size_t hash_combine(std::size_t lhs, std::size_t rhs)
-{
-  lhs ^= rhs + 0x9e3779b97f4a7c15 + (lhs << 6) + (lhs >> 2);
-  return lhs;
-}
 /**
  *  @brief Identifies the hash function to be used
  */
@@ -362,15 +345,3 @@ static constexpr uint32_t DEFAULT_HASH_SEED = 0;
 
 /** @} */
 }  // namespace cudf
-
-// specialization of std::hash for cudf::data_type
-namespace std {
-template <>
-struct hash<cudf::data_type> {
-  std::size_t operator()(cudf::data_type const& type) const noexcept
-  {
-    return cudf::hash_combine(std::hash<int32_t>{}(static_cast<int32_t>(type.id())),
-                              std::hash<int32_t>{}(type.scale()));
-  }
-};
-}  // namespace std
diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp
index 525da2afe73..d214c507ca5 100644
--- a/cpp/src/column/column_view.cpp
+++ b/cpp/src/column/column_view.cpp
@@ -15,6 +15,7 @@
  */
 
 #include <cudf/column/column_view.hpp>
+#include <cudf/detail/hashing.hpp>
 #include <cudf/null_mask.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/error.hpp>
@@ -79,11 +80,6 @@ size_type column_view_base::null_count(size_type begin, size_type end) const
            : cudf::count_unset_bits(null_mask(), offset() + begin, offset() + end);
 }
 
-// Alternative fast hash functions
-// simple prime number multiplication algorithm.
-// Adapted from http://myeyesareblind.com/2017/02/06/Combine-hash-values/#apachecommons
-// constexpr std::size_t combine_hash(std::size_t h1, std::size_t h2) { return h1 * 127 + h2; }
-
 // Struct to use custom hash combine and fold expression
 struct HashValue {
   std::size_t hash;

From 1fbe3fc688073062292c4825d1cd9ac116dad181 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Sat, 18 Sep 2021 13:53:21 +0530
Subject: [PATCH 28/79] Apply suggestions from code review (jake)

Co-authored-by: Jake Hemstad <jhemstad@nvidia.com>
---
 cpp/include/cudf/column/column_view.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp
index f4ca4404430..cd490c3c832 100644
--- a/cpp/include/cudf/column/column_view.hpp
+++ b/cpp/include/cudf/column/column_view.hpp
@@ -656,7 +656,7 @@ std::size_t shallow_hash(column_view const& input);
  *
  *  Two columns are equivalent if for any operation `F` then:
  *   ```
- *    is_shallow_equivalent(c0, c1) ==> is_shallow_equivalent(F(c0),F(c1))
+ *    is_shallow_equivalent(c0, c1) ==> The results of F(c0) and F(c1) are equivalent
  *   ```
  * For any two non-empty columns, `is_shallow_equivalent(c0,c1)` is true only if they view the exact
  * same physical column. In other words, two physically independent columns may have exactly

From fc3cc6b538b60b7549de6ad08cceaa293098778b Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Sat, 18 Sep 2021 13:59:26 +0530
Subject: [PATCH 29/79] include cleanup

---
 cpp/include/cudf/detail/hashing.hpp | 1 +
 cpp/src/column/column_view.cpp      | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp
index 22acf15fbf8..bd5c8a42a51 100644
--- a/cpp/include/cudf/detail/hashing.hpp
+++ b/cpp/include/cudf/detail/hashing.hpp
@@ -19,6 +19,7 @@
 
 #include <rmm/cuda_stream_view.hpp>
 
+#include <cstddef>
 #include <functional>
 
 namespace cudf {
diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp
index d214c507ca5..d8132b4f545 100644
--- a/cpp/src/column/column_view.cpp
+++ b/cpp/src/column/column_view.cpp
@@ -24,7 +24,6 @@
 #include <thrust/iterator/transform_iterator.h>
 
 #include <algorithm>
-#include <cstddef>
 #include <exception>
 #include <numeric>
 #include <vector>

From f7b6bb637d9d3f4bd5c400610af40a06322cb511 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Sat, 18 Sep 2021 14:06:20 +0530
Subject: [PATCH 30/79] add missing include due to reorg

---
 cpp/include/cudf/detail/aggregation/result_cache.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/include/cudf/detail/aggregation/result_cache.hpp b/cpp/include/cudf/detail/aggregation/result_cache.hpp
index 6ada327d107..170960ba56d 100644
--- a/cpp/include/cudf/detail/aggregation/result_cache.hpp
+++ b/cpp/include/cudf/detail/aggregation/result_cache.hpp
@@ -18,6 +18,7 @@
 
 #include <cudf/column/column.hpp>
 #include <cudf/detail/aggregation/aggregation.hpp>
+#include <cudf/detail/hashing.hpp>
 #include <cudf/types.hpp>
 
 #include <unordered_map>

From 7db9870d82392c88faa0a1942558bec46fbdfa19 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Sat, 18 Sep 2021 15:04:25 +0530
Subject: [PATCH 31/79] update groupby corr to use hashed result cache

---
 cpp/src/groupby/sort/aggregate.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp
index 3ce54c7996a..a54ef6e0a98 100644
--- a/cpp/src/groupby/sort/aggregate.cpp
+++ b/cpp/src/groupby/sort/aggregate.cpp
@@ -532,9 +532,9 @@ void aggregate_result_functor::operator()<aggregation::MERGE_M2>(aggregation con
 template <>
 void aggregate_result_functor::operator()<aggregation::CORRELATION>(aggregation const& agg)
 {
-  if (cache.has_result(col_idx, agg)) { return; }
+  if (cache.has_result(values, agg)) { return; }
 
-  cache.add_result(col_idx,
+  cache.add_result(values,
                    agg,
                    detail::group_corr(get_grouped_values(),
                                       helper.group_offsets(stream),

From 5bb1dc460d621db73cc6121b294029108c001840 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Sat, 18 Sep 2021 23:50:48 +0530
Subject: [PATCH 32/79] Revert "set STRUCT_AGGS to CORRELATION"

This reverts commit 82b5a26b6b6ca537c0eee35168721a3cd2747464.
---
 python/cudf/cudf/_lib/groupby.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx
index 3d9c39ae2fc..19ef6555a6e 100644
--- a/python/cudf/cudf/_lib/groupby.pyx
+++ b/python/cudf/cudf/_lib/groupby.pyx
@@ -54,7 +54,7 @@ _CATEGORICAL_AGGS = {"COUNT", "SIZE", "NUNIQUE", "UNIQUE"}
 _STRING_AGGS = {"COUNT", "SIZE", "MAX", "MIN", "NUNIQUE", "NTH", "COLLECT",
                 "UNIQUE"}
 _LIST_AGGS = {"COLLECT"}
-_STRUCT_AGGS = {'CORRELATION'}
+_STRUCT_AGGS = set()
 _INTERVAL_AGGS = set()
 _DECIMAL_AGGS = {"COUNT", "SUM", "ARGMIN", "ARGMAX", "MIN", "MAX", "NUNIQUE",
                  "NTH", "COLLECT"}

From fb98fd551023bb314dbf1149433e3b5c5dfbad83 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Sat, 18 Sep 2021 23:50:59 +0530
Subject: [PATCH 33/79] Revert "added ctypedef correlation_type. need to add
 tests"

This reverts commit 3aab04fd20a78ba543265e572d4c9507ee27b59e.
---
 python/cudf/cudf/_lib/aggregation.pyx     | 10 +++-------
 python/cudf/cudf/_lib/cpp/aggregation.pxd |  8 +-------
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx
index 0c594d302f8..3557b505e81 100644
--- a/python/cudf/cudf/_lib/aggregation.pyx
+++ b/python/cudf/cudf/_lib/aggregation.pyx
@@ -57,7 +57,7 @@ class AggregationKind(Enum):
     UNIQUE = libcudf_aggregation.aggregation.Kind.COLLECT_SET
     PTX = libcudf_aggregation.aggregation.Kind.PTX
     CUDA = libcudf_aggregation.aggregation.Kind.CUDA
-    CORRELATION = libcudf_aggregation.aggregation.Kind.CORRELATION
+
 
 cdef class Aggregation:
     """A Cython wrapper for aggregations.
@@ -325,9 +325,7 @@ cdef class Aggregation:
     def correlation(cls):
         cdef Aggregation agg = cls()
         agg.c_obj = move(
-            libcudf_aggregation.make_correlation_aggregation[aggregation](
-                libcudf_aggregation.correlation_type.PEARSON
-            ))
+            libcudf_aggregation.make_correlation_aggregation[aggregation]())
         return agg
 
 cdef class RollingAggregation:
@@ -706,9 +704,7 @@ cdef class GroupbyAggregation:
         cdef GroupbyAggregation agg = cls()
         agg.c_obj = move(
             libcudf_aggregation.
-            make_correlation_aggregation[groupby_aggregation](
-                libcudf_aggregation.correlation_type.PEARSON
-            ))
+            make_correlation_aggregation[groupby_aggregation]())
         return agg
 
 cdef class GroupbyScanAggregation:
diff --git a/python/cudf/cudf/_lib/cpp/aggregation.pxd b/python/cudf/cudf/_lib/cpp/aggregation.pxd
index 04deeb877d1..db4c5f023a6 100644
--- a/python/cudf/cudf/_lib/cpp/aggregation.pxd
+++ b/python/cudf/cudf/_lib/cpp/aggregation.pxd
@@ -55,11 +55,6 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
         CUDA 'cudf::udf_type::CUDA'
         PTX 'cudf::udf_type::PTX'
 
-    ctypedef enum correlation_type:
-        PEARSON 'cudf::correlation_type::PEARSON'
-        KENDALL 'cudf::correlation_type::KENDALL'
-        SPEARMAN 'cudf::correlation_type::SPEARMAN'
-
     cdef unique_ptr[T] make_sum_aggregation[T]() except +
 
     cdef unique_ptr[T] make_product_aggregation[T]() except +
@@ -114,5 +109,4 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
         string user_defined_aggregator,
         data_type output_type) except +
 
-    cdef unique_ptr[T] make_correlation_aggregation[T](
-        correlation_type type) except +
+    cdef unique_ptr[T] make_correlation_aggregation[T]() except +

From 324c37dcfc0f2b3279ba075727fc8563af05205f Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Sat, 18 Sep 2021 23:57:08 +0530
Subject: [PATCH 34/79] Revert "added definition of correlation() in cython"

This reverts commit d96f870309d4c1cb0e7b93f4b32cfdfff543313c.
---
 Untitled.ipynb                            | 33 -----------------------
 python/cudf/cudf/_lib/aggregation.pyx     | 15 -----------
 python/cudf/cudf/_lib/cpp/aggregation.pxd |  4 ---
 3 files changed, 52 deletions(-)
 delete mode 100644 Untitled.ipynb

diff --git a/Untitled.ipynb b/Untitled.ipynb
deleted file mode 100644
index e38548d42a9..00000000000
--- a/Untitled.ipynb
+++ /dev/null
@@ -1,33 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "06d9628c-d48e-40cb-a90b-ab83ce92af3b",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.10"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx
index 3557b505e81..4f703724cef 100644
--- a/python/cudf/cudf/_lib/aggregation.pyx
+++ b/python/cudf/cudf/_lib/aggregation.pyx
@@ -321,13 +321,6 @@ cdef class Aggregation:
             ))
         return agg
 
-    @classmethod
-    def correlation(cls):
-        cdef Aggregation agg = cls()
-        agg.c_obj = move(
-            libcudf_aggregation.make_correlation_aggregation[aggregation]())
-        return agg
-
 cdef class RollingAggregation:
     """A Cython wrapper for rolling window aggregations.
 
@@ -699,14 +692,6 @@ cdef class GroupbyAggregation:
         )
         return agg
 
-    @classmethod
-    def correlation(cls):
-        cdef GroupbyAggregation agg = cls()
-        agg.c_obj = move(
-            libcudf_aggregation.
-            make_correlation_aggregation[groupby_aggregation]())
-        return agg
-
 cdef class GroupbyScanAggregation:
     """A Cython wrapper for groupby scan aggregations.
 
diff --git a/python/cudf/cudf/_lib/cpp/aggregation.pxd b/python/cudf/cudf/_lib/cpp/aggregation.pxd
index db4c5f023a6..13bfa49057c 100644
--- a/python/cudf/cudf/_lib/cpp/aggregation.pxd
+++ b/python/cudf/cudf/_lib/cpp/aggregation.pxd
@@ -38,8 +38,6 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
             COLLECT_SET 'cudf::aggregation::COLLECT_SET'
             PTX 'cudf::aggregation::PTX'
             CUDA 'cudf::aggregation::CUDA'
-            CORRELATION 'cudf::aggregation::CORRELATION'
-
         Kind kind
 
     cdef cppclass rolling_aggregation:
@@ -108,5 +106,3 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
         udf_type type,
         string user_defined_aggregator,
         data_type output_type) except +
-
-    cdef unique_ptr[T] make_correlation_aggregation[T]() except +

From 9f19ddfb468fd345df0aef560dd19bf195884a27 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Mon, 20 Sep 2021 10:09:24 +0530
Subject: [PATCH 35/79] Apply suggestions from code review (jake)

Co-authored-by: Jake Hemstad <jhemstad@nvidia.com>
---
 cpp/src/aggregation/result_cache.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/src/aggregation/result_cache.cpp b/cpp/src/aggregation/result_cache.cpp
index 04750f7fa98..2b6359f20e8 100644
--- a/cpp/src/aggregation/result_cache.cpp
+++ b/cpp/src/aggregation/result_cache.cpp
@@ -38,18 +38,18 @@ void result_cache::add_result(column_view const& input,
 
 column_view result_cache::get_result(column_view const& input, aggregation const& agg) const
 {
-  CUDF_EXPECTS(has_result(input, agg), "Result does not exist in cache");
 
   auto result_it = _cache.find({input, agg});
+  CUDF_EXPECTS(result_it != _cache.end(), "Result does not exist in cache");
   return result_it->second.second->view();
 }
 
 std::unique_ptr<column> result_cache::release_result(column_view const& input,
                                                      aggregation const& agg)
 {
-  CUDF_EXPECTS(has_result(input, agg), "Result does not exist in cache");
 
-  auto result_it = _cache.extract({input, agg});
-  return std::move(result_it.mapped().second);
+  auto node = _cache.extract({input, agg});
+  CUDF_EXPECTS(not node.empty(), "Result does not exist in cache");
+  return std::move(node.mapped().second);
 }
 

From ab955bb2ffb99e7223f787f2091385d45a4068ec Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Mon, 20 Sep 2021 11:45:07 +0530
Subject: [PATCH 36/79] enable result caching of child columns in correlation

---
 cpp/src/groupby/sort/aggregate.cpp        | 29 ++++++-
 cpp/src/groupby/sort/group_corr.cu        | 92 ++++-------------------
 cpp/src/groupby/sort/group_reductions.hpp | 14 +++-
 3 files changed, 56 insertions(+), 79 deletions(-)

diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp
index a54ef6e0a98..c7954db5d75 100644
--- a/cpp/src/groupby/sort/aggregate.cpp
+++ b/cpp/src/groupby/sort/aggregate.cpp
@@ -533,13 +533,40 @@ template <>
 void aggregate_result_functor::operator()<aggregation::CORRELATION>(aggregation const& agg)
 {
   if (cache.has_result(values, agg)) { return; }
+  CUDF_EXPECTS(values.type().id() == type_id::STRUCT,
+               "Input to `group_corr` must be a structs column.");
+  CUDF_EXPECTS(values.num_children() == 2,
+               "Input to `group_corr` must be a structs column having 2 children columns.");
+  CUDF_EXPECTS(values.nullable() == false,
+               "Input to `group_corr` must be a non-nullable structs column.");
+
+  auto const& corr_agg = dynamic_cast<cudf::detail::correlation_aggregation const&>(agg);
+  CUDF_EXPECTS(corr_agg._type == correlation_type::PEARSON,
+               "Only Pearson correlation is supported.");
+
+  auto std_agg = make_std_aggregation();
+  cudf::detail::aggregation_dispatcher(
+    std_agg->kind, aggregate_result_functor(values.child(0), helper, cache, stream, mr), *std_agg);
+  cudf::detail::aggregation_dispatcher(
+    std_agg->kind, aggregate_result_functor(values.child(1), helper, cache, stream, mr), *std_agg);
+
+  auto const stddev0 = cache.get_result(values.child(0), *std_agg);
+  auto const stddev1 = cache.get_result(values.child(1), *std_agg);
+  auto mean_agg      = make_mean_aggregation();
+  auto const mean0   = cache.get_result(values.child(0), *mean_agg);
+  auto const mean1   = cache.get_result(values.child(1), *mean_agg);
 
   cache.add_result(values,
                    agg,
-                   detail::group_corr(get_grouped_values(),
+                   detail::group_corr(get_grouped_values().child(0),
+                                      get_grouped_values().child(1),
                                       helper.group_offsets(stream),
                                       helper.group_labels(stream),
                                       helper.num_groups(stream),
+                                      mean0,
+                                      mean1,
+                                      stddev0,
+                                      stddev1,
                                       stream,
                                       mr));
 };
diff --git a/cpp/src/groupby/sort/group_corr.cu b/cpp/src/groupby/sort/group_corr.cu
index 35f29a1bb59..5c47676d4f9 100644
--- a/cpp/src/groupby/sort/group_corr.cu
+++ b/cpp/src/groupby/sort/group_corr.cu
@@ -104,101 +104,42 @@ struct corr_transform {  // : thrust::unary_function<size_type, ResultType>
     return (x - xmean) * (y - ymean) / (group_size - ddof) / xstddev / ystddev;
   }
 };
-
-/*
-sum((x-xu)*(y-yu))
-transform_output_iterator /N-1, stdx, stdy  how do you know the indices? we can not.
-So,
-(x-xu)*(y-yu))/N-1/stdx/stdy as single iterator., then reduce_by_key.
-very similar to var_transform in group_std.
-*/
-
-std::tuple<std::unique_ptr<column>, std::unique_ptr<column>> group_mean_stddev(
-  column_view const& values_0,
-  cudf::device_span<size_type const> group_offsets,
-  cudf::device_span<size_type const> group_labels,
-  size_type num_groups,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr)
-{
-  auto sum1   = detail::group_sum(values_0, num_groups, group_labels, stream, mr);
-  auto count1 = values_0.nullable()
-                  ? detail::group_count_valid(values_0, group_labels, num_groups, stream, mr)
-                  : detail::group_count_all(group_offsets, num_groups, stream, mr);
-  auto mean1 =
-    cudf::detail::binary_operation(*sum1,
-                                   *count1,
-                                   binary_operator::DIV,
-                                   cudf::detail::target_type(values_0.type(), aggregation::MEAN),
-                                   stream,
-                                   mr);
-
-  auto var1    = detail::group_var(values_0,
-                                *mean1,
-                                *count1,
-                                group_labels,
-                                1,  // default var_agg._ddof,
-                                stream,
-                                mr);
-  auto stddev1 = cudf::detail::unary_operation(*var1, unary_operator::SQRT, stream, mr);
-  return std::make_tuple(std::move(mean1), std::move(stddev1));
-}
-
 }  // namespace
 
 // TODO Eventually this function should accept values_0, values_1, not a struct.
-std::unique_ptr<column> group_corr(column_view const& values,
+std::unique_ptr<column> group_corr(column_view const& values_0,
+                                   column_view const& values_1,
                                    cudf::device_span<size_type const> group_offsets,
                                    cudf::device_span<size_type const> group_labels,
                                    size_type num_groups,
+                                   column_view const& mean_0,
+                                   column_view const& mean_1,
+                                   column_view const& stddev_0,
+                                   column_view const& stddev_1,
                                    rmm::cuda_stream_view stream,
                                    rmm::mr::device_memory_resource* mr)
 {
-  CUDF_EXPECTS(values.type().id() == type_id::STRUCT,
-               "Input to `group_corr` must be a structs column.");
-  CUDF_EXPECTS(values.num_children() == 2,
-               "Input to `group_corr` must be a structs column having 2 children columns.");
-  CUDF_EXPECTS(values.nullable() == false,
-               "Input to `group_corr` must be a non-nullable structs column.");
-  std::cout << "size=" << values.size() << std::endl;
-  std::cout << "num_children=" << values.num_children() << std::endl;
-
   using result_type = id_to_type<type_id::FLOAT64>;
   static_assert(
     std::is_same_v<cudf::detail::target_type_t<result_type, aggregation::Kind::CORRELATION>,
                    result_type>);
 
   // check if each child type can be converted to float64.
-  bool const is_convertible =
-    std::all_of(values.child_begin(), values.child_end(), [](auto const& c) {
-      return type_dispatcher(c.type(), is_double_convertible_impl{});
-    });
+  bool const is_convertible = type_dispatcher(values_0.type(), is_double_convertible_impl{}) or
+                              type_dispatcher(values_1.type(), is_double_convertible_impl{});
+
   CUDF_EXPECTS(is_convertible,
-               "Input to `group_corr` must be a structs column having all children columns of type "
-               "convertible to float64.");
+               "Input to `group_corr` must be columns of type convertible to float64.");
 
-  // TODO calculate SUM
   // TODO calculate COUNT_VALID  (need to do for 2 seperately. for MEAN, and
   // bitmask_and->COUNT_VALID for CORR.)
-  // TODO calculate MEAN
-  // TODO calculate VARIANCE
-  // TODO calculate STDDEV
   // TODO calculate CORR. (requires MEAN1, MEAN2, COUNT_VALID_ANDed, STDDEV1, STDDEV2)
   // TODO shuffle.
 
-  auto const& values_0 = values.child(0);
-  auto const& values_1 = values.child(1);
-  // TODO fix caching of child sum, count_valid, mean, variance, stddev. [unsupported due to
-  // result_cache design]
-  auto [mean0, stddev0] =
-    group_mean_stddev(values_0, group_offsets, group_labels, num_groups, stream, mr);
-  auto [mean1, stddev1] =
-    group_mean_stddev(values_1, group_offsets, group_labels, num_groups, stream, mr);
-
-  auto mean0_ptr   = mean0->mutable_view().begin<result_type>();
-  auto mean1_ptr   = mean1->mutable_view().begin<result_type>();
-  auto stddev0_ptr = stddev0->mutable_view().begin<result_type>();
-  auto stddev1_ptr = stddev1->mutable_view().begin<result_type>();
+  auto mean0_ptr   = mean_0.begin<result_type>();
+  auto mean1_ptr   = mean_1.begin<result_type>();
+  auto stddev0_ptr = stddev_0.begin<result_type>();
+  auto stddev1_ptr = stddev_1.begin<result_type>();
 
   // TODO replace with ANDed bitmask. (values, stddev)
   auto count1 = values_0.nullable()
@@ -217,9 +158,8 @@ std::unique_ptr<column> group_corr(column_view const& values,
                                                 group_labels.begin()};
 
   // result
-  auto const any_nulls = std::any_of(
-    values.child_begin(), values.child_end(), [](auto const& c) { return c.has_nulls(); });
-  auto mask_type = any_nulls ? mask_state::UNINITIALIZED : mask_state::UNALLOCATED;
+  auto const any_nulls = values_0.has_nulls() or values_1.has_nulls();
+  auto mask_type       = any_nulls ? mask_state::UNINITIALIZED : mask_state::UNALLOCATED;
 
   auto result =
     make_numeric_column(data_type(type_to_id<result_type>()), num_groups, mask_type, stream, mr);
diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp
index 6bb87d7ea6a..5bd658d8f76 100644
--- a/cpp/src/groupby/sort/group_reductions.hpp
+++ b/cpp/src/groupby/sort/group_reductions.hpp
@@ -445,17 +445,27 @@ std::unique_ptr<column> group_merge_m2(column_view const& values,
  * @brief Internal API to find correlation of child columns of a non-nullable struct column.
  * TODO fill documentation.
  *
- * @param values Grouped values (tuples of values `(valid_count, mean, M2)`) to merge.
+ * @param values_0 The first grouped values column to correlate
+ * @param values_1 The second grouped values column to correlate
  * @param group_offsets Offsets of groups' starting points within @p values.
  * @param group_labels ID of group that the corresponding value belongs to
  * @param num_groups Number of groups.
+ * @param mean_0 The mean of the first grouped values column
+ * @param mean_1 The mean of the second grouped values column
+ * @param stddev_0 The standard deviation of the first grouped values column
+ * @param stddev_1 The standard deviation of the second grouped values column
  * @param mr Device memory resource used to allocate the returned column's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> group_corr(column_view const& values,
+std::unique_ptr<column> group_corr(column_view const& values_0,
+                                   column_view const& values_1,
                                    cudf::device_span<size_type const> group_offsets,
                                    cudf::device_span<size_type const> group_labels,
                                    size_type num_groups,
+                                   column_view const& mean_0,
+                                   column_view const& mean_1,
+                                   column_view const& stddev_0,
+                                   column_view const& stddev_1,
                                    rmm::cuda_stream_view stream,
                                    rmm::mr::device_memory_resource* mr);
 /** @endinternal

From 98bbc94fd4e5f572412475f7f487a7a4f42caac1 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Mon, 20 Sep 2021 13:33:46 +0530
Subject: [PATCH 37/79] fix duplicate {col, agg} request extract

---
 cpp/src/aggregation/result_cache.cpp |  4 +---
 cpp/src/groupby/common/utils.hpp     | 20 ++++++++++++++++++--
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/cpp/src/aggregation/result_cache.cpp b/cpp/src/aggregation/result_cache.cpp
index 2b6359f20e8..1889ae67ee3 100644
--- a/cpp/src/aggregation/result_cache.cpp
+++ b/cpp/src/aggregation/result_cache.cpp
@@ -38,7 +38,6 @@ void result_cache::add_result(column_view const& input,
 
 column_view result_cache::get_result(column_view const& input, aggregation const& agg) const
 {
-
   auto result_it = _cache.find({input, agg});
   CUDF_EXPECTS(result_it != _cache.end(), "Result does not exist in cache");
   return result_it->second.second->view();
@@ -47,10 +46,9 @@ column_view result_cache::get_result(column_view const& input, aggregation const
 std::unique_ptr<column> result_cache::release_result(column_view const& input,
                                                      aggregation const& agg)
 {
-
   auto node = _cache.extract({input, agg});
   CUDF_EXPECTS(not node.empty(), "Result does not exist in cache");
-  return std::move(result_it.mapped().second);
+  return std::move(node.mapped().second);
 }
 
 }  // namespace detail
diff --git a/cpp/src/groupby/common/utils.hpp b/cpp/src/groupby/common/utils.hpp
index 129351c3d38..27a34843cc0 100644
--- a/cpp/src/groupby/common/utils.hpp
+++ b/cpp/src/groupby/common/utils.hpp
@@ -19,6 +19,8 @@
 #include <cudf/detail/aggregation/result_cache.hpp>
 #include <cudf/detail/groupby.hpp>
 #include <cudf/utilities/span.hpp>
+
+#include <memory>
 #include <vector>
 
 namespace cudf {
@@ -30,10 +32,24 @@ inline std::vector<aggregation_result> extract_results(host_span<RequestType con
                                                        cudf::detail::result_cache& cache)
 {
   std::vector<aggregation_result> results(requests.size());
-
+  std::unordered_map<std::pair<column_view, std::reference_wrapper<aggregation const>>,
+                     column_view,
+                     cudf::detail::pair_column_aggregation_hash,
+                     cudf::detail::pair_column_aggregation_equal_to>
+    repeated_result;
   for (size_t i = 0; i < requests.size(); i++) {
     for (auto&& agg : requests[i].aggregations) {
-      results[i].results.emplace_back(cache.release_result(requests[i].values, *agg));
+      if (cache.has_result(requests[i].values, *agg)) {
+        results[i].results.emplace_back(cache.release_result(requests[i].values, *agg));
+        repeated_result[{requests[i].values, *agg}] = results[i].results.back()->view();
+      } else {
+        auto it = repeated_result.find({requests[i].values, *agg});
+        if (it != repeated_result.end()) {
+          results[i].results.emplace_back(std::make_unique<column>(it->second));
+        } else {
+          CUDF_FAIL("Cannot extract result from the cache");
+        }
+      }
     }
   }
   return results;

From 95815250856ca02bdd5772e0b71b72547c2fccce Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Tue, 21 Sep 2021 04:38:13 +0530
Subject: [PATCH 38/79] address review comments

---
 cpp/src/column/column_view.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp
index d8132b4f545..25a1aa6f22b 100644
--- a/cpp/src/column/column_view.cpp
+++ b/cpp/src/column/column_view.cpp
@@ -98,7 +98,7 @@ constexpr auto hash(Ts&&... ts)
 std::size_t shallow_hash_impl(column_view const& c, bool is_parent_empty = false)
 {
   std::size_t const init = (c.is_empty() or is_parent_empty)
-                             ? hash(c.type(), c.size())
+                             ? hash(c.type(), 0)
                              : hash(c.type(), c.size(), c.head(), c.null_mask(), c.offset());
   return std::accumulate(c.child_begin(),
                          c.child_end(),
@@ -111,9 +111,9 @@ std::size_t shallow_hash_impl(column_view const& c, bool is_parent_empty = false
 
 std::size_t shallow_hash(column_view const& input) { return shallow_hash_impl(input); }
 
-bool shallow_equal_impl(column_view const& lhs,
-                        column_view const& rhs,
-                        bool is_parent_empty = false)
+bool shallow_equivalent_impl(column_view const& lhs,
+                             column_view const& rhs,
+                             bool is_parent_empty = false)
 {
   bool const is_empty = (lhs.is_empty() and rhs.is_empty()) or is_parent_empty;
   return (lhs.type() == rhs.type()) and
@@ -124,12 +124,12 @@ bool shallow_equal_impl(column_view const& lhs,
                     rhs.child_begin(),
                     rhs.child_end(),
                     [is_empty](auto const& lhs_child, auto const& rhs_child) {
-                      return shallow_equal_impl(lhs_child, rhs_child, is_empty);
+                      return shallow_equivalent_impl(lhs_child, rhs_child, is_empty);
                     });
 }
 bool is_shallow_equivalent(column_view const& lhs, column_view const& rhs)
 {
-  return shallow_equal_impl(lhs, rhs);
+  return shallow_equivalent_impl(lhs, rhs);
 }
 }  // namespace detail
 

From 1a5f367b7edd880fd1b2789d11a5531190339ee6 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Tue, 21 Sep 2021 21:29:57 +0530
Subject: [PATCH 39/79] Update cpp/src/column/column_view.cpp

Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com>
---
 cpp/src/column/column_view.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp
index 25a1aa6f22b..5749cb48c0e 100644
--- a/cpp/src/column/column_view.cpp
+++ b/cpp/src/column/column_view.cpp
@@ -97,7 +97,7 @@ constexpr auto hash(Ts&&... ts)
 
 std::size_t shallow_hash_impl(column_view const& c, bool is_parent_empty = false)
 {
-  std::size_t const init = (c.is_empty() or is_parent_empty)
+  std::size_t const init = (is_parent_empty or c.is_empty())
                              ? hash(c.type(), 0)
                              : hash(c.type(), c.size(), c.head(), c.null_mask(), c.offset());
   return std::accumulate(c.child_begin(),

From 63af02d65683a0b0bf34262911dcf37f8e6a2199 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Sat, 25 Sep 2021 01:37:50 +0530
Subject: [PATCH 40/79] add groupby correlation tests

---
 cpp/tests/CMakeLists.txt                |   1 +
 cpp/tests/groupby/correlation_tests.cpp | 163 ++++++++++++++++++++++++
 cpp/tests/groupby/mean_tests.cpp        |  60 ---------
 3 files changed, 164 insertions(+), 60 deletions(-)
 create mode 100644 cpp/tests/groupby/correlation_tests.cpp

diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index cde170fb598..20f7211c882 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -59,6 +59,7 @@ ConfigureTest(GROUPBY_TEST
     groupby/argmax_tests.cpp
     groupby/collect_list_tests.cpp
     groupby/collect_set_tests.cpp
+    groupby/correlation_tests.cpp
     groupby/count_scan_tests.cpp
     groupby/count_tests.cpp
     groupby/groups_tests.cpp
diff --git a/cpp/tests/groupby/correlation_tests.cpp b/cpp/tests/groupby/correlation_tests.cpp
new file mode 100644
index 00000000000..db238ae5998
--- /dev/null
+++ b/cpp/tests/groupby/correlation_tests.cpp
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/detail/aggregation/aggregation.hpp>
+#include <cudf/utilities/traits.hpp>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
+#include <cudf_test/type_list_utilities.hpp>
+#include <cudf_test/type_lists.hpp>
+
+#include <tests/groupby/groupby_test_util.hpp>
+
+#include <limits>
+#include <vector>
+
+using namespace cudf::test::iterators;
+namespace cudf {
+namespace test {
+
+using structs = structs_column_wrapper;
+
+template <typename V>
+struct groupby_correlation_test : public cudf::test::BaseFixture {
+};
+
+using supported_types = RemoveIf<ContainedIn<Types<bool>>, cudf::test::NumericTypes>;
+
+TYPED_TEST_CASE(groupby_correlation_test, supported_types);
+using K = int32_t;
+
+TYPED_TEST(groupby_correlation_test, basic)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::CORRELATION>;
+
+  auto keys     = fixed_width_column_wrapper<K>{{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}};
+  auto member_0 = fixed_width_column_wrapper<V>{{1, 1, 1, 2, 2, 3, 3, 1, 1, 4}};
+  auto member_1 = fixed_width_column_wrapper<V>{{1, 1, 1, 2, 0, 3, 3, 1, 1, 2}};
+  auto vals     = structs{{member_0, member_1}};
+
+  fixed_width_column_wrapper<K> expect_keys{1, 2, 3};
+  fixed_width_column_wrapper<R, double> expect_vals{
+    {1.0, 0.6, std::numeric_limits<double>::quiet_NaN()}};
+
+  auto agg =
+    cudf::make_correlation_aggregation<groupby_aggregation>(cudf::correlation_type::PEARSON);
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
+}
+
+TYPED_TEST(groupby_correlation_test, empty_cols)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::CORRELATION>;
+
+  fixed_width_column_wrapper<K> keys{};
+  fixed_width_column_wrapper<V> member_0{}, member_1{};
+  auto vals = structs{{member_0, member_1}};
+
+  fixed_width_column_wrapper<K> expect_keys{};
+  fixed_width_column_wrapper<R> expect_vals{};
+
+  auto agg =
+    cudf::make_correlation_aggregation<groupby_aggregation>(cudf::correlation_type::PEARSON);
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
+}
+
+TYPED_TEST(groupby_correlation_test, zero_valid_keys)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::CORRELATION>;
+
+  fixed_width_column_wrapper<K> keys({1, 2, 3}, all_nulls());
+  fixed_width_column_wrapper<V> member_0{3, 4, 5}, member_1{6, 7, 8};
+  auto vals = structs{{member_0, member_1}};
+
+  fixed_width_column_wrapper<K> expect_keys{};
+  fixed_width_column_wrapper<R> expect_vals{};
+
+  auto agg =
+    cudf::make_correlation_aggregation<groupby_aggregation>(cudf::correlation_type::PEARSON);
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
+}
+
+TYPED_TEST(groupby_correlation_test, zero_valid_values)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::CORRELATION>;
+
+  fixed_width_column_wrapper<K> keys{1, 1, 1};
+  fixed_width_column_wrapper<V> member_0({3, 4, 5}, all_nulls());
+  fixed_width_column_wrapper<V> member_1({3, 4, 5}, all_nulls());
+  auto vals = structs{{member_0, member_1}};
+
+  fixed_width_column_wrapper<K> expect_keys{1};
+  fixed_width_column_wrapper<R> expect_vals({0}, all_nulls());
+
+  auto agg =
+    cudf::make_correlation_aggregation<groupby_aggregation>(cudf::correlation_type::PEARSON);
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
+}
+
+TYPED_TEST(groupby_correlation_test, null_keys_and_values)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::CORRELATION>;
+
+  fixed_width_column_wrapper<K> keys({1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4},
+                                     {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
+  fixed_width_column_wrapper<V> val0({9, 1, 1, 2, 2, 3, 3, -1, 1, 4, 4},
+                                     {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
+  fixed_width_column_wrapper<V> val1({1, 1, 1, 2, 0, 3, 3, -1, 0, 2, 2});
+  auto vals = structs{{val0, val1}};
+
+  //                                        { 1, 1,     2, 2, 2,   3, 3,       4}
+  fixed_width_column_wrapper<K> expect_keys({1, 2, 3, 4}, no_nulls());
+  //                                        { 3, 6,     1, 4, 9,   2, 8,       3}
+  fixed_width_column_wrapper<R> expect_vals(
+    {1.0, 0.6, std::numeric_limits<double>::quiet_NaN(), 0.}, {1, 1, 1, 0});
+
+  auto agg =
+    cudf::make_correlation_aggregation<groupby_aggregation>(cudf::correlation_type::PEARSON);
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
+}
+
+struct groupby_dictionary_correlation_test : public cudf::test::BaseFixture {
+};
+
+TEST_F(groupby_dictionary_correlation_test, basic)
+{
+  using V = int16_t;
+  using R = cudf::detail::target_type_t<V, aggregation::CORRELATION>;
+
+  auto keys     = fixed_width_column_wrapper<K>{{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}};
+  auto member_0 = dictionary_column_wrapper<V>{{1, 1, 1, 2, 2, 3, 3, 1, 1, 4}};
+  auto member_1 = dictionary_column_wrapper<V>{{1, 1, 1, 2, 0, 3, 3, 1, 1, 2}};
+  auto vals     = structs{{member_0, member_1}};
+
+  fixed_width_column_wrapper<K> expect_keys{1, 2, 3};
+  fixed_width_column_wrapper<R, double> expect_vals{
+    {1.0, 0.6, std::numeric_limits<double>::quiet_NaN()}};
+
+  auto agg =
+    cudf::make_correlation_aggregation<groupby_aggregation>(cudf::correlation_type::PEARSON);
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
+}
+
+}  // namespace test
+}  // namespace cudf
diff --git a/cpp/tests/groupby/mean_tests.cpp b/cpp/tests/groupby/mean_tests.cpp
index 9cbeca8163f..613e1555b79 100644
--- a/cpp/tests/groupby/mean_tests.cpp
+++ b/cpp/tests/groupby/mean_tests.cpp
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include <cmath>
 #include <cudf/detail/aggregation/aggregation.hpp>
 #include <cudf/utilities/traits.hpp>
 
@@ -161,64 +160,5 @@ TEST_F(groupby_dictionary_mean_test, basic)
     keys, vals, expect_keys, expect_vals, cudf::make_mean_aggregation<groupby_aggregation>());
 }
 
-struct groupby_corr_test : public cudf::test::BaseFixture {
-};
-template <typename T>
-using fwcw    = fixed_width_column_wrapper<T>;
-using structs = structs_column_wrapper;
-
-TEST_F(groupby_corr_test, basic)
-{
-  using K  = int32_t;
-  using M0 = uint8_t;
-  using M1 = int16_t;
-  using R  = cudf::detail::target_type_t<M0, aggregation::CORRELATION>;
-
-  // clang-format off
-  auto keys     = fwcw<K>  { 1,    2,    3,    1,    2,    2,    1,    3,    3,    2  };
-  auto member_0 = fwcw<M0>{{ 1,    1,    1,    2,    2,    3,    3,    1,    1,    4  }};//, null_at(1)};
-  auto member_1 = fwcw<M1>{{ 1,    1,    1,    2,   -2,    3,    3,    1,    1,   -4 }};//, null_at(7)};
-  auto values   = structs{{member_0, member_1}};//, null_at(4)};
-  // clang-format on
-
-  fixed_width_column_wrapper<K> expect_keys({1, 2, 3});
-  fixed_width_column_wrapper<R, double> expect_vals{
-    {1.000000, -0.41522739926869984, std::numeric_limits<double>::quiet_NaN()}};  //, null_at(2)};
-  // clang-format on
-
-  auto agg =
-    cudf::make_correlation_aggregation<groupby_aggregation>(cudf::correlation_type::PEARSON);
-  std::vector<groupby::aggregation_request> requests;
-  requests.emplace_back(groupby::aggregation_request());
-  requests[0].values = values;
-
-  requests[0].aggregations.push_back(std::move(agg));
-  requests.emplace_back(groupby::aggregation_request());
-  // WAR to force groupby to use sort implementation
-  requests[0].aggregations.push_back(make_nth_element_aggregation<groupby_aggregation>(0));
-
-  requests[1].values = column_view(values).child(0);
-  requests[1].aggregations.push_back(cudf::make_mean_aggregation<groupby_aggregation>());
-  requests[1].aggregations.push_back(cudf::make_std_aggregation<groupby_aggregation>());
-  requests.emplace_back(groupby::aggregation_request());
-  requests[2].values = column_view(values).child(1);
-  requests[2].aggregations.push_back(cudf::make_mean_aggregation<groupby_aggregation>());
-  requests[2].aggregations.push_back(cudf::make_std_aggregation<groupby_aggregation>());
-
-  groupby::groupby gb_obj(table_view({keys}));
-  auto result = gb_obj.aggregate(requests);
-
-  cudf::test::print(*result.second[0].results[0]);
-  cudf::test::print(*result.second[1].results[0]);
-  cudf::test::print(*result.second[1].results[1]);
-  cudf::test::print(*result.second[2].results[0]);
-  cudf::test::print(*result.second[2].results[1]);
-
-  CUDF_TEST_EXPECT_TABLES_EQUAL(table_view({expect_keys}), result.first->view());
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
-    expect_vals, *result.second[0].results[0], debug_output_level::ALL_ERRORS);
-  // test_single_agg(keys, values, expect_keys, expect_vals, std::move(agg));
-}
-
 }  // namespace test
 }  // namespace cudf

From 14dd5bb1791e28659e2e0edad2c397ab3cb66a9e Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Sat, 25 Sep 2021 01:39:43 +0530
Subject: [PATCH 41/79] enable dict for sort groupby mean

---
 cpp/src/groupby/sort/aggregate.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp
index c7954db5d75..03476b32151 100644
--- a/cpp/src/groupby/sort/aggregate.cpp
+++ b/cpp/src/groupby/sort/aggregate.cpp
@@ -235,11 +235,14 @@ void aggregate_result_functor::operator()<aggregation::MEAN>(aggregation const&
 
   // TODO (dm): Special case for timestamp. Add target_type_impl for it.
   //            Blocked until we support operator+ on timestamps
+  auto col_type = cudf::is_dictionary(values.type())
+                    ? cudf::dictionary_column_view(values).keys().type()
+                    : values.type();
   auto result =
     cudf::detail::binary_operation(sum_result,
                                    count_result,
                                    binary_operator::DIV,
-                                   cudf::detail::target_type(values.type(), aggregation::MEAN),
+                                   cudf::detail::target_type(col_type, aggregation::MEAN),
                                    stream,
                                    mr);
   cache.add_result(values, agg, std::move(result));

From b0fea0202d87e6869b5b31675a647785c61e0f3e Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Sat, 25 Sep 2021 01:40:22 +0530
Subject: [PATCH 42/79] update group_corr for null support

---
 cpp/src/groupby/sort/aggregate.cpp        |  46 +++++++--
 cpp/src/groupby/sort/group_corr.cu        | 111 ++++++++--------------
 cpp/src/groupby/sort/group_reductions.hpp |   4 +-
 3 files changed, 80 insertions(+), 81 deletions(-)

diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp
index 03476b32151..f0931cd3b61 100644
--- a/cpp/src/groupby/sort/aggregate.cpp
+++ b/cpp/src/groupby/sort/aggregate.cpp
@@ -26,6 +26,7 @@
 #include <cudf/detail/binaryop.hpp>
 #include <cudf/detail/gather.hpp>
 #include <cudf/detail/groupby/sort_helper.hpp>
+#include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/unary.hpp>
 #include <cudf/dictionary/dictionary_column_view.hpp>
 #include <cudf/groupby.hpp>
@@ -547,25 +548,56 @@ void aggregate_result_functor::operator()<aggregation::CORRELATION>(aggregation
   CUDF_EXPECTS(corr_agg._type == correlation_type::PEARSON,
                "Only Pearson correlation is supported.");
 
+  // Correlation only for valid values in both columns.
+  auto [_, values_child0, values_child1] = [this]() {
+    rmm::device_buffer new_nullmask =
+      cudf::bitmask_and(table_view{{values.child(0), values.child(1)}});
+    auto null_count = cudf::count_unset_bits(
+      static_cast<cudf::bitmask_type const*>(new_nullmask.data()), 0, values.size());
+    if (null_count == 0) {
+      return std::make_tuple(std::move(new_nullmask), values.child(0), values.child(1));
+    }
+    auto column_view_with_new_nullmask = [](auto const& col, void* nullmask, auto null_count) {
+      return column_view(col.type(),
+                         col.size(),
+                         col.head(),
+                         static_cast<cudf::bitmask_type const*>(nullmask),
+                         null_count,
+                         col.offset(),
+                         std::vector(col.child_begin(), col.child_end()));
+    };
+    auto values_child0 =
+      null_count == values.child(0).null_count()
+        ? values.child(0)
+        : column_view_with_new_nullmask(values.child(0), new_nullmask.data(), null_count);
+    auto values_child1 =
+      null_count == values.child(1).null_count()
+        ? values.child(1)
+        : column_view_with_new_nullmask(values.child(1), new_nullmask.data(), null_count);
+    return std::make_tuple(std::move(new_nullmask), values_child0, values_child1);
+  }();
+
   auto std_agg = make_std_aggregation();
   cudf::detail::aggregation_dispatcher(
-    std_agg->kind, aggregate_result_functor(values.child(0), helper, cache, stream, mr), *std_agg);
+    std_agg->kind, aggregate_result_functor(values_child0, helper, cache, stream, mr), *std_agg);
   cudf::detail::aggregation_dispatcher(
-    std_agg->kind, aggregate_result_functor(values.child(1), helper, cache, stream, mr), *std_agg);
+    std_agg->kind, aggregate_result_functor(values_child1, helper, cache, stream, mr), *std_agg);
 
-  auto const stddev0 = cache.get_result(values.child(0), *std_agg);
-  auto const stddev1 = cache.get_result(values.child(1), *std_agg);
+  auto const stddev0 = cache.get_result(values_child0, *std_agg);
+  auto const stddev1 = cache.get_result(values_child1, *std_agg);
   auto mean_agg      = make_mean_aggregation();
-  auto const mean0   = cache.get_result(values.child(0), *mean_agg);
-  auto const mean1   = cache.get_result(values.child(1), *mean_agg);
+  auto const mean0   = cache.get_result(values_child0, *mean_agg);
+  auto const mean1   = cache.get_result(values_child1, *mean_agg);
+  auto count_agg     = make_count_aggregation();
+  auto const count   = cache.get_result(values_child0, *count_agg);
 
   cache.add_result(values,
                    agg,
                    detail::group_corr(get_grouped_values().child(0),
                                       get_grouped_values().child(1),
-                                      helper.group_offsets(stream),
                                       helper.group_labels(stream),
                                       helper.num_groups(stream),
+                                      count,
                                       mean0,
                                       mean1,
                                       stddev0,
diff --git a/cpp/src/groupby/sort/group_corr.cu b/cpp/src/groupby/sort/group_corr.cu
index 5c47676d4f9..4e3e916fdfb 100644
--- a/cpp/src/groupby/sort/group_corr.cu
+++ b/cpp/src/groupby/sort/group_corr.cu
@@ -14,31 +14,25 @@
  * limitations under the License.
  */
 
+#include <groupby/sort/group_reductions.hpp>
+
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/aggregation/aggregation.hpp>
-#include <cudf/detail/binaryop.hpp>
-#include <cudf/detail/iterator.cuh>
-#include <cudf/detail/unary.hpp>
 #include <cudf/detail/valid_if.cuh>
-#include <cudf/dictionary/detail/iterator.cuh>
-#include <cudf/structs/structs_column_view.hpp>
+#include <cudf/dictionary/dictionary_column_view.hpp>
 #include <cudf/utilities/span.hpp>
 #include <cudf/utilities/type_dispatcher.hpp>
 
-#include <memory>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/exec_policy.hpp>
-#include <type_traits>
-#include "cudf/types.hpp"
-#include "groupby/sort/group_reductions.hpp"
-#include "thrust/functional.h"
-#include "thrust/iterator/counting_iterator.h"
-#include "thrust/iterator/zip_iterator.h"
 
+#include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/discard_iterator.h>
 #include <thrust/reduce.h>
 
+#include <type_traits>
+
 namespace cudf {
 namespace groupby {
 namespace detail {
@@ -75,7 +69,7 @@ struct type_casted_accessor {
 };
 
 template <typename ResultType>
-struct corr_transform {  // : thrust::unary_function<size_type, ResultType>
+struct corr_transform {
   column_device_view const d_values_0, d_values_1;
   ResultType const *d_means_0, *d_means_1;
   ResultType const *d_stddev_0, *d_stddev_1;
@@ -83,13 +77,21 @@ struct corr_transform {  // : thrust::unary_function<size_type, ResultType>
   size_type const* d_group_labels;
   size_type ddof{1};  // TODO update based on bias.
 
+  __device__ static ResultType value(column_device_view const& view, size_type i)
+  {
+    bool const is_dict = view.type().id() == type_id::DICTIONARY32;
+    i                  = is_dict ? static_cast<size_type>(view.element<dictionary32>(i)) : i;
+    auto values_col    = is_dict ? view.child(dictionary_column_view::keys_column_index) : view;
+    return type_dispatcher(values_col.type(), type_casted_accessor<ResultType>{}, i, values_col);
+  }
+
   __device__ ResultType operator()(size_type i)
   {
     if (d_values_0.is_null(i) or d_values_1.is_null(i)) return 0.0;
 
     // This has to be device dispatch because x and y type may differ
-    auto x = type_dispatcher(d_values_0.type(), type_casted_accessor<ResultType>{}, i, d_values_0);
-    auto y = type_dispatcher(d_values_1.type(), type_casted_accessor<ResultType>{}, i, d_values_1);
+    auto x = value(d_values_0, i);
+    auto y = value(d_values_1, i);
 
     size_type group_idx  = d_group_labels[i];
     size_type group_size = d_group_sizes[group_idx];
@@ -109,9 +111,9 @@ struct corr_transform {  // : thrust::unary_function<size_type, ResultType>
 // TODO Eventually this function should accept values_0, values_1, not a struct.
 std::unique_ptr<column> group_corr(column_view const& values_0,
                                    column_view const& values_1,
-                                   cudf::device_span<size_type const> group_offsets,
                                    cudf::device_span<size_type const> group_labels,
                                    size_type num_groups,
+                                   column_view const& count,
                                    column_view const& mean_0,
                                    column_view const& mean_1,
                                    column_view const& stddev_0,
@@ -125,27 +127,24 @@ std::unique_ptr<column> group_corr(column_view const& values_0,
                    result_type>);
 
   // check if each child type can be converted to float64.
-  bool const is_convertible = type_dispatcher(values_0.type(), is_double_convertible_impl{}) or
-                              type_dispatcher(values_1.type(), is_double_convertible_impl{});
+  auto get_base_type = [](auto const& col) {
+    return (col.type().id() == type_id::DICTIONARY32
+              ? col.child(dictionary_column_view::keys_column_index)
+              : col)
+      .type();
+  };
+  bool const is_convertible =
+    type_dispatcher(get_base_type(values_0), is_double_convertible_impl{}) or
+    type_dispatcher(get_base_type(values_1), is_double_convertible_impl{});
 
   CUDF_EXPECTS(is_convertible,
                "Input to `group_corr` must be columns of type convertible to float64.");
 
-  // TODO calculate COUNT_VALID  (need to do for 2 seperately. for MEAN, and
-  // bitmask_and->COUNT_VALID for CORR.)
-  // TODO calculate CORR. (requires MEAN1, MEAN2, COUNT_VALID_ANDed, STDDEV1, STDDEV2)
-  // TODO shuffle.
-
   auto mean0_ptr   = mean_0.begin<result_type>();
   auto mean1_ptr   = mean_1.begin<result_type>();
   auto stddev0_ptr = stddev_0.begin<result_type>();
   auto stddev1_ptr = stddev_1.begin<result_type>();
 
-  // TODO replace with ANDed bitmask. (values, stddev)
-  auto count1 = values_0.nullable()
-                  ? detail::group_count_valid(values_0, group_labels, num_groups, stream, mr)
-                  : detail::group_count_all(group_offsets, num_groups, stream, mr);
-
   auto d_values_0 = column_device_view::create(values_0, stream);
   auto d_values_1 = column_device_view::create(values_1, stream);
   corr_transform<result_type> corr_transform_op{*d_values_0,
@@ -154,15 +153,11 @@ std::unique_ptr<column> group_corr(column_view const& values_0,
                                                 mean1_ptr,
                                                 stddev0_ptr,
                                                 stddev1_ptr,
-                                                count1->view().data<size_type>(),
+                                                count.data<size_type>(),
                                                 group_labels.begin()};
 
-  // result
-  auto const any_nulls = values_0.has_nulls() or values_1.has_nulls();
-  auto mask_type       = any_nulls ? mask_state::UNINITIALIZED : mask_state::UNALLOCATED;
-
-  auto result =
-    make_numeric_column(data_type(type_to_id<result_type>()), num_groups, mask_type, stream, mr);
+  auto result = make_numeric_column(
+    data_type(type_to_id<result_type>()), num_groups, mask_state::UNALLOCATED, stream, mr);
   auto d_result = result->mutable_view().begin<result_type>();
 
   auto corr_iter =
@@ -174,44 +169,16 @@ std::unique_ptr<column> group_corr(column_view const& values_0,
                         corr_iter,
                         thrust::make_discard_iterator(),
                         d_result);
-  return result;
-
-  // auto result_M2s = make_numeric_column(
-  //   data_type(type_to_id<result_type>()), num_groups, mask_state::UNALLOCATED, stream, mr);
-  // auto validities = rmm::device_uvector<int8_t>(num_groups, stream);
-
-  // // Perform merging for all the aggregations. Their output (and their validity data) are written
-  // // out concurrently through an output zip iterator.
-  // using iterator_tuple  = thrust::tuple<size_type*, result_type*, result_type*, int8_t*>;
-  // using output_iterator = thrust::zip_iterator<iterator_tuple>;
-  // auto const out_iter =
-  //   output_iterator{thrust::make_tuple(result_counts->mutable_view().template data<size_type>(),
-  //                                      result_means->mutable_view().template data<result_type>(),
-  //                                      result_M2s->mutable_view().template data<result_type>(),
-  //                                      validities.begin())};
-
-  // auto const count_valid = values.child(0);
-  // auto const mean_values = values.child(1);
-  // auto const M2_values   = values.child(2);
-  // auto const iter        = thrust::make_counting_iterator<size_type>(0);
-
-  // auto const fn = merge_fn<result_type>{group_offsets.begin(),
-  //                                       count_valid.template begin<size_type>(),
-  //                                       mean_values.template begin<result_type>(),
-  //                                       M2_values.template begin<result_type>()};
-  // thrust::transform(rmm::exec_policy(stream), iter, iter + num_groups, out_iter, fn);
-
-  // // Generate bitmask for the output.
-  // // Only mean and M2 values can be nullable. Count column must be non-nullable.
-  // auto [null_mask, null_count] = cudf::detail::valid_if(
-  //   validities.begin(), validities.end(), thrust::identity<int8_t>{}, stream, mr);
-  // if (null_count > 0) {
-  //   result_means->set_null_mask(null_mask, null_count);           // copy null_mask
-  //   result_M2s->set_null_mask(std::move(null_mask), null_count);  // take over null_mask
-  // }
-
-  // Output is a structs column containing the merged values of `COUNT_VALID`, `MEAN`, and `M2`.
 
+  auto is_null = [ddof = corr_transform_op.ddof] __device__(size_type group_size) {
+    return not(group_size == 0 or group_size - ddof <= 0);
+  };
+  auto [new_nullmask, null_count] =
+    cudf::detail::valid_if(count.begin<size_type>(), count.end<size_type>(), is_null, stream, mr);
+  if (null_count != 0) {
+    result->set_null_mask(std::move(new_nullmask));
+    result->set_null_count(null_count);
+  }
   return result;
 }
 
diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp
index 5bd658d8f76..7133da1a7e7 100644
--- a/cpp/src/groupby/sort/group_reductions.hpp
+++ b/cpp/src/groupby/sort/group_reductions.hpp
@@ -447,9 +447,9 @@ std::unique_ptr<column> group_merge_m2(column_view const& values,
  *
  * @param values_0 The first grouped values column to correlate
  * @param values_1 The second grouped values column to correlate
- * @param group_offsets Offsets of groups' starting points within @p values.
  * @param group_labels ID of group that the corresponding value belongs to
  * @param num_groups Number of groups.
+ * @param count The count of valid rows of the grouped values of both columns
  * @param mean_0 The mean of the first grouped values column
  * @param mean_1 The mean of the second grouped values column
  * @param stddev_0 The standard deviation of the first grouped values column
@@ -459,9 +459,9 @@ std::unique_ptr<column> group_merge_m2(column_view const& values,
  */
 std::unique_ptr<column> group_corr(column_view const& values_0,
                                    column_view const& values_1,
-                                   cudf::device_span<size_type const> group_offsets,
                                    cudf::device_span<size_type const> group_labels,
                                    size_type num_groups,
+                                   column_view const& count,
                                    column_view const& mean_0,
                                    column_view const& mean_1,
                                    column_view const& stddev_0,

From 57db9014757564bd964acd00dde46d883bab928b Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Sat, 25 Sep 2021 01:44:21 +0530
Subject: [PATCH 43/79] rename group_corr to group_correlation

---
 cpp/CMakeLists.txt                            |  2 +-
 cpp/src/groupby/sort/aggregate.cpp            | 22 +++++++++---------
 .../{group_corr.cu => group_correlation.cu}   | 23 +++++++++----------
 cpp/src/groupby/sort/group_reductions.hpp     | 22 +++++++++---------
 4 files changed, 34 insertions(+), 35 deletions(-)
 rename cpp/src/groupby/sort/{group_corr.cu => group_correlation.cu} (88%)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 959d858f2e6..ea29b6ab152 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -219,7 +219,7 @@ add_library(cudf
     src/groupby/sort/group_argmax.cu
     src/groupby/sort/group_argmin.cu
     src/groupby/sort/group_collect.cu
-    src/groupby/sort/group_corr.cu
+    src/groupby/sort/group_correlation.cu
     src/groupby/sort/group_count.cu
     src/groupby/sort/group_m2.cu
     src/groupby/sort/group_max.cu
diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp
index f0931cd3b61..25bca9c12b2 100644
--- a/cpp/src/groupby/sort/aggregate.cpp
+++ b/cpp/src/groupby/sort/aggregate.cpp
@@ -593,17 +593,17 @@ void aggregate_result_functor::operator()<aggregation::CORRELATION>(aggregation
 
   cache.add_result(values,
                    agg,
-                   detail::group_corr(get_grouped_values().child(0),
-                                      get_grouped_values().child(1),
-                                      helper.group_labels(stream),
-                                      helper.num_groups(stream),
-                                      count,
-                                      mean0,
-                                      mean1,
-                                      stddev0,
-                                      stddev1,
-                                      stream,
-                                      mr));
+                   detail::group_correlation(get_grouped_values().child(0),
+                                             get_grouped_values().child(1),
+                                             helper.group_labels(stream),
+                                             helper.num_groups(stream),
+                                             count,
+                                             mean0,
+                                             mean1,
+                                             stddev0,
+                                             stddev1,
+                                             stream,
+                                             mr));
 };
 
 }  // namespace detail
diff --git a/cpp/src/groupby/sort/group_corr.cu b/cpp/src/groupby/sort/group_correlation.cu
similarity index 88%
rename from cpp/src/groupby/sort/group_corr.cu
rename to cpp/src/groupby/sort/group_correlation.cu
index 4e3e916fdfb..04a8c5909bb 100644
--- a/cpp/src/groupby/sort/group_corr.cu
+++ b/cpp/src/groupby/sort/group_correlation.cu
@@ -108,18 +108,17 @@ struct corr_transform {
 };
 }  // namespace
 
-// TODO Eventually this function should accept values_0, values_1, not a struct.
-std::unique_ptr<column> group_corr(column_view const& values_0,
-                                   column_view const& values_1,
-                                   cudf::device_span<size_type const> group_labels,
-                                   size_type num_groups,
-                                   column_view const& count,
-                                   column_view const& mean_0,
-                                   column_view const& mean_1,
-                                   column_view const& stddev_0,
-                                   column_view const& stddev_1,
-                                   rmm::cuda_stream_view stream,
-                                   rmm::mr::device_memory_resource* mr)
+std::unique_ptr<column> group_correlation(column_view const& values_0,
+                                          column_view const& values_1,
+                                          cudf::device_span<size_type const> group_labels,
+                                          size_type num_groups,
+                                          column_view const& count,
+                                          column_view const& mean_0,
+                                          column_view const& mean_1,
+                                          column_view const& stddev_0,
+                                          column_view const& stddev_1,
+                                          rmm::cuda_stream_view stream,
+                                          rmm::mr::device_memory_resource* mr)
 {
   using result_type = id_to_type<type_id::FLOAT64>;
   static_assert(
diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp
index 7133da1a7e7..6e2ba2815c5 100644
--- a/cpp/src/groupby/sort/group_reductions.hpp
+++ b/cpp/src/groupby/sort/group_reductions.hpp
@@ -457,17 +457,17 @@ std::unique_ptr<column> group_merge_m2(column_view const& values,
  * @param mr Device memory resource used to allocate the returned column's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> group_corr(column_view const& values_0,
-                                   column_view const& values_1,
-                                   cudf::device_span<size_type const> group_labels,
-                                   size_type num_groups,
-                                   column_view const& count,
-                                   column_view const& mean_0,
-                                   column_view const& mean_1,
-                                   column_view const& stddev_0,
-                                   column_view const& stddev_1,
-                                   rmm::cuda_stream_view stream,
-                                   rmm::mr::device_memory_resource* mr);
+std::unique_ptr<column> group_correlation(column_view const& values_0,
+                                          column_view const& values_1,
+                                          cudf::device_span<size_type const> group_labels,
+                                          size_type num_groups,
+                                          column_view const& count,
+                                          column_view const& mean_0,
+                                          column_view const& mean_1,
+                                          column_view const& stddev_0,
+                                          column_view const& stddev_1,
+                                          rmm::cuda_stream_view stream,
+                                          rmm::mr::device_memory_resource* mr);
 /** @endinternal
  *
  */

From 0d1a91e773d202c08c392827c69f8cace4dc4792 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Sat, 25 Sep 2021 02:31:06 +0530
Subject: [PATCH 44/79] update doc

---
 cpp/src/groupby/sort/group_reductions.hpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp
index 6e2ba2815c5..a6c61b3a9fd 100644
--- a/cpp/src/groupby/sort/group_reductions.hpp
+++ b/cpp/src/groupby/sort/group_reductions.hpp
@@ -443,7 +443,6 @@ std::unique_ptr<column> group_merge_m2(column_view const& values,
                                        rmm::mr::device_memory_resource* mr);
 /**
  * @brief Internal API to find correlation of child columns of a non-nullable struct column.
- * TODO fill documentation.
  *
  * @param values_0 The first grouped values column to correlate
  * @param values_1 The second grouped values column to correlate
@@ -454,8 +453,8 @@ std::unique_ptr<column> group_merge_m2(column_view const& values,
  * @param mean_1 The mean of the second grouped values column
  * @param stddev_0 The standard deviation of the first grouped values column
  * @param stddev_1 The standard deviation of the second grouped values column
- * @param mr Device memory resource used to allocate the returned column's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory
  */
 std::unique_ptr<column> group_correlation(column_view const& values_0,
                                           column_view const& values_1,

From 6cd47bce5762cdffb66d47d76845a921b0890da4 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Mon, 27 Sep 2021 12:44:52 +0530
Subject: [PATCH 45/79] minor comment corrections

---
 cpp/src/aggregation/aggregation.cpp       | 2 +-
 cpp/src/groupby/sort/aggregate.cpp        | 2 --
 cpp/src/groupby/sort/group_correlation.cu | 3 +--
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp
index 426fef279d9..26ab5936a74 100644
--- a/cpp/src/aggregation/aggregation.cpp
+++ b/cpp/src/aggregation/aggregation.cpp
@@ -699,7 +699,7 @@ std::unique_ptr<Base> make_merge_m2_aggregation()
 template std::unique_ptr<aggregation> make_merge_m2_aggregation<aggregation>();
 template std::unique_ptr<groupby_aggregation> make_merge_m2_aggregation<groupby_aggregation>();
 
-/// Factory to create a CORR aggregation
+/// Factory to create a CORRELATION aggregation
 template <typename Base>
 std::unique_ptr<Base> make_correlation_aggregation(correlation_type type)
 {
diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp
index 7ecf4f5855a..45227368097 100644
--- a/cpp/src/groupby/sort/aggregate.cpp
+++ b/cpp/src/groupby/sort/aggregate.cpp
@@ -580,8 +580,6 @@ void aggregate_result_functor::operator()<aggregation::CORRELATION>(aggregation
   auto std_agg = make_std_aggregation();
   aggregate_result_functor(values_child0, helper, cache, stream, mr).operator()<aggregation::STD>(*std_agg);
   aggregate_result_functor(values_child1, helper, cache, stream, mr).operator()<aggregation::STD>(*std_agg);
-  // cudf::detail::aggregation_dispatcher(
-  //   std_agg->kind, aggregate_result_functor(values_child1, helper, cache, stream, mr), *std_agg);
 
   auto const stddev0 = cache.get_result(values_child0, *std_agg);
   auto const stddev1 = cache.get_result(values_child1, *std_agg);
diff --git a/cpp/src/groupby/sort/group_correlation.cu b/cpp/src/groupby/sort/group_correlation.cu
index 04a8c5909bb..70a2cbd9bb8 100644
--- a/cpp/src/groupby/sort/group_correlation.cu
+++ b/cpp/src/groupby/sort/group_correlation.cu
@@ -53,8 +53,7 @@ struct is_double_convertible_impl {
 };
 
 /**
- * @brief Type casts each element of the column to `CastType`
- *
+ * @brief Typecasts each element of the column to `CastType`
  */
 template <typename CastType>
 struct type_casted_accessor {

From 075ec73edfb842cd483752f322773528c26b555c Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Thu, 30 Sep 2021 22:49:16 +0530
Subject: [PATCH 46/79] add covariance, refactor correlation to use covariance

---
 cpp/include/cudf/aggregation.hpp              |  11 ++
 .../cudf/detail/aggregation/aggregation.hpp   |  32 +++-
 cpp/src/aggregation/aggregation.cpp           |  20 +++
 cpp/src/groupby/sort/aggregate.cpp            | 150 ++++++++++++------
 cpp/src/groupby/sort/group_correlation.cu     |  83 ++++++----
 cpp/src/groupby/sort/group_reductions.hpp     |  31 ++--
 6 files changed, 234 insertions(+), 93 deletions(-)

diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp
index 7a62a64b6dc..9e556dbe704 100644
--- a/cpp/include/cudf/aggregation.hpp
+++ b/cpp/include/cudf/aggregation.hpp
@@ -88,6 +88,7 @@ class aggregation {
     MERGE_LISTS,     ///< merge multiple lists values into one list
     MERGE_SETS,      ///< merge multiple lists values into one list then drop duplicate entries
     MERGE_M2,        ///< merge partial values of M2 aggregation,
+    COVARIANCE,      ///< covariance between two sets of elements
     CORRELATION,     ///< correlation between two sets of elements
     TDIGEST,         ///< create a tdigest from a set of input values
     MERGE_TDIGEST    ///< create a tdigest by merging multiple tdigests together
@@ -497,6 +498,15 @@ std::unique_ptr<Base> make_merge_sets_aggregation(null_equality nulls_equal = nu
 template <typename Base = aggregation>
 std::unique_ptr<Base> make_merge_m2_aggregation();
 
+/**
+ * @brief Factory to create a COVARIANCE aggregation
+ *
+ * Compute covariance between two columns.
+ * The input columns are child columns of a non-nullable struct column.
+ */
+template <typename Base = aggregation>
+std::unique_ptr<Base> make_covariance_aggregation();
+
 /**
  * @brief Factory to create a CORRELATION aggregation
  *
@@ -507,6 +517,7 @@ std::unique_ptr<Base> make_merge_m2_aggregation();
  */
 template <typename Base = aggregation>
 std::unique_ptr<Base> make_correlation_aggregation(correlation_type type);
+
 /**
  * @brief Factory to create a TDIGEST aggregation
  *
diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp
index ff192c438c7..e12ed3f521e 100644
--- a/cpp/include/cudf/detail/aggregation/aggregation.hpp
+++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp
@@ -91,6 +91,8 @@ class simple_aggregations_collector {  // Declares the interface for the simple
                                                           class merge_sets_aggregation const& agg);
   virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
                                                           class merge_m2_aggregation const& agg);
+  virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
+                                                          class covariance_aggregation const& agg);
   virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
                                                           class correlation_aggregation const& agg);
   virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
@@ -131,6 +133,7 @@ class aggregation_finalizer {  // Declares the interface for the finalizer
   virtual void visit(class merge_lists_aggregation const& agg);
   virtual void visit(class merge_sets_aggregation const& agg);
   virtual void visit(class merge_m2_aggregation const& agg);
+  virtual void visit(class covariance_aggregation const& agg);
   virtual void visit(class correlation_aggregation const& agg);
   virtual void visit(class tdigest_aggregation const& agg);
   virtual void visit(class merge_tdigest_aggregation const& agg);
@@ -893,6 +896,25 @@ class merge_m2_aggregation final : public groupby_aggregation {
   void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
 };
 
+/**
+ * @brief Derived aggregation class for specifying COVARIANCE aggregation
+ */
+class covariance_aggregation final : public groupby_aggregation {
+ public:
+  explicit covariance_aggregation() : aggregation{COVARIANCE} {}
+
+  std::unique_ptr<aggregation> clone() const override
+  {
+    return std::make_unique<covariance_aggregation>(*this);
+  }
+  std::vector<std::unique_ptr<aggregation>> get_simple_aggregations(
+    data_type col_type, simple_aggregations_collector& collector) const override
+  {
+    return collector.visit(col_type, *this);
+  }
+  void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
+};
+
 /**
  * @brief Derived aggregation class for specifying CORRELATION aggregation
  */
@@ -1209,7 +1231,13 @@ struct target_type_impl<SourceType, aggregation::MERGE_M2> {
   using type = struct_view;
 };
 
-// Always use struct for CORRELATION
+// Always use double for COVARIANCE
+template <typename SourceType>
+struct target_type_impl<SourceType, aggregation::COVARIANCE> {
+  using type = double;
+};
+
+// Always use double for CORRELATION
 template <typename SourceType>
 struct target_type_impl<SourceType, aggregation::CORRELATION> {
   using type = double;
@@ -1337,6 +1365,8 @@ CUDA_HOST_DEVICE_CALLABLE decltype(auto) aggregation_dispatcher(aggregation::Kin
       return f.template operator()<aggregation::MERGE_SETS>(std::forward<Ts>(args)...);
     case aggregation::MERGE_M2:
       return f.template operator()<aggregation::MERGE_M2>(std::forward<Ts>(args)...);
+    case aggregation::COVARIANCE:
+      return f.template operator()<aggregation::COVARIANCE>(std::forward<Ts>(args)...);
     case aggregation::CORRELATION:
       return f.template operator()<aggregation::CORRELATION>(std::forward<Ts>(args)...);
     case aggregation::TDIGEST:
diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp
index 26ab5936a74..c2b7449fc96 100644
--- a/cpp/src/aggregation/aggregation.cpp
+++ b/cpp/src/aggregation/aggregation.cpp
@@ -202,6 +202,11 @@ std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
   return visit(col_type, static_cast<aggregation const&>(agg));
 }
 
+std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
+  data_type col_type, covariance_aggregation const& agg)
+{
+  return visit(col_type, static_cast<aggregation const&>(agg));
+}
 std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
   data_type col_type, correlation_aggregation const& agg)
 {
@@ -363,10 +368,16 @@ void aggregation_finalizer::visit(merge_m2_aggregation const& agg)
   visit(static_cast<aggregation const&>(agg));
 }
 
+void aggregation_finalizer::visit(covariance_aggregation const& agg)
+{
+  visit(static_cast<aggregation const&>(agg));
+}
+
 void aggregation_finalizer::visit(correlation_aggregation const& agg)
 {
   visit(static_cast<aggregation const&>(agg));
 }
+
 void aggregation_finalizer::visit(tdigest_aggregation const& agg)
 {
   visit(static_cast<aggregation const&>(agg));
@@ -699,6 +710,15 @@ std::unique_ptr<Base> make_merge_m2_aggregation()
 template std::unique_ptr<aggregation> make_merge_m2_aggregation<aggregation>();
 template std::unique_ptr<groupby_aggregation> make_merge_m2_aggregation<groupby_aggregation>();
 
+/// Factory to create a COVARIANCE aggregation
+template <typename Base>
+std::unique_ptr<Base> make_covariance_aggregation()
+{
+  return std::make_unique<detail::covariance_aggregation>();
+}
+template std::unique_ptr<aggregation> make_covariance_aggregation<aggregation>();
+template std::unique_ptr<groupby_aggregation> make_covariance_aggregation<groupby_aggregation>();
+
 /// Factory to create a CORRELATION aggregation
 template <typename Base>
 std::unique_ptr<Base> make_correlation_aggregation(correlation_type type)
diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp
index 45227368097..7cddfef5712 100644
--- a/cpp/src/groupby/sort/aggregate.cpp
+++ b/cpp/src/groupby/sort/aggregate.cpp
@@ -529,6 +529,77 @@ void aggregate_result_functor::operator()<aggregation::MERGE_M2>(aggregation con
       get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr));
 };
 
+/**
+ * @brief Creates column views with only valid elements in both input column views
+ *
+ * @param column_0 The first column
+ * @param column_1 The second column
+ * @return tuple with new null mask (if null masks of the inputs differ) and new column views
+ */
+auto column_view_with_common_nulls(column_view const& column_0, column_view const& column_1)
+{
+  rmm::device_buffer new_nullmask = cudf::bitmask_and(table_view{{column_0, column_1}});
+  auto null_count                 = cudf::count_unset_bits(
+    static_cast<cudf::bitmask_type const*>(new_nullmask.data()), 0, column_0.size());
+  if (null_count == 0) { return std::make_tuple(std::move(new_nullmask), column_0, column_1); }
+  auto column_view_with_new_nullmask = [](auto const& col, void* nullmask, auto null_count) {
+    return column_view(col.type(),
+                       col.size(),
+                       col.head(),
+                       static_cast<cudf::bitmask_type const*>(nullmask),
+                       null_count,
+                       col.offset(),
+                       std::vector(col.child_begin(), col.child_end()));
+  };
+  auto new_column_0 = null_count == column_0.null_count()
+                        ? column_0
+                        : column_view_with_new_nullmask(column_0, new_nullmask.data(), null_count);
+  auto new_column_1 = null_count == column_1.null_count()
+                        ? column_1
+                        : column_view_with_new_nullmask(column_1, new_nullmask.data(), null_count);
+  return std::make_tuple(std::move(new_nullmask), new_column_0, new_column_1);
+}
+
+/**
+ * @brief Perform covariance between two child columns of non-nullable struct column.
+ *
+ */
+template <>
+void aggregate_result_functor::operator()<aggregation::COVARIANCE>(aggregation const& agg)
+{
+  if (cache.has_result(values, agg)) { return; }
+  CUDF_EXPECTS(values.type().id() == type_id::STRUCT,
+               "Input to `groupby covariance` must be a structs column.");
+  CUDF_EXPECTS(values.num_children() == 2,
+               "Input to `groupby covariance` must be a structs column having 2 children columns.");
+
+  // Covariance only for valid values in both columns.
+  // in non-identical null mask cases, this prevents caching of the results - STD, MEAN, COUNT.
+  auto [_, values_child0, values_child1] =
+    column_view_with_common_nulls(values.child(0), values.child(1));
+
+  auto mean_agg = make_mean_aggregation();
+  aggregate_result_functor(values_child0, helper, cache, stream, mr).operator()<aggregation::MEAN>(*mean_agg);
+  aggregate_result_functor(values_child1, helper, cache, stream, mr).operator()<aggregation::MEAN>(*mean_agg);
+
+  auto const mean0 = cache.get_result(values_child0, *mean_agg);
+  auto const mean1 = cache.get_result(values_child1, *mean_agg);
+  auto count_agg   = make_count_aggregation();
+  auto const count = cache.get_result(values_child0, *count_agg);
+
+  cache.add_result(values,
+                   agg,
+                   detail::group_covariance(get_grouped_values().child(0),
+                                            get_grouped_values().child(1),
+                                            helper.group_labels(stream),
+                                            helper.num_groups(stream),
+                                            count,
+                                            mean0,
+                                            mean1,
+                                            stream,
+                                            mr));
+};
+
 /**
  * @brief Perform correlation betweeen two child columns of non-nullable struct column.
  *
@@ -538,44 +609,21 @@ void aggregate_result_functor::operator()<aggregation::CORRELATION>(aggregation
 {
   if (cache.has_result(values, agg)) { return; }
   CUDF_EXPECTS(values.type().id() == type_id::STRUCT,
-               "Input to `group_corr` must be a structs column.");
-  CUDF_EXPECTS(values.num_children() == 2,
-               "Input to `group_corr` must be a structs column having 2 children columns.");
+               "Input to `groupby correlation` must be a structs column.");
+  CUDF_EXPECTS(
+    values.num_children() == 2,
+    "Input to `groupby correlation` must be a structs column having 2 children columns.");
   CUDF_EXPECTS(values.nullable() == false,
-               "Input to `group_corr` must be a non-nullable structs column.");
+               "Input to `groupby correlation` must be a non-nullable structs column.");
 
   auto const& corr_agg = dynamic_cast<cudf::detail::correlation_aggregation const&>(agg);
   CUDF_EXPECTS(corr_agg._type == correlation_type::PEARSON,
                "Only Pearson correlation is supported.");
 
   // Correlation only for valid values in both columns.
-  auto [_, values_child0, values_child1] = [this]() {
-    rmm::device_buffer new_nullmask =
-      cudf::bitmask_and(table_view{{values.child(0), values.child(1)}});
-    auto null_count = cudf::count_unset_bits(
-      static_cast<cudf::bitmask_type const*>(new_nullmask.data()), 0, values.size());
-    if (null_count == 0) {
-      return std::make_tuple(std::move(new_nullmask), values.child(0), values.child(1));
-    }
-    auto column_view_with_new_nullmask = [](auto const& col, void* nullmask, auto null_count) {
-      return column_view(col.type(),
-                         col.size(),
-                         col.head(),
-                         static_cast<cudf::bitmask_type const*>(nullmask),
-                         null_count,
-                         col.offset(),
-                         std::vector(col.child_begin(), col.child_end()));
-    };
-    auto values_child0 =
-      null_count == values.child(0).null_count()
-        ? values.child(0)
-        : column_view_with_new_nullmask(values.child(0), new_nullmask.data(), null_count);
-    auto values_child1 =
-      null_count == values.child(1).null_count()
-        ? values.child(1)
-        : column_view_with_new_nullmask(values.child(1), new_nullmask.data(), null_count);
-    return std::make_tuple(std::move(new_nullmask), values_child0, values_child1);
-  }();
+  // in non-identical null mask cases, this prevents caching of the results - STD, MEAN, COUNT
+  auto [_, values_child0, values_child1] =
+    column_view_with_common_nulls(values.child(0), values.child(1));
 
   auto std_agg = make_std_aggregation();
   aggregate_result_functor(values_child0, helper, cache, stream, mr).operator()<aggregation::STD>(*std_agg);
@@ -583,26 +631,30 @@ void aggregate_result_functor::operator()<aggregation::CORRELATION>(aggregation
 
   auto const stddev0 = cache.get_result(values_child0, *std_agg);
   auto const stddev1 = cache.get_result(values_child1, *std_agg);
-  auto mean_agg      = make_mean_aggregation();
-  auto const mean0   = cache.get_result(values_child0, *mean_agg);
-  auto const mean1   = cache.get_result(values_child1, *mean_agg);
-  auto count_agg     = make_count_aggregation();
-  auto const count   = cache.get_result(values_child0, *count_agg);
 
+  auto mean_agg    = make_mean_aggregation();
+  auto const mean0 = cache.get_result(values_child0, *mean_agg);
+  auto const mean1 = cache.get_result(values_child1, *mean_agg);
+  auto count_agg   = make_count_aggregation();
+  auto const count = cache.get_result(values_child0, *count_agg);
+
+  // Compute covariance here to avoid repeated computation of mean & count
+  auto cov_agg = make_covariance_aggregation();
   cache.add_result(values,
-                   agg,
-                   detail::group_correlation(get_grouped_values().child(0),
-                                             get_grouped_values().child(1),
-                                             helper.group_labels(stream),
-                                             helper.num_groups(stream),
-                                             count,
-                                             mean0,
-                                             mean1,
-                                             stddev0,
-                                             stddev1,
-                                             stream,
-                                             mr));
-};
+                   *cov_agg,
+                   detail::group_covariance(get_grouped_values().child(0),
+                                            get_grouped_values().child(1),
+                                            helper.group_labels(stream),
+                                            helper.num_groups(stream),
+                                            count,
+                                            mean0,
+                                            mean1,
+                                            stream,
+                                            mr));
+  auto const covariance = cache.get_result(values, *cov_agg);
+  cache.add_result(
+    values, agg, detail::group_correlation(covariance, stddev0, stddev1, stream, mr));
+}
 
 /**
  * @brief Generate a tdigest column from a grouped set of numeric input values.
diff --git a/cpp/src/groupby/sort/group_correlation.cu b/cpp/src/groupby/sort/group_correlation.cu
index 70a2cbd9bb8..0c11f62ea3b 100644
--- a/cpp/src/groupby/sort/group_correlation.cu
+++ b/cpp/src/groupby/sort/group_correlation.cu
@@ -68,10 +68,9 @@ struct type_casted_accessor {
 };
 
 template <typename ResultType>
-struct corr_transform {
+struct covariance_transform {
   column_device_view const d_values_0, d_values_1;
   ResultType const *d_means_0, *d_means_1;
-  ResultType const *d_stddev_0, *d_stddev_1;
   size_type const* d_group_sizes;
   size_type const* d_group_labels;
   size_type ddof{1};  // TODO update based on bias.
@@ -98,26 +97,22 @@ struct corr_transform {
     // prevent divide by zero error
     if (group_size == 0 or group_size - ddof <= 0) return 0.0;
 
-    ResultType xmean   = d_means_0[group_idx];
-    ResultType ymean   = d_means_1[group_idx];
-    ResultType xstddev = d_stddev_0[group_idx];
-    ResultType ystddev = d_stddev_1[group_idx];
-    return (x - xmean) * (y - ymean) / (group_size - ddof) / xstddev / ystddev;
+    ResultType xmean = d_means_0[group_idx];
+    ResultType ymean = d_means_1[group_idx];
+    return (x - xmean) * (y - ymean) / (group_size - ddof);
   }
 };
 }  // namespace
 
-std::unique_ptr<column> group_correlation(column_view const& values_0,
-                                          column_view const& values_1,
-                                          cudf::device_span<size_type const> group_labels,
-                                          size_type num_groups,
-                                          column_view const& count,
-                                          column_view const& mean_0,
-                                          column_view const& mean_1,
-                                          column_view const& stddev_0,
-                                          column_view const& stddev_1,
-                                          rmm::cuda_stream_view stream,
-                                          rmm::mr::device_memory_resource* mr)
+std::unique_ptr<column> group_covariance(column_view const& values_0,
+                                         column_view const& values_1,
+                                         cudf::device_span<size_type const> group_labels,
+                                         size_type num_groups,
+                                         column_view const& count,
+                                         column_view const& mean_0,
+                                         column_view const& mean_1,
+                                         rmm::cuda_stream_view stream,
+                                         rmm::mr::device_memory_resource* mr)
 {
   using result_type = id_to_type<type_id::FLOAT64>;
   static_assert(
@@ -136,30 +131,22 @@ std::unique_ptr<column> group_correlation(column_view const& values_0,
     type_dispatcher(get_base_type(values_1), is_double_convertible_impl{});
 
   CUDF_EXPECTS(is_convertible,
-               "Input to `group_corr` must be columns of type convertible to float64.");
+               "Input to `group_correlation` must be columns of type convertible to float64.");
 
-  auto mean0_ptr   = mean_0.begin<result_type>();
-  auto mean1_ptr   = mean_1.begin<result_type>();
-  auto stddev0_ptr = stddev_0.begin<result_type>();
-  auto stddev1_ptr = stddev_1.begin<result_type>();
+  auto mean0_ptr = mean_0.begin<result_type>();
+  auto mean1_ptr = mean_1.begin<result_type>();
 
   auto d_values_0 = column_device_view::create(values_0, stream);
   auto d_values_1 = column_device_view::create(values_1, stream);
-  corr_transform<result_type> corr_transform_op{*d_values_0,
-                                                *d_values_1,
-                                                mean0_ptr,
-                                                mean1_ptr,
-                                                stddev0_ptr,
-                                                stddev1_ptr,
-                                                count.data<size_type>(),
-                                                group_labels.begin()};
+  covariance_transform<result_type> covariance_transform_op{
+    *d_values_0, *d_values_1, mean0_ptr, mean1_ptr, count.data<size_type>(), group_labels.begin()};
 
   auto result = make_numeric_column(
     data_type(type_to_id<result_type>()), num_groups, mask_state::UNALLOCATED, stream, mr);
   auto d_result = result->mutable_view().begin<result_type>();
 
   auto corr_iter =
-    thrust::make_transform_iterator(thrust::make_counting_iterator(0), corr_transform_op);
+    thrust::make_transform_iterator(thrust::make_counting_iterator(0), covariance_transform_op);
 
   thrust::reduce_by_key(rmm::exec_policy(stream),
                         group_labels.begin(),
@@ -168,7 +155,7 @@ std::unique_ptr<column> group_correlation(column_view const& values_0,
                         thrust::make_discard_iterator(),
                         d_result);
 
-  auto is_null = [ddof = corr_transform_op.ddof] __device__(size_type group_size) {
+  auto is_null = [ddof = covariance_transform_op.ddof] __device__(size_type group_size) {
     return not(group_size == 0 or group_size - ddof <= 0);
   };
   auto [new_nullmask, null_count] =
@@ -180,6 +167,36 @@ std::unique_ptr<column> group_correlation(column_view const& values_0,
   return result;
 }
 
+std::unique_ptr<column> group_correlation(column_view const& covariance,
+                                          column_view const& stddev_0,
+                                          column_view const& stddev_1,
+                                          rmm::cuda_stream_view stream,
+                                          rmm::mr::device_memory_resource* mr)
+{
+  using result_type = id_to_type<type_id::FLOAT64>;
+  CUDF_EXPECTS(covariance.type().id() == type_id::FLOAT64,
+               "Covariance result as FLOAT64 is supported");
+  auto stddev0_ptr = stddev_0.begin<result_type>();
+  auto stddev1_ptr = stddev_1.begin<result_type>();
+  auto stddev_iter = thrust::make_zip_iterator(thrust::make_tuple(stddev0_ptr, stddev1_ptr));
+  auto result      = make_numeric_column(covariance.type(),
+                                    covariance.size(),
+                                    cudf::detail::copy_bitmask(covariance, stream, mr),
+                                    covariance.null_count(),
+                                    stream,
+                                    mr);
+  auto d_result    = result->mutable_view().begin<result_type>();
+  thrust::transform(rmm::exec_policy(stream),
+                    covariance.begin<result_type>(),
+                    covariance.end<result_type>(),
+                    stddev_iter,
+                    d_result,
+                    [] __device__(auto const covariance, auto const stddev) {
+                      return covariance / thrust::get<0>(stddev) / thrust::get<1>(stddev);
+                    });
+  return result;
+}
+
 }  // namespace detail
 }  // namespace groupby
 }  // namespace cudf
diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp
index a56a3ae38cd..c40ef56a839 100644
--- a/cpp/src/groupby/sort/group_reductions.hpp
+++ b/cpp/src/groupby/sort/group_reductions.hpp
@@ -442,27 +442,38 @@ std::unique_ptr<column> group_merge_m2(column_view const& values,
                                        rmm::cuda_stream_view stream,
                                        rmm::mr::device_memory_resource* mr);
 /**
- * @brief Internal API to find correlation of child columns of a non-nullable struct column.
+ * @brief Internal API to find covariance of child columns of a non-nullable struct column.
  *
- * @param values_0 The first grouped values column to correlate
- * @param values_1 The second grouped values column to correlate
+ * @param values_0 The first grouped values column to compute covariance
+ * @param values_1 The second grouped values column to compute covariance
  * @param group_labels ID of group that the corresponding value belongs to
  * @param num_groups Number of groups.
  * @param count The count of valid rows of the grouped values of both columns
  * @param mean_0 The mean of the first grouped values column
  * @param mean_1 The mean of the second grouped values column
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<column> group_covariance(column_view const& values_0,
+                                         column_view const& values_1,
+                                         cudf::device_span<size_type const> group_labels,
+                                         size_type num_groups,
+                                         column_view const& count,
+                                         column_view const& mean_0,
+                                         column_view const& mean_1,
+                                         rmm::cuda_stream_view stream,
+                                         rmm::mr::device_memory_resource* mr);
+
+/**
+ * @brief Internal API to find correlation from covariance and standard deviation.
+ *
+ * @param covariance The covariance of two grouped values columns
  * @param stddev_0 The standard deviation of the first grouped values column
  * @param stddev_1 The standard deviation of the second grouped values column
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used to allocate the returned column's device memory
  */
-std::unique_ptr<column> group_correlation(column_view const& values_0,
-                                          column_view const& values_1,
-                                          cudf::device_span<size_type const> group_labels,
-                                          size_type num_groups,
-                                          column_view const& count,
-                                          column_view const& mean_0,
-                                          column_view const& mean_1,
+std::unique_ptr<column> group_correlation(column_view const& covariance,
                                           column_view const& stddev_0,
                                           column_view const& stddev_1,
                                           rmm::cuda_stream_view stream,

From 60532e86525ba1b7a68bd28d791a31f02312df3d Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Thu, 30 Sep 2021 12:33:40 -0700
Subject: [PATCH 47/79] create new PR

---
 python/cudf/cudf/_lib/aggregation.pyx     | 21 +++++++++++++++
 python/cudf/cudf/_lib/cpp/aggregation.pxd | 10 ++++++++
 python/cudf/cudf/_lib/groupby.pyx         |  2 +-
 python/cudf/cudf/core/groupby/groupby.py  | 31 +++++++++++++++++++++++
 4 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx
index 4f703724cef..76eb3ba3bb2 100644
--- a/python/cudf/cudf/_lib/aggregation.pyx
+++ b/python/cudf/cudf/_lib/aggregation.pyx
@@ -57,6 +57,7 @@ class AggregationKind(Enum):
     UNIQUE = libcudf_aggregation.aggregation.Kind.COLLECT_SET
     PTX = libcudf_aggregation.aggregation.Kind.PTX
     CUDA = libcudf_aggregation.aggregation.Kind.CUDA
+    CORRELATION = libcudf_aggregation.aggregation.Kind.CORRELATION
 
 
 cdef class Aggregation:
@@ -321,6 +322,15 @@ cdef class Aggregation:
             ))
         return agg
 
+    @classmethod
+    def corr(cls):
+        cdef Aggregation agg = cls()
+        agg.c_obj = move(
+            libcudf_aggregation.make_correlation_aggregation[aggregation](
+                libcudf_aggregation.correlation_type.PEARSON
+            ))
+        return agg
+
 cdef class RollingAggregation:
     """A Cython wrapper for rolling window aggregations.
 
@@ -692,6 +702,17 @@ cdef class GroupbyAggregation:
         )
         return agg
 
+    @classmethod
+    def corr(cls):
+        cdef GroupbyAggregation agg = cls()
+        agg.c_obj = move(
+            libcudf_aggregation.
+            make_correlation_aggregation[groupby_aggregation](
+                libcudf_aggregation.correlation_type.PEARSON
+            ))
+
+        return agg
+
 cdef class GroupbyScanAggregation:
     """A Cython wrapper for groupby scan aggregations.
 
diff --git a/python/cudf/cudf/_lib/cpp/aggregation.pxd b/python/cudf/cudf/_lib/cpp/aggregation.pxd
index 13bfa49057c..04deeb877d1 100644
--- a/python/cudf/cudf/_lib/cpp/aggregation.pxd
+++ b/python/cudf/cudf/_lib/cpp/aggregation.pxd
@@ -38,6 +38,8 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
             COLLECT_SET 'cudf::aggregation::COLLECT_SET'
             PTX 'cudf::aggregation::PTX'
             CUDA 'cudf::aggregation::CUDA'
+            CORRELATION 'cudf::aggregation::CORRELATION'
+
         Kind kind
 
     cdef cppclass rolling_aggregation:
@@ -53,6 +55,11 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
         CUDA 'cudf::udf_type::CUDA'
         PTX 'cudf::udf_type::PTX'
 
+    ctypedef enum correlation_type:
+        PEARSON 'cudf::correlation_type::PEARSON'
+        KENDALL 'cudf::correlation_type::KENDALL'
+        SPEARMAN 'cudf::correlation_type::SPEARMAN'
+
     cdef unique_ptr[T] make_sum_aggregation[T]() except +
 
     cdef unique_ptr[T] make_product_aggregation[T]() except +
@@ -106,3 +113,6 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
         udf_type type,
         string user_defined_aggregator,
         data_type output_type) except +
+
+    cdef unique_ptr[T] make_correlation_aggregation[T](
+        correlation_type type) except +
diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx
index 19ef6555a6e..a41b7c79520 100644
--- a/python/cudf/cudf/_lib/groupby.pyx
+++ b/python/cudf/cudf/_lib/groupby.pyx
@@ -54,7 +54,7 @@ _CATEGORICAL_AGGS = {"COUNT", "SIZE", "NUNIQUE", "UNIQUE"}
 _STRING_AGGS = {"COUNT", "SIZE", "MAX", "MIN", "NUNIQUE", "NTH", "COLLECT",
                 "UNIQUE"}
 _LIST_AGGS = {"COLLECT"}
-_STRUCT_AGGS = set()
+_STRUCT_AGGS = {"CORRELATION"}
 _INTERVAL_AGGS = set()
 _DECIMAL_AGGS = {"COUNT", "SUM", "ARGMIN", "ARGMAX", "MIN", "MAX", "NUNIQUE",
                  "NTH", "COLLECT"}
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index e00d964f989..5f04be89be8 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2020-2021, NVIDIA CORPORATION.
 
 import collections
+import itertools
 import pickle
 import warnings
 
@@ -781,6 +782,36 @@ def median(self):
         """Get the column-wise median of the values in each group."""
         return self.agg("median")
 
+    def corr(self):
+        # Build a struct column per column pair and aggregate it with "corr".
+        _cols = self.grouping.values.columns.tolist()
+        new_df = cudf.DataFrame({self.grouping.keys.names: self.grouping.keys})
+        new_df._data.multiindex = False
+        for i in tuple(itertools.combinations_with_replacement(_cols, 2)):
+            new_df[i] = cudf.DataFrame(
+                {"x": self.obj[i[0]], "y": self.obj[i[1]]}
+            ).to_struct()
+        new_gb = new_df.groupby(self.grouping)
+        gb_corr = new_gb.agg("corr")
+
+        cols_list = []
+        for i, x in enumerate(_cols):
+            for j, y in enumerate(_cols):
+                if i > j:
+                    cols_list.append((_cols[j], _cols[i]))
+                else:
+                    cols_list.append((_cols[i], _cols[j]))
+        cols_split = [
+            cols_list[i : i + 3] for i in range(0, len(cols_list), 3)
+        ]
+
+        res = cudf.DataFrame()
+        for i, x in zip(cols_split, _cols):
+            ic = gb_corr.loc[:, i].interleave_columns()
+            res[x] = ic
+
+        return res
+
     def var(self, ddof=1):
         """Compute the column-wise variance of the values in each group.
 

From 077a1872a785374904d92d38497cce284908c030 Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Mon, 4 Oct 2021 21:17:09 +0530
Subject: [PATCH 48/79] add more null cases for correlation tests

---
 cpp/tests/groupby/correlation_tests.cpp | 60 +++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 4 deletions(-)

diff --git a/cpp/tests/groupby/correlation_tests.cpp b/cpp/tests/groupby/correlation_tests.cpp
index db238ae5998..90d230ef1eb 100644
--- a/cpp/tests/groupby/correlation_tests.cpp
+++ b/cpp/tests/groupby/correlation_tests.cpp
@@ -119,16 +119,16 @@ TYPED_TEST(groupby_correlation_test, null_keys_and_values)
   using V = TypeParam;
   using R = cudf::detail::target_type_t<V, aggregation::CORRELATION>;
 
+  // clang-format off
   fixed_width_column_wrapper<K> keys({1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4},
                                      {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
-  fixed_width_column_wrapper<V> val0({9, 1, 1, 2, 2, 3, 3, -1, 1, 4, 4},
+  fixed_width_column_wrapper<V> val0({9, 1, 1, 2, 2, 3, 3,-1, 1, 4, 4},
                                      {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
-  fixed_width_column_wrapper<V> val1({1, 1, 1, 2, 0, 3, 3, -1, 0, 2, 2});
+  fixed_width_column_wrapper<V> val1({1, 1, 1, 2, 0, 3, 3,-1, 0, 2, 2});
+  // clang-format on
   auto vals = structs{{val0, val1}};
 
-  //                                        { 1, 1,     2, 2, 2,   3, 3,       4}
   fixed_width_column_wrapper<K> expect_keys({1, 2, 3, 4}, no_nulls());
-  //                                        { 3, 6,     1, 4, 9,   2, 8,       3}
   fixed_width_column_wrapper<R> expect_vals(
     {1.0, 0.6, std::numeric_limits<double>::quiet_NaN(), 0.}, {1, 1, 1, 0});
 
@@ -137,6 +137,58 @@ TYPED_TEST(groupby_correlation_test, null_keys_and_values)
   test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
 }
 
+TYPED_TEST(groupby_correlation_test, null_values_same)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::CORRELATION>;
+
+  // clang-format off
+  fixed_width_column_wrapper<K> keys({1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4},
+                                     {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
+  fixed_width_column_wrapper<V> val0({9, 1, 1, 2, 2, 3, 3,-1, 1, 4, 4},
+                                     {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0});
+  fixed_width_column_wrapper<V> val1({1, 1, 1, 2, 0, 3, 3,-1, 0, 2, 2},
+                                     {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0});
+  // clang-format on
+  auto vals = structs{{val0, val1}};
+
+  fixed_width_column_wrapper<K> expect_keys({1, 2, 3, 4}, no_nulls());
+  fixed_width_column_wrapper<R> expect_vals(
+    {1.0, 0.6, std::numeric_limits<double>::quiet_NaN(), 0.}, {1, 1, 1, 0});
+
+  auto agg =
+    cudf::make_correlation_aggregation<groupby_aggregation>(cudf::correlation_type::PEARSON);
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
+}
+
+// keys=[1, 1, 1, 2, 2, 2, 2,   3, N, 3, 4]
+// val0=[N, 2, 3, 1, N, 3, 4,   1,-1, 1, 4]
+// val1=[N, 2, 3, 2,-1, 6, 1,   1,-1, 0, N]
+// corr=[    1.0,        0.0,   NAN,    NULL]
+TYPED_TEST(groupby_correlation_test, null_values_different)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::CORRELATION>;
+
+  // clang-format off
+  fixed_width_column_wrapper<K> keys({1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4},
+                                     {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
+  fixed_width_column_wrapper<V> val0({9, 1, 1, 2, 2, 3, 3,-1, 1, 4, 4},
+                                     {0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1});
+  fixed_width_column_wrapper<V> val1({1, 2, 1, 2,-1, 6, 3,-1, 0, 1, 2},
+                                     {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0});
+  // clang-format on
+  auto vals = structs{{val0, val1}};
+
+  fixed_width_column_wrapper<K> expect_keys({1, 2, 3, 4}, no_nulls());
+  fixed_width_column_wrapper<R> expect_vals({1.0, 0., std::numeric_limits<double>::quiet_NaN(), 0.},
+                                            {1, 1, 1, 0});
+
+  auto agg =
+    cudf::make_correlation_aggregation<groupby_aggregation>(cudf::correlation_type::PEARSON);
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
+}
+
 struct groupby_dictionary_correlation_test : public cudf::test::BaseFixture {
 };
 

From e3f47c13c13bb2c41769163001e00253e73fe1be Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Mon, 4 Oct 2021 23:23:08 +0530
Subject: [PATCH 49/79] add covariance tests

---
 cpp/tests/CMakeLists.txt               |   1 +
 cpp/tests/groupby/covariance_tests.cpp | 199 +++++++++++++++++++++++++
 2 files changed, 200 insertions(+)
 create mode 100644 cpp/tests/groupby/covariance_tests.cpp

diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index ea7338b4da1..d32b18cb929 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -62,6 +62,7 @@ ConfigureTest(GROUPBY_TEST
     groupby/correlation_tests.cpp
     groupby/count_scan_tests.cpp
     groupby/count_tests.cpp
+    groupby/covariance_tests.cpp
     groupby/groups_tests.cpp
     groupby/keys_tests.cpp
     groupby/lists_tests.cpp
diff --git a/cpp/tests/groupby/covariance_tests.cpp b/cpp/tests/groupby/covariance_tests.cpp
new file mode 100644
index 00000000000..039fce16222
--- /dev/null
+++ b/cpp/tests/groupby/covariance_tests.cpp
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/detail/aggregation/aggregation.hpp>
+#include <cudf/utilities/traits.hpp>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
+#include <cudf_test/type_list_utilities.hpp>
+#include <cudf_test/type_lists.hpp>
+
+#include <tests/groupby/groupby_test_util.hpp>
+
+#include <limits>
+#include <vector>
+
+using namespace cudf::test::iterators;
+namespace cudf {
+namespace test {
+
+using structs = structs_column_wrapper;
+
+template <typename V>
+struct groupby_covariance_test : public cudf::test::BaseFixture {
+};
+
+using supported_types = RemoveIf<ContainedIn<Types<bool>>, cudf::test::NumericTypes>;
+
+TYPED_TEST_CASE(groupby_covariance_test, supported_types);
+using K = int32_t;
+
+TYPED_TEST(groupby_covariance_test, basic)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::COVARIANCE>;
+
+  auto keys     = fixed_width_column_wrapper<K>{{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}};
+  auto member_0 = fixed_width_column_wrapper<V>{{1, 1, 1, 2, 2, 3, 3, 1, 1, 4}};
+  auto member_1 = fixed_width_column_wrapper<V>{{1, 1, 1, 2, 0, 3, 3, 1, 1, 2}};
+  auto vals     = structs{{member_0, member_1}};
+
+  fixed_width_column_wrapper<K> expect_keys{1, 2, 3};
+  fixed_width_column_wrapper<R, double> expect_vals{{1.0, 1.0, 0.0}};
+
+  auto agg = cudf::make_covariance_aggregation<groupby_aggregation>();
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
+}
+
+TYPED_TEST(groupby_covariance_test, empty_cols)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::COVARIANCE>;
+
+  fixed_width_column_wrapper<K> keys{};
+  fixed_width_column_wrapper<V> member_0{}, member_1{};
+  auto vals = structs{{member_0, member_1}};
+
+  fixed_width_column_wrapper<K> expect_keys{};
+  fixed_width_column_wrapper<R> expect_vals{};
+
+  auto agg = cudf::make_covariance_aggregation<groupby_aggregation>();
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
+}
+
+TYPED_TEST(groupby_covariance_test, zero_valid_keys)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::COVARIANCE>;
+
+  fixed_width_column_wrapper<K> keys({1, 2, 3}, all_nulls());
+  fixed_width_column_wrapper<V> member_0{3, 4, 5}, member_1{6, 7, 8};
+  auto vals = structs{{member_0, member_1}};
+
+  fixed_width_column_wrapper<K> expect_keys{};
+  fixed_width_column_wrapper<R> expect_vals{};
+
+  auto agg = cudf::make_covariance_aggregation<groupby_aggregation>();
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
+}
+
+TYPED_TEST(groupby_covariance_test, zero_valid_values)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::COVARIANCE>;
+
+  fixed_width_column_wrapper<K> keys{1, 1, 1};
+  fixed_width_column_wrapper<V> member_0({3, 4, 5}, all_nulls());
+  fixed_width_column_wrapper<V> member_1({3, 4, 5}, all_nulls());
+  auto vals = structs{{member_0, member_1}};
+
+  fixed_width_column_wrapper<K> expect_keys{1};
+  fixed_width_column_wrapper<R> expect_vals({0}, all_nulls());
+
+  auto agg = cudf::make_covariance_aggregation<groupby_aggregation>();
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
+}
+
+TYPED_TEST(groupby_covariance_test, null_keys_and_values)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::COVARIANCE>;
+
+  // clang-format off
+  fixed_width_column_wrapper<K> keys({1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4},
+                                     {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
+  fixed_width_column_wrapper<V> val0({9, 1, 1, 2, 2, 3, 3,-1, 1, 4, 4},
+                                     {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
+  fixed_width_column_wrapper<V> val1({1, 1, 1, 2, 0, 3, 3,-1, 0, 2, 2});
+  // clang-format on
+  auto vals = structs{{val0, val1}};
+
+  fixed_width_column_wrapper<K> expect_keys({1, 2, 3, 4}, no_nulls());
+  fixed_width_column_wrapper<R> expect_vals({0.5, 1.0, 0.0, -0.}, {1, 1, 1, 0});
+
+  auto agg = cudf::make_covariance_aggregation<groupby_aggregation>();
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
+}
+
+TYPED_TEST(groupby_covariance_test, null_values_same)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::COVARIANCE>;
+
+  // clang-format off
+  fixed_width_column_wrapper<K> keys({1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4},
+                                     {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
+  fixed_width_column_wrapper<V> val0({9, 1, 1, 2, 2, 3, 3,-1, 1, 4, 4},
+                                     {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0});
+  fixed_width_column_wrapper<V> val1({1, 1, 1, 2, 0, 3, 3,-1, 0, 2, 2},
+                                     {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0});
+  // clang-format on
+  auto vals = structs{{val0, val1}};
+
+  fixed_width_column_wrapper<K> expect_keys({1, 2, 3, 4}, no_nulls());
+  fixed_width_column_wrapper<R> expect_vals({0.5, 1.0, 0.0, -0.}, {1, 1, 1, 0});
+
+  auto agg = cudf::make_covariance_aggregation<groupby_aggregation>();
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
+}
+
+TYPED_TEST(groupby_covariance_test, null_values_different)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::COVARIANCE>;
+
+  // clang-format off
+  fixed_width_column_wrapper<K> keys({1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4},
+                                     {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
+  fixed_width_column_wrapper<V> val0({9, 1, 1, 2, 2, 3, 3,-1, 1, 4, 4},
+                                     {0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1});
+  fixed_width_column_wrapper<V> val1({1, 2, 1, 2,-1, 3, 3,-1, 0, 4, 2},
+                                     {0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0});
+  // clang-format on
+  auto vals = structs{{val0, val1}};
+
+  fixed_width_column_wrapper<K> expect_keys({1, 2, 3, 4}, no_nulls());
+  fixed_width_column_wrapper<R> expect_vals(
+    {std::numeric_limits<double>::quiet_NaN(), 1.5, 0.0, -0.}, {0, 1, 1, 0});
+
+  auto agg = cudf::make_covariance_aggregation<groupby_aggregation>();
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
+}
+
+struct groupby_dictionary_covariance_test : public cudf::test::BaseFixture {
+};
+
+TEST_F(groupby_dictionary_covariance_test, basic)
+{
+  using V = int16_t;
+  using R = cudf::detail::target_type_t<V, aggregation::COVARIANCE>;
+
+  auto keys     = fixed_width_column_wrapper<K>{{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}};
+  auto member_0 = dictionary_column_wrapper<V>{{1, 1, 1, 2, 2, 3, 3, 1, 1, 4}};
+  auto member_1 = dictionary_column_wrapper<V>{{1, 1, 1, 2, 3, -3, 3, 1, 1, 2}};
+  auto vals     = structs{{member_0, member_1}};
+
+  fixed_width_column_wrapper<K> expect_keys{1, 2, 3};
+  fixed_width_column_wrapper<R, double> expect_vals{{1.0, -0.5, 0.0}};
+
+  auto agg = cudf::make_covariance_aggregation<groupby_aggregation>();
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), force_use_sort_impl::YES);
+}
+
+}  // namespace test
+}  // namespace cudf

From 9c5b81d7b2524ce65b324ca379ab6ed4a51e56f7 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 6 Oct 2021 16:02:51 -0700
Subject: [PATCH 50/79] fixed merge conflict in result_cache.hpp

---
 cpp/include/cudf/detail/aggregation/result_cache.hpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/cpp/include/cudf/detail/aggregation/result_cache.hpp b/cpp/include/cudf/detail/aggregation/result_cache.hpp
index e758feb1fde..41f5c19f06a 100644
--- a/cpp/include/cudf/detail/aggregation/result_cache.hpp
+++ b/cpp/include/cudf/detail/aggregation/result_cache.hpp
@@ -34,11 +34,7 @@ struct pair_column_aggregation_equal_to {
 };
 
 struct pair_column_aggregation_hash {
-<<<<<<< HEAD
-  size_t operator()(std::pair<column_view, aggregation const&> const& key) const noexcept
-=======
   size_t operator()(std::pair<column_view, aggregation const&> const& key) const
->>>>>>> 3f09f967fe07246138ff6cfbed84675960a75f94
   {
     return hash_combine(shallow_hash(key.first), key.second.do_hash());
   }

From 8426f563200ae365bbd79dbf31e56e9d107e84f1 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Fri, 8 Oct 2021 22:23:00 +0530
Subject: [PATCH 51/79] Apply suggestions from code review

Co-authored-by: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com>
---
 cpp/src/groupby/sort/aggregate.cpp        |  2 +-
 cpp/src/groupby/sort/group_correlation.cu | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp
index 7cddfef5712..e471fccda07 100644
--- a/cpp/src/groupby/sort/aggregate.cpp
+++ b/cpp/src/groupby/sort/aggregate.cpp
@@ -534,7 +534,7 @@ void aggregate_result_functor::operator()<aggregation::MERGE_M2>(aggregation con
  *
  * @param column_0 The first column
  * @param column_1 The second column
- * @return tuple with new null mask (if null masks if input differ) and new column views
+ * @return tuple with new null mask (if null masks of input differ) and new column views
  */
 auto column_view_with_common_nulls(column_view const& column_0, column_view const& column_1)
 {
diff --git a/cpp/src/groupby/sort/group_correlation.cu b/cpp/src/groupby/sort/group_correlation.cu
index 0c11f62ea3b..daf99563270 100644
--- a/cpp/src/groupby/sort/group_correlation.cu
+++ b/cpp/src/groupby/sort/group_correlation.cu
@@ -88,17 +88,17 @@ struct covariance_transform {
     if (d_values_0.is_null(i) or d_values_1.is_null(i)) return 0.0;
 
     // This has to be device dispatch because x and y type may differ
-    auto x = value(d_values_0, i);
-    auto y = value(d_values_1, i);
+    auto const x = value(d_values_0, i);
+    auto const y = value(d_values_1, i);
 
-    size_type group_idx  = d_group_labels[i];
-    size_type group_size = d_group_sizes[group_idx];
+    size_type const group_idx  = d_group_labels[i];
+    size_type const group_size = d_group_sizes[group_idx];
 
     // prevent divide by zero error
     if (group_size == 0 or group_size - ddof <= 0) return 0.0;
 
-    ResultType xmean = d_means_0[group_idx];
-    ResultType ymean = d_means_1[group_idx];
+    ResultType const xmean = d_means_0[group_idx];
+    ResultType const ymean = d_means_1[group_idx];
     return (x - xmean) * (y - ymean) / (group_size - ddof);
   }
 };

From f7470d2e8fb4f1054cfe3cf9596f949909aaf1a1 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 13 Oct 2021 10:11:58 -0700
Subject: [PATCH 52/79] fixing multiindex to match pandas behavior

---
 python/cudf/cudf/core/groupby/groupby.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 34335a52cdf..8597127fbe1 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -809,6 +809,13 @@ def corr(self):
             ic = gb_corr.loc[:, i].interleave_columns()
             res[x] = ic
 
+        _index = cudf.DataFrame(
+            {
+                self.grouping.keys.names[0]: self.grouping.keys,
+                None: _cols * (len(_cols)),
+            }
+        )
+        res.index = _index
         return res
 
     def var(self, ddof=1):

From 407b616ab523c50890743ff71a85449be6bd12ef Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Tue, 19 Oct 2021 13:56:56 -0700
Subject: [PATCH 53/79] adding tests

---
 python/cudf/cudf/tests/test_dataframe.py | 26 ++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index c1eade0fcdc..993e20c7ac7 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8932,3 +8932,29 @@ def test_frame_series_where_other(data):
     expected = gdf.where(gdf["b"] == 1, 0)
     actual = pdf.where(pdf["b"] == 1, 0)
     assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        {
+            "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"],
+            "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2],
+            "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1],
+            "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1],
+        },
+        {
+            "id": [0] * 4 + [1] * 3,
+            "a": [10, 3, 4, 2, -3, 9, 10],
+            "b": [10, 23, -4, 2, -3, 9, 19],
+            "c": [10, -23, -4, 21, -3, 19, 19],
+        },
+    ],
+)
+def test_dataframe_pearson_corr(data):
+    gdf = cudf.DataFrame(data)
+    pdf = gdf.to_pandas()
+
+    expected = gdf.groupby("id").corr()
+    actual = pdf.groupby("id").corr()
+    assert_eq(expected, actual)

From c58cff3e81e5e7e0a1cb50e3fcfb976cb0067b35 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 20 Oct 2021 22:13:06 -0700
Subject: [PATCH 54/79] added method parameter to corr()

---
 python/cudf/cudf/core/groupby/groupby.py | 70 +++++++++++++++++++++++-
 python/cudf/cudf/tests/test_dataframe.py |  4 +-
 2 files changed, 70 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 8597127fbe1..9520a982899 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -781,8 +781,74 @@ def median(self):
         """Get the column-wise median of the values in each group."""
         return self.agg("median")
 
-    def corr(self):
-        # breakpoint()
+    def corr(self, method="pearson"):
+        """
+        Compute pairwise correlation of columns, excluding NA/null values.
+
+        Parameters
+        ----------
+        method: Method of correlation
+            {‘pearson’, ‘kendall’, ‘spearman’} or callable
+
+            pearson : standard correlation coefficient
+
+            kendall : Kendall Tau correlation coefficient
+
+            spearman : Spearman rank correlation
+
+            callable: callable with input two 1d ndarrays and returning
+            float. Note that the returned matrix from corr will have 1
+            along the diagonals and will be symmetric regardless of the
+            callable’s behavior.
+
+        min_periods: int, optional
+            Minimum number of observations required per pair of columns
+            to have a valid result.
+
+        Returns
+        ----------
+        DataFrame
+            Correlation matrix.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> gdf = cudf.DataFrame({
+        ...             "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"],
+        ...             "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2],
+        ...             "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1],
+        ...             "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1]})
+        >>> gdf
+        id  val1  val2  val3
+        0  a     5     4     4
+        1  a     4     5     5
+        2  a     6     6     6
+        3  b     4     1     1
+        4  b     8     2     2
+        5  b     7     9     9
+        6  c     4     8     8
+        7  c     5     5     5
+        8  c     2     1     1
+        >>> gdf.groupby("id").corr(method="pearson")
+                    val1      val2      val3
+        id
+        a   val1  1.000000  0.500000  0.500000
+            val2  0.500000  1.000000  1.000000
+            val3  0.500000  1.000000  1.000000
+        b   val1  1.000000  0.385727  0.385727
+            val2  0.385727  1.000000  1.000000
+            val3  0.385727  1.000000  1.000000
+        c   val1  1.000000  0.714575  0.714575
+            val2  0.714575  1.000000  1.000000
+            val3  0.714575  1.000000  1.000000
+
+        """
+
+        if method in ["kendall", "spearman"]:
+            raise NotImplementedError(
+                "Only pearson correlation is currently supported"
+            )
+
         _cols = self.grouping.values.columns.tolist()
         new_df = cudf.DataFrame({self.grouping.keys.names: self.grouping.keys})
         new_df._data.multiindex = False
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 993e20c7ac7..67b101901d1 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8955,6 +8955,6 @@ def test_dataframe_pearson_corr(data):
     gdf = cudf.DataFrame(data)
     pdf = gdf.to_pandas()
 
-    expected = gdf.groupby("id").corr()
-    actual = pdf.groupby("id").corr()
+    expected = gdf.groupby("id").corr("pearson")
+    actual = pdf.groupby("id").corr("pearson")
     assert_eq(expected, actual)

From 70be97bdc00df6c27fde754d0580e177ada78062 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 27 Oct 2021 22:56:42 -0700
Subject: [PATCH 55/79] create multiindex using groupby correlated index info

---
 python/cudf/cudf/core/groupby/groupby.py | 42 +++++++++++-------------
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index f49b6f0fc49..6e01b5f0eff 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -781,27 +781,18 @@ def median(self):
         """Get the column-wise median of the values in each group."""
         return self.agg("median")
 
-    def corr(self, method="pearson"):
+    def corr(self, method="pearson", min_periods=1):
         """
         Compute pairwise correlation of columns, excluding NA/null values.
 
         Parameters
         ----------
-        method: Method of correlation
-            {‘pearson’, ‘kendall’, ‘spearman’} or callable
+        method: Method of correlation, default 'Pearson'
+            Pearson: standard correlation coefficient.
+            Kendall, Spearman correlation and callable method
+            not yet supported.
 
-            pearson : standard correlation coefficient
-
-            kendall : Kendall Tau correlation coefficient
-
-            spearman : Spearman rank correlation
-
-            callable: callable with input two 1d ndarrays and returning
-            float. Note that the returned matrix from corr will have 1
-            along the diagonals and will be symmetric regardless of the
-            callable’s behavior.
-
-        min_periods: int, optional
+        min_periods: int, default 1
             Minimum number of observations required per pair of columns
             to have a valid result.
 
@@ -844,11 +835,12 @@ def corr(self, method="pearson"):
 
         """
 
-        if method in ["kendall", "spearman"]:
+        if not method.lower() in ["pearson"]:
             raise NotImplementedError(
                 "Only pearson correlation is currently supported"
             )
-
+        # create all combinations of the struct columns-pairs to be correlated
+        # i.e (('col1', 'col1'), ('col1', 'col2'), ('col2', 'col2'))
         _cols = self.grouping.values.columns.tolist()
         new_df = cudf.DataFrame({self.grouping.keys.names: self.grouping.keys})
         new_df._data.multiindex = False
@@ -858,7 +850,7 @@ def corr(self, method="pearson"):
             ).to_struct()
         new_gb = new_df.groupby(self.grouping)
         gb_corr = new_gb.agg("corr")
-
+        # ensure that column-pair labels are arranged in ascending order
         cols_list = []
         for i, x in enumerate(_cols):
             for j, y in enumerate(_cols):
@@ -867,18 +859,22 @@ def corr(self, method="pearson"):
                 else:
                     cols_list.append((_cols[i], _cols[j]))
         cols_split = [
-            cols_list[i : i + 3] for i in range(0, len(cols_list), 3)
+            cols_list[i : i + len(_cols)]
+            for i in range(0, len(cols_list), len(_cols))
         ]
-
+        # interleave: combine the correlation results of each column-pair
+        # into a single column
         res = cudf.DataFrame()
         for i, x in zip(cols_split, _cols):
             ic = gb_corr.loc[:, i].interleave_columns()
             res[x] = ic
-
+        # create a multiindex for the groupby correlated dataframe,
+        # to match pandas behavior
+        _idx = gb_corr._index.to_pandas().values.tolist()
         _index = cudf.DataFrame(
             {
-                self.grouping.keys.names[0]: self.grouping.keys,
-                None: _cols * (len(_cols)),
+                gb_corr.index.name: sorted(_idx * len(_cols)),
+                None: _cols * (len(gb_corr.index)),
             }
         )
         res.index = _index

From f906b7930a391215163ca6cda76a6ec0e0323379 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 27 Oct 2021 22:58:34 -0700
Subject: [PATCH 56/79] added tests - one, two, three columns cases

---
 python/cudf/cudf/tests/test_dataframe.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 67b101901d1..5ba0f94e810 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8943,6 +8943,12 @@ def test_frame_series_where_other(data):
             "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1],
             "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1],
         },
+        {
+            "id": ["a", "a", "b", "b", "c", "c"],
+            "val1": [5, 4, 6, 8, 7, 2],
+            "val2": [4, 5, 1, 2, 9, 5],
+        },
+        {"id": ["a", "a", "b", "b", "c", "c"], "val": [10, 3, 4, 2, -3, 9]},
         {
             "id": [0] * 4 + [1] * 3,
             "a": [10, 3, 4, 2, -3, 9, 10],
@@ -8958,3 +8964,10 @@ def test_dataframe_pearson_corr(data):
     expected = gdf.groupby("id").corr("pearson")
     actual = pdf.groupby("id").corr("pearson")
     assert_eq(expected, actual)
+
+
+def test_pearson_corr_empty_dataframe():
+    with pytest.raises(
+        ValueError, match="Grouper and object must have same length"
+    ):
+        cudf.DataFrame().corr("pearson")

From d800c8951a2884647028e5d317ef89ef83f66e5b Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 3 Nov 2021 15:16:42 -0700
Subject: [PATCH 57/79] added min_periods param. to cython layer

---
 python/cudf/cudf/_lib/aggregation.pyx     | 42 +++++++++++++++++++----
 python/cudf/cudf/_lib/cpp/aggregation.pxd |  5 +--
 2 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx
index 76eb3ba3bb2..f2e17965124 100644
--- a/python/cudf/cudf/_lib/aggregation.pyx
+++ b/python/cudf/cudf/_lib/aggregation.pyx
@@ -1,6 +1,6 @@
 # Copyright (c) 2020, NVIDIA CORPORATION.
 
-from enum import Enum
+from enum import Enum, IntEnum
 
 import numba
 import numpy as np
@@ -30,6 +30,7 @@ from cudf._lib.types import Interpolation
 
 cimport cudf._lib.cpp.aggregation as libcudf_aggregation
 cimport cudf._lib.cpp.types as libcudf_types
+from cudf._lib.cpp.aggregation cimport underlying_type_t_correlation_type
 
 import cudf
 
@@ -60,6 +61,21 @@ class AggregationKind(Enum):
     CORRELATION = libcudf_aggregation.aggregation.Kind.CORRELATION
 
 
+class CorrelationType(IntEnum):
+    PEARSON = (
+        <underlying_type_t_correlation_type>
+        libcudf_aggregation.correlation_type.PEARSON
+    )
+    KENDALL = (
+        <underlying_type_t_correlation_type>
+        libcudf_aggregation.correlation_type.KENDALL
+    )
+    SPEARMAN = (
+        <underlying_type_t_correlation_type>
+        libcudf_aggregation.correlation_type.SPEARMAN
+    )
+
+
 cdef class Aggregation:
     """A Cython wrapper for aggregations.
 
@@ -323,11 +339,18 @@ cdef class Aggregation:
         return agg
 
     @classmethod
-    def corr(cls):
+    def corr(cls, method, libcudf_types.size_type min_periods):
         cdef Aggregation agg = cls()
+        cdef libcudf_aggregation.correlation_type c_method = (
+            <libcudf_aggregation.correlation_type> (
+                <underlying_type_t_correlation_type> (
+                    CorrelationType[method.upper()]
+                )
+            )
+        )
         agg.c_obj = move(
             libcudf_aggregation.make_correlation_aggregation[aggregation](
-                libcudf_aggregation.correlation_type.PEARSON
+                c_method, min_periods
             ))
         return agg
 
@@ -703,16 +726,23 @@ cdef class GroupbyAggregation:
         return agg
 
     @classmethod
-    def corr(cls):
+    def corr(cls, method, libcudf_types.size_type min_periods):
         cdef GroupbyAggregation agg = cls()
+        cdef libcudf_aggregation.correlation_type c_method = (
+            <libcudf_aggregation.correlation_type> (
+                <underlying_type_t_correlation_type> (
+                    CorrelationType[method.upper()]
+                )
+            )
+        )
         agg.c_obj = move(
             libcudf_aggregation.
             make_correlation_aggregation[groupby_aggregation](
-                libcudf_aggregation.correlation_type.PEARSON
+                c_method, min_periods
             ))
-
         return agg
 
+
 cdef class GroupbyScanAggregation:
     """A Cython wrapper for groupby scan aggregations.
 
diff --git a/python/cudf/cudf/_lib/cpp/aggregation.pxd b/python/cudf/cudf/_lib/cpp/aggregation.pxd
index 04deeb877d1..31839ee5fcc 100644
--- a/python/cudf/cudf/_lib/cpp/aggregation.pxd
+++ b/python/cudf/cudf/_lib/cpp/aggregation.pxd
@@ -1,5 +1,5 @@
 # Copyright (c) 2020, NVIDIA CORPORATION.
-
+from libc.stdint cimport int32_t
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.vector cimport vector
@@ -11,6 +11,7 @@ from cudf._lib.cpp.types cimport (
     size_type,
 )
 
+ctypedef int32_t underlying_type_t_correlation_type
 
 cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
 
@@ -115,4 +116,4 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
         data_type output_type) except +
 
     cdef unique_ptr[T] make_correlation_aggregation[T](
-        correlation_type type) except +
+        correlation_type type, size_type min_periods) except +

From db8b47f94e002a4a08ab8b4f37ee7db21b16ceaa Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 3 Nov 2021 15:23:22 -0700
Subject: [PATCH 58/79] create new_df from grouping keys data

---
 python/cudf/cudf/core/groupby/groupby.py | 10 +++++++---
 python/cudf/cudf/tests/test_dataframe.py |  7 -------
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 6e01b5f0eff..98752eb3928 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -71,6 +71,8 @@ def __init__(
         """
         self.obj = obj
         self._as_index = as_index
+        self._by = by
+        self._level = level
         self._sort = sort
         self._dropna = dropna
 
@@ -842,14 +844,16 @@ def corr(self, method="pearson", min_periods=1):
         # create all combinations of the struct columns-pairs to be correlated
         # i.e (('col1', 'col1'), ('col1', 'col2'), ('col2', 'col2'))
         _cols = self.grouping.values.columns.tolist()
-        new_df = cudf.DataFrame({self.grouping.keys.names: self.grouping.keys})
+        # breakpoint()
+        new_df = cudf.DataFrame._from_data(self.grouping.keys._data)
         new_df._data.multiindex = False
         for i in tuple(itertools.combinations_with_replacement(_cols, 2)):
             new_df[i] = cudf.DataFrame(
                 {"x": self.obj[i[0]], "y": self.obj[i[1]]}
             ).to_struct()
-        new_gb = new_df.groupby(self.grouping)
-        gb_corr = new_gb.agg("corr")
+        new_gb = new_df.groupby(by=self._by, level=self._level)
+        # breakpoint()
+        gb_corr = new_gb.agg(lambda x: x.corr(method, min_periods))
         # ensure that column-pair labels are arranged in ascending order
         cols_list = []
         for i, x in enumerate(_cols):
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 5ba0f94e810..23416ed63ac 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8964,10 +8964,3 @@ def test_dataframe_pearson_corr(data):
     expected = gdf.groupby("id").corr("pearson")
     actual = pdf.groupby("id").corr("pearson")
     assert_eq(expected, actual)
-
-
-def test_pearson_corr_empty_dataframe():
-    with pytest.raises(
-        ValueError, match="Grouper and object must have same length"
-    ):
-        cudf.DataFrame().corr("pearson")

From 36baa30d635e40b3c76c439e751366c71b56909e Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Tue, 9 Nov 2021 13:30:52 -0800
Subject: [PATCH 59/79] updated copyright years

---
 python/cudf/cudf/_lib/aggregation.pxd | 2 +-
 python/cudf/cudf/_lib/aggregation.pyx | 2 +-
 python/cudf/cudf/_lib/groupby.pyx     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/_lib/aggregation.pxd b/python/cudf/cudf/_lib/aggregation.pxd
index 84bcaed1b36..85a729ad2a3 100644
--- a/python/cudf/cudf/_lib/aggregation.pxd
+++ b/python/cudf/cudf/_lib/aggregation.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 
diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx
index f2e17965124..68f7101b6ee 100644
--- a/python/cudf/cudf/_lib/aggregation.pyx
+++ b/python/cudf/cudf/_lib/aggregation.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 
 from enum import Enum, IntEnum
 
diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx
index b093d4cf364..314542c9549 100644
--- a/python/cudf/cudf/_lib/groupby.pyx
+++ b/python/cudf/cudf/_lib/groupby.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 
 from collections import defaultdict
 

From 54ef35bf8cab57a37ea2ca53ec73ff46761e4772 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Tue, 9 Nov 2021 13:33:39 -0800
Subject: [PATCH 60/79] added test for nulls and unsupported methods

---
 python/cudf/cudf/tests/test_dataframe.py | 35 +++++++++++++++++++-----
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 1323282e573..1ce84c2a028 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8943,17 +8943,20 @@ def test_frame_series_where_other(data):
             "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1],
             "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1],
         },
-        {
-            "id": ["a", "a", "b", "b", "c", "c"],
-            "val1": [5, 4, 6, 8, 7, 2],
-            "val2": [4, 5, 1, 2, 9, 5],
-        },
-        {"id": ["a", "a", "b", "b", "c", "c"], "val": [10, 3, 4, 2, -3, 9]},
         {
             "id": [0] * 4 + [1] * 3,
             "a": [10, 3, 4, 2, -3, 9, 10],
             "b": [10, 23, -4, 2, -3, 9, 19],
-            "c": [10, -23, -4, 21, -3, 19, 19],
+        },
+        {"id": ["a", "a", "b", "b", "c", "c"], "val": [10, 3, 4, 2, -3, 9]},
+        {
+            "id": ["a", "a", "b", "b", "c", "c"],
+            "val": [None, None, None, None, None, None],
+        },
+        {
+            "id": ["a", "a", "b", "b", "c", "c"],
+            "val1": [None, 4, 6, 8, None, 2],
+            "val2": [4, 5, None, 2, 9, None],
         },
     ],
 )
@@ -8964,3 +8967,21 @@ def test_dataframe_pearson_corr(data):
     expected = gdf.groupby("id").corr("pearson")
     actual = pdf.groupby("id").corr("pearson")
     assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize("method", ["kendall", "spearman"])
+def test_dataframe_pearson_corr_unsupported_methods(method):
+    gdf = cudf.DataFrame(
+        {
+            "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"],
+            "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2],
+            "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1],
+            "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1],
+        }
+    )
+
+    with pytest.raises(
+        NotImplementedError,
+        match="Only pearson correlation is currently supported",
+    ):
+        gdf.groupby("id").corr(method)

From 1e1431be0d4c5136069a237ac4b63ac9f5c4df50 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Tue, 9 Nov 2021 19:46:18 -0800
Subject: [PATCH 61/79] minor review-fixes

---
 python/cudf/cudf/core/groupby/groupby.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 98752eb3928..c9f69252001 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -789,12 +789,12 @@ def corr(self, method="pearson", min_periods=1):
 
         Parameters
         ----------
-        method: Method of correlation, default 'Pearson'
+        method: Method of correlation
             Pearson: standard correlation coefficient.
             Kendall, Spearman correlation and callable method
             not yet supported.
 
-        min_periods: int, default 1
+        min_periods: int, optional
             Minimum number of observations required per pair of columns
             to have a valid result.
 
@@ -841,7 +841,8 @@ def corr(self, method="pearson", min_periods=1):
             raise NotImplementedError(
                 "Only pearson correlation is currently supported"
             )
-        # create all combinations of the struct columns-pairs to be correlated
+        # create expanded dataframe consisting all combinations of the
+        # struct columns-pairs to be correlated
         # i.e (('col1', 'col1'), ('col1', 'col2'), ('col2', 'col2'))
         _cols = self.grouping.values.columns.tolist()
         # breakpoint()

From ab6cd9532569e5fdc65854e0d1c06e13996a93f0 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Tue, 9 Nov 2021 19:55:51 -0800
Subject: [PATCH 62/79] added tests for: invalid types, empty dataframe and
 multiindex. All failing

---
 python/cudf/cudf/tests/test_dataframe.py | 62 ++++++++++++++++++++++--
 1 file changed, 59 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 1ce84c2a028..c8e4024a666 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8958,14 +8958,21 @@ def test_frame_series_where_other(data):
             "val1": [None, 4, 6, 8, None, 2],
             "val2": [4, 5, None, 2, 9, None],
         },
+        {"id": ["a"], "val1": [2], "val2": [3]},
     ],
 )
-def test_dataframe_pearson_corr(data):
+@pytest.mark.parametrize(
+    "min_periods", [0, 1, 2, 3, 4],
+)
+def test_dataframe_pearson_corr(data, min_periods):
     gdf = cudf.DataFrame(data)
     pdf = gdf.to_pandas()
 
-    expected = gdf.groupby("id").corr("pearson")
-    actual = pdf.groupby("id").corr("pearson")
+    expected = gdf.groupby("id").corr(
+        method="pearson", min_periods=min_periods
+    )
+    actual = pdf.groupby("id").corr(method="pearson", min_periods=min_periods)
+
     assert_eq(expected, actual)
 
 
@@ -8985,3 +8992,52 @@ def test_dataframe_pearson_corr_unsupported_methods(method):
         match="Only pearson correlation is currently supported",
     ):
         gdf.groupby("id").corr(method)
+
+
+def test_pearson_corr_empty_dataframe():
+    gdf = cudf.DataFrame(columns=["id", "val1", "val2"])
+    pdf = gdf.to_pandas()
+
+    expected = gdf.groupby("id").corr("pearson")
+    actual = pdf.groupby("id").corr("pearson")
+
+    assert_eq(
+        expected, actual
+    )  # fails: DataFrame.index classes are not equivalent
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        {
+            "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"],
+            "val1": ["v", "n", "k", "l", "m", "i", "y", "r", "w"],
+            "val2": ["d", "d", "d", "e", "e", "e", "f", "f", "f"],
+        },
+        {
+            "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"],
+            "val1": [1, 1, 1, 2, 2, 2, 3, 3, 3],
+            "val2": ["d", "d", "d", "e", "e", "e", "f", "f", "f"],
+        },
+    ],
+)
+@pytest.mark.parametrize("groupby", ["id", "val1", "val2"])
+def test_pearson_corr_invalid_column_types(data, groupby):
+    try:
+        cudf.DataFrame(data).groupby(groupby).corr("pearson")
+    except RuntimeError as e:
+        if "Unsupported type-agg combination" in str(e):
+            raise TypeError(
+                "Correlation accepts only numerical column-pairs"
+            ) from e
+
+
+def test_pearson_corr_multiindex_dataframe():
+    gdf = cudf.DataFrame(
+        {"a": [1, 1, 2, 2], "b": [1, 1, 2, 3], "c": [2, 3, 4, 5]}
+    ).set_index(["a", "b"])
+
+    expected = gdf.groupby(level="a").corr("pearson")
+    actual = gdf.to_pandas().groupby(level="a").corr("pearson")
+
+    assert_eq(expected, actual)

From 34d412e22916b11873bf88fe10d2d91f7dcbc11d Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 10 Nov 2021 01:28:12 -0800
Subject: [PATCH 63/79] added test for grouping by multiple columns, passes

---
 python/cudf/cudf/tests/test_dataframe.py | 77 +++++++++++++-----------
 1 file changed, 43 insertions(+), 34 deletions(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index c8e4024a666..fe800fefac4 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8935,49 +8935,58 @@ def test_frame_series_where_other(data):
 
 
 @pytest.mark.parametrize(
-    "data",
+    "data, gkey",
     [
-        {
-            "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"],
-            "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2],
-            "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1],
-            "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1],
-        },
-        {
-            "id": [0] * 4 + [1] * 3,
-            "a": [10, 3, 4, 2, -3, 9, 10],
-            "b": [10, 23, -4, 2, -3, 9, 19],
-        },
-        {"id": ["a", "a", "b", "b", "c", "c"], "val": [10, 3, 4, 2, -3, 9]},
-        {
-            "id": ["a", "a", "b", "b", "c", "c"],
-            "val": [None, None, None, None, None, None],
-        },
-        {
-            "id": ["a", "a", "b", "b", "c", "c"],
-            "val1": [None, 4, 6, 8, None, 2],
-            "val2": [4, 5, None, 2, 9, None],
-        },
-        {"id": ["a"], "val1": [2], "val2": [3]},
+        (
+            {
+                "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"],
+                "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2],
+                "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1],
+                "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1],
+            },
+            ["id", "val1", "val2"],
+        ),
+        (
+            {
+                "id": [0] * 4 + [1] * 3,
+                "a": [10, 3, 4, 2, -3, 9, 10],
+                "b": [10, 23, -4, 2, -3, 9, 19],
+            },
+            ["id", "a"],
+        ),
+        (
+            {
+                "id": ["a", "a", "b", "b", "c", "c"],
+                "val": [None, None, None, None, None, None],
+            },
+            ["id"],
+        ),
+        (
+            {
+                "id": ["a", "a", "b", "b", "c", "c"],
+                "val1": [None, 4, 6, 8, None, 2],
+                "val2": [4, 5, None, 2, 9, None],
+            },
+            ["id"],
+        ),
+        ({"id": [1.0], "val1": [2.0], "val2": [3.0]}, ["id"]),
     ],
 )
 @pytest.mark.parametrize(
-    "min_periods", [0, 1, 2, 3, 4],
+    "min_per", [0, 1, 2, 3, 4],
 )
-def test_dataframe_pearson_corr(data, min_periods):
+def test_dataframe_pearson_corr(data, gkey, min_per):
     gdf = cudf.DataFrame(data)
     pdf = gdf.to_pandas()
 
-    expected = gdf.groupby("id").corr(
-        method="pearson", min_periods=min_periods
-    )
-    actual = pdf.groupby("id").corr(method="pearson", min_periods=min_periods)
+    expected = gdf.groupby(gkey).corr(method="pearson", min_periods=min_per)
+    actual = pdf.groupby(gkey).corr(method="pearson", min_periods=min_per)
 
     assert_eq(expected, actual)
 
 
 @pytest.mark.parametrize("method", ["kendall", "spearman"])
-def test_dataframe_pearson_corr_unsupported_methods(method):
+def test_pearson_corr_unsupported_methods(method):
     gdf = cudf.DataFrame(
         {
             "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"],
@@ -8994,7 +9003,7 @@ def test_dataframe_pearson_corr_unsupported_methods(method):
         gdf.groupby("id").corr(method)
 
 
-def test_pearson_corr_empty_dataframe():
+def test_pearson_corr_empty_columns():
     gdf = cudf.DataFrame(columns=["id", "val1", "val2"])
     pdf = gdf.to_pandas()
 
@@ -9021,10 +9030,10 @@ def test_pearson_corr_empty_dataframe():
         },
     ],
 )
-@pytest.mark.parametrize("groupby", ["id", "val1", "val2"])
-def test_pearson_corr_invalid_column_types(data, groupby):
+@pytest.mark.parametrize("gkey", ["id", "val1", "val2"])
+def test_pearson_corr_invalid_column_types(data, gkey):
     try:
-        cudf.DataFrame(data).groupby(groupby).corr("pearson")
+        cudf.DataFrame(data).groupby(gkey).corr("pearson")
     except RuntimeError as e:
         if "Unsupported type-agg combination" in str(e):
             raise TypeError(

From b6420494f63b7622525638572c6271745723b4fc Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 10 Nov 2021 01:34:06 -0800
Subject: [PATCH 64/79] fixes multiindex to match pd for multiple
 groupings-cases

---
 python/cudf/cudf/core/groupby/groupby.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index c9f69252001..f80ff3e3d94 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -841,11 +841,11 @@ def corr(self, method="pearson", min_periods=1):
             raise NotImplementedError(
                 "Only pearson correlation is currently supported"
             )
+
         # create expanded dataframe consisting all combinations of the
         # struct columns-pairs to be correlated
         # i.e (('col1', 'col1'), ('col1', 'col2'), ('col2', 'col2'))
         _cols = self.grouping.values.columns.tolist()
-        # breakpoint()
         new_df = cudf.DataFrame._from_data(self.grouping.keys._data)
         new_df._data.multiindex = False
         for i in tuple(itertools.combinations_with_replacement(_cols, 2)):
@@ -853,8 +853,8 @@ def corr(self, method="pearson", min_periods=1):
                 {"x": self.obj[i[0]], "y": self.obj[i[1]]}
             ).to_struct()
         new_gb = new_df.groupby(by=self._by, level=self._level)
-        # breakpoint()
         gb_corr = new_gb.agg(lambda x: x.corr(method, min_periods))
+
         # ensure that column-pair labels are arranged in ascending order
         cols_list = []
         for i, x in enumerate(_cols):
@@ -867,22 +867,23 @@ def corr(self, method="pearson", min_periods=1):
             cols_list[i : i + len(_cols)]
             for i in range(0, len(cols_list), len(_cols))
         ]
-        # interleave: combine the correlation results of each column-pair
+
+        # interleave: combine the correlation results for each column-pair
         # into a single column
         res = cudf.DataFrame()
         for i, x in zip(cols_split, _cols):
             ic = gb_corr.loc[:, i].interleave_columns()
             res[x] = ic
+
         # create a multiindex for the groupby correlated dataframe,
         # to match pandas behavior
         _idx = gb_corr._index.to_pandas().values.tolist()
         _index = cudf.DataFrame(
-            {
-                gb_corr.index.name: sorted(_idx * len(_cols)),
-                None: _cols * (len(gb_corr.index)),
-            }
+            sorted(_idx * len(_cols)), columns=gb_corr.index.names
         )
+        _index[None] = _cols * (len(gb_corr.index))
         res.index = _index
+
         return res
 
     def var(self, ddof=1):

From 0c2e17ec3ee59d8eee7aa1fd9fd5210aadad6814 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 10 Nov 2021 13:33:04 -0800
Subject: [PATCH 65/79] changes:call with ashwin-create MI for non empty
 results, capture runtime error, etc

---
 python/cudf/cudf/core/groupby/groupby.py | 46 +++++++++++++++++-------
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index f80ff3e3d94..6bc04094d66 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -13,7 +13,8 @@
 from cudf._typing import DataFrameOrSeries
 from cudf.api.types import is_list_like
 from cudf.core.abc import Serializable
-from cudf.core.column.column import arange
+from cudf.core.column.column import arange, as_column
+from cudf.core.index import _index_from_data
 from cudf.utils.utils import GetAttrGetItemMixin, cached_property
 
 
@@ -845,15 +846,32 @@ def corr(self, method="pearson", min_periods=1):
         # create expanded dataframe consisting all combinations of the
         # struct columns-pairs to be correlated
         # i.e (('col1', 'col1'), ('col1', 'col2'), ('col2', 'col2'))
+        # breakpoint()
         _cols = self.grouping.values.columns.tolist()
-        new_df = cudf.DataFrame._from_data(self.grouping.keys._data)
-        new_df._data.multiindex = False
+
+        if self._by:
+            new_df = cudf.DataFrame._from_data(self.grouping.keys._data)
+            new_df._data.multiindex = False
+        else:
+            new_df = cudf.DataFrame._from_data(
+                {}, index=_index_from_data(self.grouping.keys._data)
+            )
+
         for i in tuple(itertools.combinations_with_replacement(_cols, 2)):
-            new_df[i] = cudf.DataFrame(
-                {"x": self.obj[i[0]], "y": self.obj[i[1]]}
+            new_df._data[i] = cudf.DataFrame._from_data(
+                {"x": self.obj._data[i[0]], "y": self.obj._data[i[1]]}
             ).to_struct()
         new_gb = new_df.groupby(by=self._by, level=self._level)
-        gb_corr = new_gb.agg(lambda x: x.corr(method, min_periods))
+        try:
+
+            gb_corr = new_gb.agg(lambda x: x.corr(method, min_periods))
+        except RuntimeError as e:
+            if "Unsupported type-agg combination" in str(e):
+                raise TypeError(
+                    "Correlation accepts only numerical column-pairs"
+                ) from e
+            else:
+                raise
 
         # ensure that column-pair labels are arranged in ascending order
         cols_list = []
@@ -877,12 +895,16 @@ def corr(self, method="pearson", min_periods=1):
 
         # create a multiindex for the groupby correlated dataframe,
         # to match pandas behavior
-        _idx = gb_corr._index.to_pandas().values.tolist()
-        _index = cudf.DataFrame(
-            sorted(_idx * len(_cols)), columns=gb_corr.index.names
-        )
-        _index[None] = _cols * (len(gb_corr.index))
-        res.index = _index
+        _idx = gb_corr.index.repeat(len(_cols))
+        idx_sort_order = _idx._get_sorted_inds()
+        _idx = _idx._gather(idx_sort_order)
+        # breakpoint()
+        if len(gb_corr):
+            # TO-DO: Should the operation below be done on the CPU instead?
+            _idx._data[None] = as_column(
+                cudf.Series(_cols).tile(len(gb_corr.index))
+            )
+        res.index = _index_from_data(_idx._data)
 
         return res
 

From 124c576bc761b7de9bd16be1f272bfcce5bf06cb Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 10 Nov 2021 13:34:32 -0800
Subject: [PATCH 66/79] all tests passing now

---
 python/cudf/cudf/tests/test_dataframe.py | 27 +++++++++++-------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index fe800fefac4..08bb54d3423 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8975,12 +8975,12 @@ def test_frame_series_where_other(data):
 @pytest.mark.parametrize(
     "min_per", [0, 1, 2, 3, 4],
 )
-def test_dataframe_pearson_corr(data, gkey, min_per):
+def test_pearson_corr_passing(data, gkey, min_per):
     gdf = cudf.DataFrame(data)
     pdf = gdf.to_pandas()
 
-    expected = gdf.groupby(gkey).corr(method="pearson", min_periods=min_per)
-    actual = pdf.groupby(gkey).corr(method="pearson", min_periods=min_per)
+    actual = gdf.groupby(gkey).corr(method="pearson", min_periods=min_per)
+    expected = pdf.groupby(gkey).corr(method="pearson", min_periods=min_per)
 
     assert_eq(expected, actual)
 
@@ -9007,12 +9007,12 @@ def test_pearson_corr_empty_columns():
     gdf = cudf.DataFrame(columns=["id", "val1", "val2"])
     pdf = gdf.to_pandas()
 
-    expected = gdf.groupby("id").corr("pearson")
-    actual = pdf.groupby("id").corr("pearson")
+    actual = gdf.groupby("id").corr("pearson")
+    expected = pdf.groupby("id").corr("pearson")
 
     assert_eq(
-        expected, actual
-    )  # fails: DataFrame.index classes are not equivalent
+        expected, actual, check_dtype=False, check_index_type=False,
+    )
 
 
 @pytest.mark.parametrize(
@@ -9032,13 +9032,10 @@ def test_pearson_corr_empty_columns():
 )
 @pytest.mark.parametrize("gkey", ["id", "val1", "val2"])
 def test_pearson_corr_invalid_column_types(data, gkey):
-    try:
+    with pytest.raises(
+        TypeError, match="Correlation accepts only numerical column-pairs",
+    ):
         cudf.DataFrame(data).groupby(gkey).corr("pearson")
-    except RuntimeError as e:
-        if "Unsupported type-agg combination" in str(e):
-            raise TypeError(
-                "Correlation accepts only numerical column-pairs"
-            ) from e
 
 
 def test_pearson_corr_multiindex_dataframe():
@@ -9046,7 +9043,7 @@ def test_pearson_corr_multiindex_dataframe():
         {"a": [1, 1, 2, 2], "b": [1, 1, 2, 3], "c": [2, 3, 4, 5]}
     ).set_index(["a", "b"])
 
-    expected = gdf.groupby(level="a").corr("pearson")
-    actual = gdf.to_pandas().groupby(level="a").corr("pearson")
+    actual = gdf.groupby(level="a").corr("pearson")
+    expected = gdf.to_pandas().groupby(level="a").corr("pearson")
 
     assert_eq(expected, actual)

From 23bfff7c0bfae1fc12c400cdfe22667351e757d9 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Thu, 11 Nov 2021 21:33:29 -0800
Subject: [PATCH 67/79] added corr() aggregation to cudf GroupBy docs

---
 docs/cudf/source/basics/groupby.rst | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/docs/cudf/source/basics/groupby.rst b/docs/cudf/source/basics/groupby.rst
index 04c4d42fa2a..05a631bd312 100644
--- a/docs/cudf/source/basics/groupby.rst
+++ b/docs/cudf/source/basics/groupby.rst
@@ -128,6 +128,30 @@ Aggregations on groups is supported via the ``agg`` method:
     1   4   1  2.0
     2   5   2  4.5
 
+    >>> gdf
+   id  val1  val2  val3
+   0  a     5     4     4
+   1  a     4     5     5
+   2  a     6     6     6
+   3  b     4     1     1
+   4  b     8     2     2
+   5  b     7     9     9
+   6  c     4     8     8
+   7  c     5     5     5
+   8  c     2     1     1
+   >>> gdf.groupby("id").corr(method="pearson")
+               val1      val2      val3
+   id
+   a   val1  1.000000  0.500000  0.500000
+      val2  0.500000  1.000000  1.000000
+      val3  0.500000  1.000000  1.000000
+   b   val1  1.000000  0.385727  0.385727
+      val2  0.385727  1.000000  1.000000
+      val3  0.385727  1.000000  1.000000
+   c   val1  1.000000  0.714575  0.714575
+      val2  0.714575  1.000000  1.000000
+      val3  0.714575  1.000000  1.000000
+
 The following table summarizes the available aggregations and the types
 that support them:
 
@@ -169,6 +193,9 @@ that support them:
    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
    | unique                             | ✅        | ✅         | ✅       | ✅            |        |          |            |           |
    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+   | corr                               | ✅        |            |          |               |        |          |            |           |
+   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+   
 
 GroupBy apply
 -------------

From ee5d30ede17c153b38fd0975d7be4dcee63e78b9 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Thu, 11 Nov 2021 21:35:35 -0800
Subject: [PATCH 68/79] fixed copyright years in aggregation.pxd

---
 python/cudf/cudf/_lib/cpp/aggregation.pxd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/cpp/aggregation.pxd b/python/cudf/cudf/_lib/cpp/aggregation.pxd
index 31839ee5fcc..3982b4fecbb 100644
--- a/python/cudf/cudf/_lib/cpp/aggregation.pxd
+++ b/python/cudf/cudf/_lib/cpp/aggregation.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 from libc.stdint cimport int32_t
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string

From f3b85d1702a57c973b6a03a7126af806a9012867 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Thu, 11 Nov 2021 21:37:39 -0800
Subject: [PATCH 69/79] minor review fixes- list comprehension, rm breakpoints

---
 python/cudf/cudf/core/groupby/groupby.py | 28 ++++++++++--------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 6bc04094d66..39a79f3cb7b 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -846,7 +846,6 @@ def corr(self, method="pearson", min_periods=1):
         # create expanded dataframe consisting all combinations of the
         # struct columns-pairs to be correlated
         # i.e (('col1', 'col1'), ('col1', 'col2'), ('col2', 'col2'))
-        # breakpoint()
         _cols = self.grouping.values.columns.tolist()
 
         if self._by:
@@ -862,11 +861,11 @@ def corr(self, method="pearson", min_periods=1):
                 {"x": self.obj._data[i[0]], "y": self.obj._data[i[1]]}
             ).to_struct()
         new_gb = new_df.groupby(by=self._by, level=self._level)
-        try:
 
+        try:
             gb_corr = new_gb.agg(lambda x: x.corr(method, min_periods))
         except RuntimeError as e:
-            if "Unsupported type-agg combination" in str(e):
+            if "Unsupported groupby reduction type-agg combination" in str(e):
                 raise TypeError(
                     "Correlation accepts only numerical column-pairs"
                 ) from e
@@ -874,13 +873,11 @@ def corr(self, method="pearson", min_periods=1):
                 raise
 
         # ensure that column-pair labels are arranged in ascending order
-        cols_list = []
-        for i, x in enumerate(_cols):
-            for j, y in enumerate(_cols):
-                if i > j:
-                    cols_list.append((_cols[j], _cols[i]))
-                else:
-                    cols_list.append((_cols[i], _cols[j]))
+        cols_list = [
+            (_cols[j], _cols[i]) if i > j else (_cols[i], _cols[j])
+            for j, y in enumerate(_cols)
+            for i, x in enumerate(_cols)
+        ]
         cols_split = [
             cols_list[i : i + len(_cols)]
             for i in range(0, len(cols_list), len(_cols))
@@ -895,16 +892,15 @@ def corr(self, method="pearson", min_periods=1):
 
         # create a multiindex for the groupby correlated dataframe,
         # to match pandas behavior
-        _idx = gb_corr.index.repeat(len(_cols))
-        idx_sort_order = _idx._get_sorted_inds()
-        _idx = _idx._gather(idx_sort_order)
-        # breakpoint()
+        unsorted_idx = gb_corr.index.repeat(len(_cols))
+        idx_sort_order = unsorted_idx._get_sorted_inds()
+        sorted_idx = unsorted_idx._gather(idx_sort_order)
         if len(gb_corr):
             # TO-DO: Should the operation below be done on the CPU instead?
-            _idx._data[None] = as_column(
+            sorted_idx._data[None] = as_column(
                 cudf.Series(_cols).tile(len(gb_corr.index))
             )
-        res.index = _index_from_data(_idx._data)
+        res.index = _index_from_data(sorted_idx._data)
 
         return res
 

From b22392595766451b626a1e9e07015265b662cd35 Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com>
Date: Mon, 15 Nov 2021 13:23:16 -0800
Subject: [PATCH 70/79] apply @isvoid suggestions

Co-authored-by: Michael Wang <isVoid@users.noreply.github.com>
---
 python/cudf/cudf/core/groupby/groupby.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 23a9947d440..53f242b5d35 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -845,19 +845,13 @@ def corr(self, method="pearson", min_periods=1):
         # i.e (('col1', 'col1'), ('col1', 'col2'), ('col2', 'col2'))
         _cols = self.grouping.values.columns.tolist()
 
-        if self._by:
-            new_df = cudf.DataFrame._from_data(self.grouping.keys._data)
-            new_df._data.multiindex = False
-        else:
-            new_df = cudf.DataFrame._from_data(
-                {}, index=_index_from_data(self.grouping.keys._data)
-            )
+        new_df_data = {}
 
         for i in tuple(itertools.combinations_with_replacement(_cols, 2)):
-            new_df._data[i] = cudf.DataFrame._from_data(
+            new_df_data[i] = cudf.DataFrame._from_data(
                 {"x": self.obj._data[i[0]], "y": self.obj._data[i[1]]}
             ).to_struct()
-        new_gb = new_df.groupby(by=self._by, level=self._level)
+        new_gb = new_gb = cudf.DataFrame._from_data(new_df_data).groupby(by=self.grouping.keys)
 
         try:
             gb_corr = new_gb.agg(lambda x: x.corr(method, min_periods))

From 20c9273754dbd9680dcf2178ec7e46f871e6b3a2 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Tue, 16 Nov 2021 18:35:21 -0800
Subject: [PATCH 71/79] reversed copyright fix in cudf/_lib/aggregation.pxd

---
 docs/cudf/source/basics/groupby.rst | 31 +++++++----------------------
 1 file changed, 7 insertions(+), 24 deletions(-)

diff --git a/docs/cudf/source/basics/groupby.rst b/docs/cudf/source/basics/groupby.rst
index 05a631bd312..0107b4b27ca 100644
--- a/docs/cudf/source/basics/groupby.rst
+++ b/docs/cudf/source/basics/groupby.rst
@@ -127,30 +127,13 @@ Aggregations on groups is supported via the ``agg`` method:
     a
     1   4   1  2.0
     2   5   2  4.5
-
-    >>> gdf
-   id  val1  val2  val3
-   0  a     5     4     4
-   1  a     4     5     5
-   2  a     6     6     6
-   3  b     4     1     1
-   4  b     8     2     2
-   5  b     7     9     9
-   6  c     4     8     8
-   7  c     5     5     5
-   8  c     2     1     1
-   >>> gdf.groupby("id").corr(method="pearson")
-               val1      val2      val3
-   id
-   a   val1  1.000000  0.500000  0.500000
-      val2  0.500000  1.000000  1.000000
-      val3  0.500000  1.000000  1.000000
-   b   val1  1.000000  0.385727  0.385727
-      val2  0.385727  1.000000  1.000000
-      val3  0.385727  1.000000  1.000000
-   c   val1  1.000000  0.714575  0.714575
-      val2  0.714575  1.000000  1.000000
-      val3  0.714575  1.000000  1.000000
+    >>> df.groupby("a").corr(method="pearson")
+              b          c
+    a                      
+    1 b  1.000000  0.866025
+      c  0.866025  1.000000
+    2 b  1.000000  1.000000
+      c  1.000000  1.000000
 
 The following table summarizes the available aggregations and the types
 that support them:

From af08150f037c951c98f9b8f21d8a1c94e1a1d119 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Tue, 16 Nov 2021 18:50:54 -0800
Subject: [PATCH 72/79] use existing dataframe for corr() example

---
 docs/cudf/source/basics/groupby.rst | 31 +++++++----------------------
 1 file changed, 7 insertions(+), 24 deletions(-)

diff --git a/docs/cudf/source/basics/groupby.rst b/docs/cudf/source/basics/groupby.rst
index 05a631bd312..1596743b30f 100644
--- a/docs/cudf/source/basics/groupby.rst
+++ b/docs/cudf/source/basics/groupby.rst
@@ -127,30 +127,13 @@ Aggregations on groups is supported via the ``agg`` method:
     a
     1   4   1  2.0
     2   5   2  4.5
-
-    >>> gdf
-   id  val1  val2  val3
-   0  a     5     4     4
-   1  a     4     5     5
-   2  a     6     6     6
-   3  b     4     1     1
-   4  b     8     2     2
-   5  b     7     9     9
-   6  c     4     8     8
-   7  c     5     5     5
-   8  c     2     1     1
-   >>> gdf.groupby("id").corr(method="pearson")
-               val1      val2      val3
-   id
-   a   val1  1.000000  0.500000  0.500000
-      val2  0.500000  1.000000  1.000000
-      val3  0.500000  1.000000  1.000000
-   b   val1  1.000000  0.385727  0.385727
-      val2  0.385727  1.000000  1.000000
-      val3  0.385727  1.000000  1.000000
-   c   val1  1.000000  0.714575  0.714575
-      val2  0.714575  1.000000  1.000000
-      val3  0.714575  1.000000  1.000000
+   >>> df.groupby("a").corr(method="pearson")
+              b          c
+    a                      
+    1 b  1.000000  0.866025
+      c  0.866025  1.000000
+    2 b  1.000000  1.000000
+      c  1.000000  1.000000
 
 The following table summarizes the available aggregations and the types
 that support them:

From df616d0e37b34db02b7f49122bef7f1aeb2120e8 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 17 Nov 2021 11:37:16 -0800
Subject: [PATCH 73/79] noted that corr() is supported with decimals in the
 cudf docs

---
 docs/cudf/source/basics/groupby.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/cudf/source/basics/groupby.rst b/docs/cudf/source/basics/groupby.rst
index 0107b4b27ca..f3269768025 100644
--- a/docs/cudf/source/basics/groupby.rst
+++ b/docs/cudf/source/basics/groupby.rst
@@ -176,7 +176,7 @@ that support them:
    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
    | unique                             | ✅        | ✅         | ✅       | ✅            |        |          |            |           |
    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | corr                               | ✅        |            |          |               |        |          |            |           |
+   | corr                               | ✅        |            |          |               |        |          |            | ✅        |
    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
    
 

From 94f198446be9d061e45835d3f98018d46889d64c Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 17 Nov 2021 11:40:40 -0800
Subject: [PATCH 74/79] reverted copyright year in cudf/_lib/aggregation.pxd

---
 python/cudf/cudf/_lib/aggregation.pxd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/aggregation.pxd b/python/cudf/cudf/_lib/aggregation.pxd
index 85a729ad2a3..84bcaed1b36 100644
--- a/python/cudf/cudf/_lib/aggregation.pxd
+++ b/python/cudf/cudf/_lib/aggregation.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 

From 663a71b9ecf22c08b654428224f8560d5e4f3d61 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Wed, 17 Nov 2021 11:59:19 -0800
Subject: [PATCH 75/79] addressed all reviews for groupby.py

---
 python/cudf/cudf/core/groupby/groupby.py | 32 ++++++++++++------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 53f242b5d35..24176ba7321 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -787,10 +787,8 @@ def corr(self, method="pearson", min_periods=1):
 
         Parameters
         ----------
-        method: Method of correlation
-            Pearson: standard correlation coefficient.
-            Kendall, Spearman correlation and callable method
-            not yet supported.
+        method: {"pearson" (default), "kendall", "spearman"} or callable
+            Currently only the pearson correlation coefficient is supported.
 
         min_periods: int, optional
             Minimum number of observations required per pair of columns
@@ -846,12 +844,13 @@ def corr(self, method="pearson", min_periods=1):
         _cols = self.grouping.values.columns.tolist()
 
         new_df_data = {}
-
-        for i in tuple(itertools.combinations_with_replacement(_cols, 2)):
-            new_df_data[i] = cudf.DataFrame._from_data(
-                {"x": self.obj._data[i[0]], "y": self.obj._data[i[1]]}
+        for x, y in itertools.combinations_with_replacement(_cols, 2):
+            new_df_data[(x, y)] = cudf.DataFrame._from_data(
+                {"x": self.obj._data[x], "y": self.obj._data[y]}
             ).to_struct()
-        new_gb = new_gb = cudf.DataFrame._from_data(new_df_data).groupby(by=self.grouping.keys)
+        new_gb = cudf.DataFrame._from_data(new_df_data).groupby(
+            by=self.grouping.keys
+        )
 
         try:
             gb_corr = new_gb.agg(lambda x: x.corr(method, min_periods))
@@ -859,9 +858,8 @@ def corr(self, method="pearson", min_periods=1):
             if "Unsupported groupby reduction type-agg combination" in str(e):
                 raise TypeError(
                     "Correlation accepts only numerical column-pairs"
-                ) from e
-            else:
-                raise
+                )
+            raise
 
         # ensure that column-pair labels are arranged in ascending order
         cols_list = [
@@ -876,10 +874,12 @@ def corr(self, method="pearson", min_periods=1):
 
         # interleave: combine the correlation results for each column-pair
         # into a single column
-        res = cudf.DataFrame()
-        for i, x in zip(cols_split, _cols):
-            ic = gb_corr.loc[:, i].interleave_columns()
-            res[x] = ic
+        res = cudf.DataFrame._from_data(
+            {
+                x: gb_corr.loc[:, i].interleave_columns()
+                for i, x in zip(cols_split, _cols)
+            }
+        )
 
         # create a multiindex for the groupby correlated dataframe,
         # to match pandas behavior

From 982d79d794c7bd8712fb3a2e16402b718d57e27b Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Thu, 18 Nov 2021 09:22:09 -0500
Subject: [PATCH 76/79] Update python/cudf/cudf/core/groupby/groupby.py

Co-authored-by: Michael Wang <isVoid@users.noreply.github.com>
---
 python/cudf/cudf/core/groupby/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 24176ba7321..607d455ee65 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -863,7 +863,7 @@ def corr(self, method="pearson", min_periods=1):
 
         # ensure that column-pair labels are arranged in ascending order
         cols_list = [
-            (_cols[j], _cols[i]) if i > j else (_cols[i], _cols[j])
+            (y, x) if i > j else (x, y)
             for j, y in enumerate(_cols)
             for i, x in enumerate(_cols)
         ]

From 53465bbbc8c8452969c261b623e125b281889162 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Mon, 29 Nov 2021 16:11:18 -0800
Subject: [PATCH 77/79] addressed Vyas reviews

---
 python/cudf/cudf/core/groupby/groupby.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 0135b28547f..3c6c00d39a3 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -787,8 +787,9 @@ def corr(self, method="pearson", min_periods=1):
 
         Parameters
         ----------
-        method: {"pearson" (default), "kendall", "spearman"} or callable
-            Currently only the pearson correlation coefficient is supported.
+        method: {"pearson", "kendall", "spearman"} or callable,
+            default "pearson". Currently only the pearson correlation
+            coefficient is supported.
 
         min_periods: int, optional
             Minimum number of observations required per pair of columns
@@ -830,10 +831,9 @@ def corr(self, method="pearson", min_periods=1):
         c   val1  1.000000  0.714575  0.714575
             val2  0.714575  1.000000  1.000000
             val3  0.714575  1.000000  1.000000
-
         """
 
-        if not method.lower() in ["pearson"]:
+        if not method.lower() in ("pearson",):
             raise NotImplementedError(
                 "Only pearson correlation is currently supported"
             )
@@ -841,7 +841,9 @@ def corr(self, method="pearson", min_periods=1):
         # create expanded dataframe consisting all combinations of the
         # struct columns-pairs to be correlated
         # i.e (('col1', 'col1'), ('col1', 'col2'), ('col2', 'col2'))
+        # breakpoint()
         _cols = self.grouping.values.columns.tolist()
+        len_cols = len(_cols)
 
         new_df_data = {}
         for x, y in itertools.combinations_with_replacement(_cols, 2):
@@ -868,8 +870,8 @@ def corr(self, method="pearson", min_periods=1):
             for i, x in enumerate(_cols)
         ]
         cols_split = [
-            cols_list[i : i + len(_cols)]
-            for i in range(0, len(cols_list), len(_cols))
+            cols_list[i : i + len_cols]
+            for i in range(0, len(cols_list), len_cols)
         ]
 
         # interleave: combine the correlation results for each column-pair
@@ -883,7 +885,7 @@ def corr(self, method="pearson", min_periods=1):
 
         # create a multiindex for the groupby correlated dataframe,
         # to match pandas behavior
-        unsorted_idx = gb_corr.index.repeat(len(_cols))
+        unsorted_idx = gb_corr.index.repeat(len_cols)
         idx_sort_order = unsorted_idx._get_sorted_inds()
         sorted_idx = unsorted_idx._gather(idx_sort_order)
         if len(gb_corr):

From f36ab44c5485d1743a56692941c5f12a59d840c2 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Mon, 29 Nov 2021 16:20:15 -0800
Subject: [PATCH 78/79] updated API link with corr in api_docs/groupby.rst

---
 docs/cudf/source/api_docs/groupby.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/cudf/source/api_docs/groupby.rst b/docs/cudf/source/api_docs/groupby.rst
index cf08d1d791b..575d7442cdf 100644
--- a/docs/cudf/source/api_docs/groupby.rst
+++ b/docs/cudf/source/api_docs/groupby.rst
@@ -59,6 +59,7 @@ Computations / descriptive stats
    GroupBy.std
    GroupBy.sum
    GroupBy.var
+   GroupBy.corr
    
 The following methods are available in both ``SeriesGroupBy`` and
 ``DataFrameGroupBy`` objects, but may differ slightly, usually in that

From 28d0a0a1b0fd86ceb71e91d34ca6b25d513fa625 Mon Sep 17 00:00:00 2001
From: Sheilah <kirui.sheilah@gmail.com>
Date: Mon, 29 Nov 2021 16:35:12 -0800
Subject: [PATCH 79/79] use MultiIndex._from_data in corr, drop debug comment

---
 python/cudf/cudf/core/groupby/groupby.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 3c6c00d39a3..f1d622362e2 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -15,7 +15,7 @@
 from cudf.api.types import is_list_like
 from cudf.core.abc import Serializable
 from cudf.core.column.column import arange, as_column
-from cudf.core.index import _index_from_data
+from cudf.core.multiindex import MultiIndex
 from cudf.utils.utils import GetAttrGetItemMixin, cached_property
 
 
@@ -841,7 +841,6 @@ def corr(self, method="pearson", min_periods=1):
         # create expanded dataframe consisting all combinations of the
         # struct columns-pairs to be correlated
         # i.e (('col1', 'col1'), ('col1', 'col2'), ('col2', 'col2'))
-        # breakpoint()
         _cols = self.grouping.values.columns.tolist()
         len_cols = len(_cols)
 
@@ -893,7 +892,7 @@ def corr(self, method="pearson", min_periods=1):
             sorted_idx._data[None] = as_column(
                 cudf.Series(_cols).tile(len(gb_corr.index))
             )
-        res.index = _index_from_data(sorted_idx._data)
+        res.index = MultiIndex._from_data(sorted_idx._data)
 
         return res