Fix linalg::map to work with non-power-of-2-sized types again (#1453)

Closes issue #1413. Adds back the ability for `raft::linalg::map` to work with non-power-of-2-sized types. Authors: Allard Hendriksen (https://github.com/ahendriksen), Corey J. Nolet (https://github.com/cjnolet). Approvers: Artem M. Chirkin (https://github.com/achirkin), Corey J. Nolet (https://github.com/cjnolet). URL: #1453
rapidsai · May 15, 2023 · 5524cb9 · 5524cb9
1 parent b32cd60
commit 5524cb9
Show file tree

Hide file tree

Showing 2 changed files with 131 additions and 7 deletions.
diff --git a/cpp/include/raft/linalg/detail/map.cuh b/cpp/include/raft/linalg/detail/map.cuh
@@ -116,15 +116,26 @@ struct ratio_selector {
   template <typename T>
   constexpr static auto ignoring_alignment() -> ratio_selector  // ratio = kCoalescedVectorSize / sizeof(T) when that divides evenly, else 1; align is 0
   {
-    return ratio_selector{raft::div_rounding_up_safe<size_t>(kCoalescedVectorSize, sizeof(T)), 0};
+    constexpr bool T_evenly_fits_in_cache_line = (kCoalescedVectorSize % sizeof(T)) == 0;  // true when sizeof(T) divides kCoalescedVectorSize exactly
+
+    if constexpr (T_evenly_fits_in_cache_line) {
+      return ratio_selector{size_t(kCoalescedVectorSize / sizeof(T)), 0};  // a whole number of T elements per kCoalescedVectorSize bytes
+    } else {
+      return ratio_selector{1, 0};  // sizeof(T) does not divide kCoalescedVectorSize: fall back to a ratio of 1
+    }
   }
 
   template <typename T>
   explicit ratio_selector(const T* ptr)  // takes ratio from ignoring_alignment<T>() and derives align from ptr
   {
     constexpr auto s = ignoring_alignment<T>();  // NOLINT
-    align            = int(Pow2<sizeof(T) * s.ratio>::roundUp(ptr) - ptr);
-    ratio            = int(s.ratio);
+
+    if constexpr (s.ratio == 1) {  // ratio 1: the Pow2 computation below is not instantiated for this T
+      align = 0;
+    } else {
+      align = int(Pow2<sizeof(T) * s.ratio>::roundUp(ptr) - ptr);  // NOTE(review): presumably the number of T elements from ptr up to the next (sizeof(T) * ratio)-byte boundary — confirm Pow2::roundUp semantics
+    }
+    ratio = int(s.ratio);
   }
 };
 

diff --git a/cpp/test/linalg/map.cu b/cpp/test/linalg/map.cu
@@ -17,14 +17,69 @@
 #include "../test_utils.cuh"
 #include "unary_op.cuh"
 #include <gtest/gtest.h>
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/operators.hpp>
 #include <raft/linalg/eltwise.cuh>
 #include <raft/linalg/map.cuh>
+#include <raft/matrix/init.cuh>
 #include <raft/random/rng.cuh>
 #include <raft/util/cudart_utils.hpp>
 
 namespace raft {
 namespace linalg {
 
+/*
+ * padded_float is a 12-byte type that wraps a single float. Two ints are used
+ * as padding. It is used to test types whose size is not a power of two.
+ */
+struct padded_float {
+  float value_;  // the wrapped value
+  int padding1;  // padding only; set to 0 by the float constructor
+  int padding2;  // padding only; set to 0 by the float constructor
+
+  padded_float() = default;  // default-initialization leaves all three members uninitialized
+  constexpr padded_float(const float& x) : value_(x), padding1(0), padding2(0) {}  // non-explicit: allows implicit conversion from float
+  constexpr padded_float(const padded_float&)            = default;
+  constexpr padded_float& operator=(const padded_float&) = default;
+  constexpr float abs() const { return std::abs(value_); }  // absolute value of the wrapped float
+};
+
+constexpr padded_float operator+(const padded_float& x, const padded_float& y)
+{
+  return padded_float(x.value_ + y.value_);  // sum of the wrapped floats; operand padding is not read
+}
+
+constexpr padded_float operator-(const padded_float& x, const padded_float& y)
+{
+  return padded_float(x.value_ - y.value_);  // difference of the wrapped floats; operand padding is not read
+}
+constexpr padded_float operator*(const padded_float& x, const padded_float& y)
+{
+  return padded_float(x.value_ * y.value_);  // product of the wrapped floats; operand padding is not read
+}
+constexpr padded_float operator*(const padded_float& x, const int& scalar)
+{
+  return padded_float(scalar * x.value_);  // scales the wrapped float by an int scalar (padded_float on the left-hand side only)
+}
+constexpr bool operator==(const padded_float& x, const padded_float& y)
+{
+  return x.value_ == y.value_;  // exact float equality on value_ only; padding is ignored
+}
+
+constexpr bool operator<(const padded_float& x, const padded_float& y)
+{
+  return x.value_ < y.value_;  // orders by value_ only; padding is ignored
+}
+constexpr bool operator>(const padded_float& x, const padded_float& y)
+{
+  return x.value_ > y.value_;  // orders by value_ only; padding is ignored
+}
+inline auto operator<<(std::ostream& os, const padded_float& x) -> std::ostream&
+{
+  os << x.value_;  // prints only the wrapped float, not the padding
+  return os;
+}
+
 template <typename InType, typename IdxType, typename OutType>
 void mapLaunch(OutType* out,
                const InType* in1,
@@ -86,15 +141,38 @@ class MapTest : public ::testing::TestWithParam<MapInputs<InType, IdxType, OutTy
   {
   }
 
- protected:
   void SetUp() override
   {
     raft::random::RngState r(params.seed);
 
     IdxType len = params.len;
-    uniform(handle, r, in1.data(), len, InType(-1.0), InType(1.0));
-    uniform(handle, r, in2.data(), len, InType(-1.0), InType(1.0));
-    uniform(handle, r, in3.data(), len, InType(-1.0), InType(1.0));
+    if constexpr (std::is_floating_point<InType>::value) {
+      uniform(handle, r, in1.data(), len, InType(-1.0), InType(1.0));
+      uniform(handle, r, in2.data(), len, InType(-1.0), InType(1.0));
+      uniform(handle, r, in3.data(), len, InType(-1.0), InType(1.0));
+    } else {
+      // First create random float arrays
+      rmm::device_uvector<float> fin1(params.len, stream);
+      rmm::device_uvector<float> fin2(params.len, stream);
+      rmm::device_uvector<float> fin3(params.len, stream);
+      uniform(handle, r, fin1.data(), len, float(-1.0), float(1.0));
+      uniform(handle, r, fin2.data(), len, float(-1.0), float(1.0));
+      uniform(handle, r, fin3.data(), len, float(-1.0), float(1.0));
+
+      // Then pad them
+      raft::device_resources handle{stream};
+      auto fin1_view = raft::make_device_vector_view(fin1.data(), fin1.size());
+      auto fin2_view = raft::make_device_vector_view(fin2.data(), fin2.size());
+      auto fin3_view = raft::make_device_vector_view(fin3.data(), fin3.size());
+      auto in1_view  = raft::make_device_vector_view(in1.data(), in1.size());
+      auto in2_view  = raft::make_device_vector_view(in2.data(), in2.size());
+      auto in3_view  = raft::make_device_vector_view(in3.data(), in3.size());
+
+      auto add_padding = [] __device__(float a) { return padded_float(a); };
+      raft::linalg::map(handle, in1_view, add_padding, raft::make_const_mdspan(fin1_view));
+      raft::linalg::map(handle, in2_view, add_padding, raft::make_const_mdspan(fin2_view));
+      raft::linalg::map(handle, in3_view, add_padding, raft::make_const_mdspan(fin3_view));
+    }
 
     create_ref(out_ref.data(), in1.data(), in2.data(), in3.data(), params.scalar, len, stream);
     mapLaunch(out.data(), in1.data(), in2.data(), in3.data(), params.scalar, len, stream);
@@ -175,5 +253,40 @@ const std::vector<MapInputs<double, size_t>> inputsd_i64 = {
 MAP_TEST((MapTest<double, size_t>), MapTestD_i64, inputsd_i64);
 MAP_TEST((MapOffsetTest<double, size_t>), MapOffsetTestD_i64, inputsd_i64);
 
+// This comparison structure is necessary because it is not straightforward to
+// add an overload of std::abs for padded_float.
+struct ComparePadded {
+  float eps;  // tolerance used by operator()
+  ComparePadded(float eps_) : eps(eps_) {}
+  ComparePadded(padded_float eps_) : eps(eps_.value_) {}  // takes the tolerance from the wrapped float
+  ComparePadded(double eps_) : eps(eps_) {}  // a double tolerance is converted to float
+  bool operator()(const padded_float& a, const padded_float& b) const  // true when a and b agree within eps
+  {
+    float diff  = (a - b).abs();
+    float m     = std::max(a.abs(), b.abs());
+    float ratio = diff > eps ? diff / m : diff;  // relative error when diff exceeds eps, otherwise the absolute difference
+    return (ratio <= eps);
+  }
+};
+
+// Defines a parameterized test that checks out against out_ref using ComparePadded
+#define MAP_TEST_PADDED(test_type, test_name, inputs)                \
+  typedef RAFT_DEPAREN(test_type) test_name;                         \
+  TEST_P(test_name, Result)                                          \
+  {                                                                  \
+    ASSERT_TRUE(devArrMatch(this->out_ref.data(),                    \
+                            this->out.data(),                        \
+                            this->params.len,                        \
+                            ComparePadded(this->params.tolerance))); \
+  }                                                                  \
+  INSTANTIATE_TEST_SUITE_P(MapTests, test_name, ::testing::ValuesIn(inputs))
+
+const std::vector<MapInputs<padded_float, size_t>> inputsd_padded_float = {
+  {0.00000001, 1024 * 1024, 1234ULL, 5.2}};  // NOTE(review): presumably {tolerance, len, seed, scalar} — confirm against MapInputs
+MAP_TEST_PADDED((MapTest<padded_float, size_t>), MapTestD_padded_float, inputsd_padded_float);  // runs MapTest on the 12-byte element type
+MAP_TEST_PADDED((MapOffsetTest<padded_float, size_t>),
+                MapOffsetTestD_padded_float,
+                inputsd_padded_float);
+
 }  // namespace linalg
 }  // namespace raft