diff --git a/cpp/include/raft/linalg/detail/map.cuh b/cpp/include/raft/linalg/detail/map.cuh index 90b653b711..c4959e6812 100644 --- a/cpp/include/raft/linalg/detail/map.cuh +++ b/cpp/include/raft/linalg/detail/map.cuh @@ -116,15 +116,26 @@ struct ratio_selector { template constexpr static auto ignoring_alignment() -> ratio_selector { - return ratio_selector{raft::div_rounding_up_safe(kCoalescedVectorSize, sizeof(T)), 0}; + constexpr bool T_evenly_fits_in_cache_line = (kCoalescedVectorSize % sizeof(T)) == 0; + + if constexpr (T_evenly_fits_in_cache_line) { + return ratio_selector{size_t(kCoalescedVectorSize / sizeof(T)), 0}; + } else { /* sizeof(T) does not evenly divide kCoalescedVectorSize: fall back to ratio_selector{1, 0} */ + return ratio_selector{1, 0}; + } } template explicit ratio_selector(const T* ptr) { constexpr auto s = ignoring_alignment(); // NOLINT - align = int(Pow2::roundUp(ptr) - ptr); - ratio = int(s.ratio); + + if constexpr (s.ratio == 1) { /* ratio of 1: no alignment offset is computed from ptr */ + align = 0; + } else { + align = int(Pow2::roundUp(ptr) - ptr); + } + ratio = int(s.ratio); } }; diff --git a/cpp/test/linalg/map.cu b/cpp/test/linalg/map.cu index 15b40808ee..8f2c3ed372 100644 --- a/cpp/test/linalg/map.cu +++ b/cpp/test/linalg/map.cu @@ -17,14 +17,69 @@ #include "../test_utils.cuh" #include "unary_op.cuh" #include +#include +#include #include #include +#include #include #include namespace raft { namespace linalg { +/* + * padded_float is a 12-byte type that contains a single float. Two integers are + * used for padding. It is used to test types that are not power-of-two-sized. 
+ */ +struct padded_float { + float value_; + int padding1; + int padding2; + + padded_float() = default; + constexpr padded_float(const float& x) : value_(x), padding1(0), padding2(0) {} + constexpr padded_float(const padded_float&) = default; + constexpr padded_float& operator=(const padded_float&) = default; + constexpr float abs() const { return std::abs(value_); } +}; + +constexpr padded_float operator+(const padded_float& x, const padded_float& y) +{ + return padded_float(x.value_ + y.value_); +} + +constexpr padded_float operator-(const padded_float& x, const padded_float& y) +{ + return padded_float(x.value_ - y.value_); +} +constexpr padded_float operator*(const padded_float& x, const padded_float& y) +{ + return padded_float(x.value_ * y.value_); +} +constexpr padded_float operator*(const padded_float& x, const int& scalar) +{ + return padded_float(scalar * x.value_); +} +constexpr bool operator==(const padded_float& x, const padded_float& y) +{ + return x.value_ == y.value_; +} + +constexpr bool operator<(const padded_float& x, const padded_float& y) +{ + return x.value_ < y.value_; +} +constexpr bool operator>(const padded_float& x, const padded_float& y) +{ + return x.value_ > y.value_; +} +inline auto operator<<(std::ostream& os, const padded_float& x) -> std::ostream& +{ + os << x.value_; + return os; +} + template void mapLaunch(OutType* out, const InType* in1, @@ -86,15 +141,38 @@ class MapTest : public ::testing::TestWithParam::value) { + uniform(handle, r, in1.data(), len, InType(-1.0), InType(1.0)); + uniform(handle, r, in2.data(), len, InType(-1.0), InType(1.0)); + uniform(handle, r, in3.data(), len, InType(-1.0), InType(1.0)); + } else { + // First create random float arrays + rmm::device_uvector fin1(params.len, stream); + rmm::device_uvector fin2(params.len, stream); + rmm::device_uvector fin3(params.len, stream); + uniform(handle, r, fin1.data(), len, float(-1.0), float(1.0)); + uniform(handle, r, fin2.data(), len, float(-1.0), float(1.0)); 
+ uniform(handle, r, fin3.data(), len, float(-1.0), float(1.0)); + + // Then pad them + raft::device_resources handle{stream}; /* NOTE(review): this local shadows the `handle` already used by the uniform() calls above - confirm a second device_resources is intended */ + auto fin1_view = raft::make_device_vector_view(fin1.data(), fin1.size()); + auto fin2_view = raft::make_device_vector_view(fin2.data(), fin2.size()); + auto fin3_view = raft::make_device_vector_view(fin3.data(), fin3.size()); + auto in1_view = raft::make_device_vector_view(in1.data(), in1.size()); + auto in2_view = raft::make_device_vector_view(in2.data(), in2.size()); + auto in3_view = raft::make_device_vector_view(in3.data(), in3.size()); + + auto add_padding = [] __device__(float a) { return padded_float(a); }; + raft::linalg::map(handle, in1_view, add_padding, raft::make_const_mdspan(fin1_view)); + raft::linalg::map(handle, in2_view, add_padding, raft::make_const_mdspan(fin2_view)); + raft::linalg::map(handle, in3_view, add_padding, raft::make_const_mdspan(fin3_view)); + } create_ref(out_ref.data(), in1.data(), in2.data(), in3.data(), params.scalar, len, stream); mapLaunch(out.data(), in1.data(), in2.data(), in3.data(), params.scalar, len, stream); @@ -175,5 +253,40 @@ const std::vector> inputsd_i64 = { MAP_TEST((MapTest), MapTestD_i64, inputsd_i64); MAP_TEST((MapOffsetTest), MapOffsetTestD_i64, inputsd_i64); +// This comparison structure is necessary because it is not straightforward to +// add an overload of std::abs for padded_float. +struct ComparePadded { + float eps; + ComparePadded(float eps_) : eps(eps_) {} + ComparePadded(padded_float eps_) : eps(eps_.value_) {} + ComparePadded(double eps_) : eps(eps_) {} + bool operator()(const padded_float& a, const padded_float& b) const + { + float diff = (a - b).abs(); + float m = std::max(a.abs(), b.abs()); + float ratio = diff > eps ? 
diff / m : diff; /* relative difference when diff exceeds eps, absolute difference otherwise */ + return (ratio <= eps); + } +}; + +// Define a parameterized map test whose result check uses ComparePadded +#define MAP_TEST_PADDED(test_type, test_name, inputs) \ + typedef RAFT_DEPAREN(test_type) test_name; \ + TEST_P(test_name, Result) \ + { \ + ASSERT_TRUE(devArrMatch(this->out_ref.data(), \ + this->out.data(), \ + this->params.len, \ + ComparePadded(this->params.tolerance))); \ + } \ + INSTANTIATE_TEST_SUITE_P(MapTests, test_name, ::testing::ValuesIn(inputs)) + +const std::vector> inputsd_padded_float = { + {0.00000001, 1024 * 1024, 1234ULL, 5.2}}; +MAP_TEST_PADDED((MapTest), MapTestD_padded_float, inputsd_padded_float); +MAP_TEST_PADDED((MapOffsetTest), + MapOffsetTestD_padded_float, + inputsd_padded_float); + } // namespace linalg } // namespace raft