From d1d31e299e3508f927cf0be605ed59a4adc4b0d6 Mon Sep 17 00:00:00 2001
From: Louis Sugy
Date: Mon, 16 Jan 2023 17:49:09 +0100
Subject: [PATCH 01/13] math.hpp header and host-side test

---
 cpp/include/raft/core/math.hpp       | 284 +++++++++++++++++++++++++++
 cpp/include/raft/util/cuda_utils.cuh |  52 +++--
 cpp/test/CMakeLists.txt              |   1 +
 cpp/test/core/math_host.cpp          | 179 +++++++++++++++++
 4 files changed, 497 insertions(+), 19 deletions(-)
 create mode 100644 cpp/include/raft/core/math.hpp
 create mode 100644 cpp/test/core/math_host.cpp

diff --git a/cpp/include/raft/core/math.hpp b/cpp/include/raft/core/math.hpp
new file mode 100644
index 0000000000..dd81fafb45
--- /dev/null
+++ b/cpp/include/raft/core/math.hpp
@@ -0,0 +1,284 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cmath>
+#include <cstdint>
+#include <type_traits>
+
+#include <raft/core/detail/macros.hpp>
+
+namespace raft {
+
+/**
+ * Absolute value
+ *
+ * Note: no explicit type restrictions on this one, let the compiler find the appropriate overloads
+ * (e.g. int8_t casts to int) or fail.
+ */
+template <typename T>
+constexpr RAFT_INLINE_FUNCTION auto abs(const T& x)
+{
+#ifdef __CUDA_ARCH__
+  return ::abs(x);
+#else
+  return std::abs(x);
+#endif
+}
+
+/**
+ * Inverse hyperbolic tangent
+ */
+template <typename T>
+constexpr RAFT_INLINE_FUNCTION auto atanh(const T& x)
+  -> std::enable_if_t<std::is_same_v<T, float> || std::is_same_v<T, double>, T>
+{
+#ifdef __CUDA_ARCH__
+  return ::atanh(x);
+#else
+  return std::atanh(x);
+#endif
+}
+
+/**
+ * Cosine
+ */
+template <typename T>
+constexpr RAFT_INLINE_FUNCTION auto cos(const T& x)
+  -> std::enable_if_t<std::is_same_v<T, float> || std::is_same_v<T, double>, T>
+{
+#ifdef __CUDA_ARCH__
+  return ::cos(x);
+#else
+  return std::cos(x);
+#endif
+}
+
+/**
+ * Exponential function
+ */
+template <typename T>
+constexpr RAFT_INLINE_FUNCTION auto exp(const T& x)
+  -> std::enable_if_t<std::is_same_v<T, float> || std::is_same_v<T, double>, T>
+{
+#ifdef __CUDA_ARCH__
+  return ::exp(x);
+#else
+  return std::exp(x);
+#endif
+}
+
+/**
+ * Natural logarithm
+ */
+template <typename T>
+constexpr RAFT_INLINE_FUNCTION auto log(const T& x)
+  -> std::enable_if_t<std::is_same_v<T, float> || std::is_same_v<T, double>, T>
+{
+#ifdef __CUDA_ARCH__
+  return ::log(x);
+#else
+  return std::log(x);
+#endif
+}
+
+/**
+ * @defgroup Maximum of two or more values.
+ *
+ * The CUDA Math API has overloads for all combinations of float/double. We provide similar
+ * functionality while wrapping around std::max, which only supports arguments of the same type.
+ * However, though the CUDA Math API supports combinations of unsigned and signed integers, this is
+ * very error-prone so we do not support that and require the user to cast instead. (e.g. the max of
+ * -1 and 1u is 4294967295u...)
+ *
+ * When no overload matches, we provide a generic implementation but require that both types be the
+ * same (and that the less-than operator be defined).
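+ *
+ * A usage sketch (illustrative only, not part of the original file):
+ * @code
+ *   raft::max(1.0f, 2.0);  // mixed float/double arguments promote to double
+ *   raft::max(-1, 1);      // same-type integers use the generic implementation
+ *   // raft::max(-1, 1u);  // does not compile: cast one argument explicitly instead
+ * @endcode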
+ * @{
+ */
+template <typename T1, typename T2>
+constexpr RAFT_INLINE_FUNCTION auto max(const T1& x, const T2& y)
+{
+#ifdef __CUDA_ARCH__
+  // Combinations of types supported by the CUDA Math API
+  if constexpr ((std::is_integral_v<T1> && std::is_integral_v<T2> && std::is_same_v<T1, T2>) ||
+                ((std::is_same_v<T1, float> || std::is_same_v<T1, double>) &&
+                 (std::is_same_v<T2, float> || std::is_same_v<T2, double>))) {
+    return ::max(x, y);
+  }
+  // Else, check that the types are the same and provide a generic implementation
+  else {
+    static_assert(
+      std::is_same_v<T1, T2>,
+      "No native max overload for these types. Both argument types must be the same to use "
+      "the generic max. Please cast appropriately.");
+    return (x < y) ? y : x;
+  }
+#else
+  if constexpr (std::is_same_v<T1, float> && std::is_same_v<T2, double>) {
+    return std::max(static_cast<double>(x), y);
+  } else if constexpr (std::is_same_v<T1, double> && std::is_same_v<T2, float>) {
+    return std::max(x, static_cast<double>(y));
+  } else {
+    static_assert(
+      std::is_same_v<T1, T2>,
+      "std::max requires that both argument types be the same. Please cast appropriately.");
+    return std::max(x, y);
+  }
+#endif
+}
+
+template <typename T1, typename T2, typename... Args>
+constexpr RAFT_INLINE_FUNCTION auto max(const T1& x, const T2& y, Args&&... args)
+{
+  return raft::max(x, raft::max(y, std::forward<Args>(args)...));
+}
+/** @} */
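+
+// Illustrative note (not part of the original file): the variadic overload
+// above folds pairwise from the right, so raft::max(a, b, c) is evaluated as
+// raft::max(a, raft::max(b, c)), and the float/double promotion rules apply at
+// every step.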
+
+/**
+ * @defgroup Minimum of two or more values.
+ *
+ * The CUDA Math API has overloads for all combinations of float/double. We provide similar
+ * functionality while wrapping around std::min, which only supports arguments of the same type.
+ * However, though the CUDA Math API supports combinations of unsigned and signed integers, this is
+ * very error-prone so we do not support that and require the user to cast instead. (e.g. the min of
+ * -1 and 1u is 1u...)
+ *
+ * When no overload matches, we provide a generic implementation but require that both types be the
+ * same (and that the less-than operator be defined).
+ * @{
+ */
+template <typename T1, typename T2>
+constexpr RAFT_INLINE_FUNCTION auto min(const T1& x, const T2& y)
+{
+#ifdef __CUDA_ARCH__
+  // Combinations of types supported by the CUDA Math API
+  if constexpr ((std::is_integral_v<T1> && std::is_integral_v<T2> && std::is_same_v<T1, T2>) ||
+                ((std::is_same_v<T1, float> || std::is_same_v<T1, double>) &&
+                 (std::is_same_v<T2, float> || std::is_same_v<T2, double>))) {
+    return ::min(x, y);
+  }
+  // Else, check that the types are the same and provide a generic implementation
+  else {
+    static_assert(
+      std::is_same_v<T1, T2>,
+      "No native min overload for these types. Both argument types must be the same to use "
+      "the generic min. Please cast appropriately.");
+    return (y < x) ? y : x;
+  }
+#else
+  if constexpr (std::is_same_v<T1, float> && std::is_same_v<T2, double>) {
+    return std::min(static_cast<double>(x), y);
+  } else if constexpr (std::is_same_v<T1, double> && std::is_same_v<T2, float>) {
+    return std::min(x, static_cast<double>(y));
+  } else {
+    static_assert(
+      std::is_same_v<T1, T2>,
+      "std::min requires that both argument types be the same. Please cast appropriately.");
+    return std::min(x, y);
+  }
+#endif
+}
+
+template <typename T1, typename T2, typename... Args>
+constexpr RAFT_INLINE_FUNCTION auto min(const T1& x, const T2& y, Args&&... args)
+{
+  return raft::min(x, raft::min(y, std::forward<Args>(args)...));
+}
+/** @} */
+
+/**
+ * Power
+ */
+template <typename T>
+constexpr RAFT_INLINE_FUNCTION auto pow(const T& x, const T& y)
+  -> std::enable_if_t<std::is_same_v<T, float> || std::is_same_v<T, double>, T>
+{
+#ifdef __CUDA_ARCH__
+  return ::pow(x, y);
+#else
+  return std::pow(x, y);
+#endif
+}
+
+/**
+ * Sign
+ */
+template <typename T>
+constexpr RAFT_INLINE_FUNCTION auto sgn(const T val) -> int
+{
+  return (T(0) < val) - (val < T(0));
+}
+
+/**
+ * Sine
+ */
+template <typename T>
+constexpr RAFT_INLINE_FUNCTION auto sin(const T& x)
+  -> std::enable_if_t<std::is_same_v<T, float> || std::is_same_v<T, double>, T>
+{
+#ifdef __CUDA_ARCH__
+  return ::sin(x);
+#else
+  return std::sin(x);
+#endif
+}
+
+/**
+ * Sine and cosine
+ */
+template <typename T>
+constexpr RAFT_INLINE_FUNCTION
+  std::enable_if_t<std::is_same_v<T, float> || std::is_same_v<T, double>>
+  sincos(const T& x, T* s, T* c)
+{
+#ifdef __CUDA_ARCH__
+  ::sincos(x, s, c);
+#else
+  *s = std::sin(x);
+  *c = std::cos(x);
+#endif
+}
+
+/**
+ * Square root
+ */
+template <typename T>
+constexpr RAFT_INLINE_FUNCTION auto sqrt(const T& x)
+  -> std::enable_if_t<std::is_same_v<T, float> || std::is_same_v<T, double>, T>
+{
+#ifdef __CUDA_ARCH__
+  return ::sqrt(x);
+#else
+  return std::sqrt(x);
+#endif
+}
+
+/**
+ * Hyperbolic tangent
+ */
+template <typename T>
+constexpr RAFT_INLINE_FUNCTION auto tanh(const T& x)
+  -> std::enable_if_t<std::is_same_v<T, float> || std::is_same_v<T, double>, T>
+{
+#ifdef __CUDA_ARCH__
+  return ::tanh(x);
+#else
+  return std::tanh(x);
+#endif
+}
+
+} // namespace raft
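+
+// A minimal host-side usage sketch (illustrative only, not part of the
+// original file); thanks to the __CUDA_ARCH__ dispatch above, the same calls
+// compile in both host and device code:
+//
+//   #include <raft/core/math.hpp>
+//   double s, c;
+//   raft::sincos(0.5, &s, &c);                   // host path: std::sin/std::cos
+//   int sign = raft::sgn(raft::max(s, c, 0.0));  // variadic max, then sign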
diff --git a/cpp/include/raft/util/cuda_utils.cuh b/cpp/include/raft/util/cuda_utils.cuh
index 61dd6e0ad8..2d515cbca8 100644
--- a/cpp/include/raft/util/cuda_utils.cuh
+++ b/cpp/include/raft/util/cuda_utils.cuh
@@ -21,6 +21,7 @@
 #include <stdint.h>
 #include <type_traits>
+#include <raft/core/math.hpp>
 #include <raft/core/operators.hpp>
 
 #ifndef ENABLE_MEMCPY_ASYNC
@@ -259,11 +260,13 @@ DI double myAtomicMax(double* address, double val)
 template <typename T>
 HDI T myMax(T x, T y);
 template <>
+[[deprecated("use raft::max from raft/core/math.hpp instead")]]
 HDI float myMax(float x, float y)
 {
   return fmaxf(x, y);
 }
 template <>
+[[deprecated("use raft::max from raft/core/math.hpp instead")]]
 HDI double myMax(double x, double y)
 {
   return fmax(x, y);
@@ -277,11 +280,13 @@ HDI double myMax(double x, double y)
 template <typename T>
 HDI T myMin(T x, T y);
 template <>
+[[deprecated("use raft::min from raft/core/math.hpp instead")]]
 HDI float myMin(float x, float y)
 {
   return fminf(x, y);
 }
 template <>
+[[deprecated("use raft::min from raft/core/math.hpp instead")]]
 HDI double myMin(double x, double y)
 {
   return fmin(x, y);
@@ -316,15 +321,6 @@ DI T myAtomicMax(T* address, T val)
   return *address;
 }
 
-/**
- * Sign function
- */
-template <typename T>
-HDI int sgn(const T val)
-{
-  return (T(0) < val) - (val < T(0));
-}
-
 /**
  * @defgroup Exp Exponential function
  * @{
@@ -332,14 +328,16 @@ HDI int sgn(const T val)
 template <typename T>
 HDI T myExp(T x);
 template <>
+[[deprecated("use raft::exp from raft/core/math.hpp instead")]]
 HDI float myExp(float x)
 {
   return expf(x);
 }
 template <>
+[[deprecated("use raft::exp from raft/core/math.hpp instead")]]
 HDI double myExp(double x)
 {
-  return exp(x);
+  return ::exp(x);
 }
 /** @} */
 
@@ -368,14 +366,16 @@ inline __device__ double myInf()
 template <typename T>
 HDI T myLog(T x);
 template <>
+[[deprecated("use raft::log from raft/core/math.hpp instead")]]
 HDI float myLog(float x)
 {
   return logf(x);
 }
 template <>
+[[deprecated("use raft::log from raft/core/math.hpp instead")]]
 HDI double myLog(double x)
 {
-  return log(x);
+  return ::log(x);
 }
 /** @} */
 
@@ -386,14 +386,16 @@ HDI double myLog(double x)
 template <typename T>
 HDI T mySqrt(T x);
 template <>
+[[deprecated("use raft::sqrt from raft/core/math.hpp instead")]]
 HDI float mySqrt(float x)
 {
   return sqrtf(x);
 }
 template <>
+[[deprecated("use raft::sqrt from raft/core/math.hpp instead")]]
 HDI double mySqrt(double x)
 {
-  return sqrt(x);
+  return ::sqrt(x);
 }
 /** @} */
 
@@ -404,14 +406,16 @@ HDI double mySqrt(double x)
 template <typename T>
 DI void mySinCos(T x, T& s, T& c);
 template <>
+[[deprecated("use raft::sincos from raft/core/math.hpp instead")]]
 DI void mySinCos(float x, float& s, float& c)
 {
   sincosf(x, &s, &c);
 }
 template <>
+[[deprecated("use raft::sincos from raft/core/math.hpp instead")]]
 DI void mySinCos(double x, double& s, double& c)
 {
-  sincos(x, &s, &c);
+  ::sincos(x, &s, &c);
 }
 /** @} */
 
@@ -422,14 +426,16 @@ DI void mySinCos(double x, double& s, double& c)
 template <typename T>
 DI T mySin(T x);
 template <>
+[[deprecated("use raft::sin from raft/core/math.hpp instead")]]
 DI float mySin(float x)
 {
   return sinf(x);
 }
 template <>
+[[deprecated("use raft::sin from raft/core/math.hpp instead")]]
 DI double mySin(double x)
 {
-  return sin(x);
+  return ::sin(x);
 }
 /** @} */
 
@@ -443,11 +449,13 @@ DI T myAbs(T x)
   return x < 0 ? -x : x;
 }
 template <>
+[[deprecated("use raft::abs from raft/core/math.hpp instead")]]
 DI float myAbs(float x)
 {
   return fabsf(x);
 }
 template <>
+[[deprecated("use raft::abs from raft/core/math.hpp instead")]]
 DI double myAbs(double x)
 {
   return fabs(x);
@@ -461,14 +469,16 @@ DI double myAbs(double x)
 template <typename T>
 HDI T myPow(T x, T power);
 template <>
+[[deprecated("use raft::pow from raft/core/math.hpp instead")]]
 HDI float myPow(float x, float power)
 {
   return powf(x, power);
 }
 template <>
+[[deprecated("use raft::pow from raft/core/math.hpp instead")]]
 HDI double myPow(double x, double power)
 {
-  return pow(x, power);
+  return ::pow(x, power);
 }
 /** @} */
 
@@ -479,14 +489,16 @@ HDI double myPow(double x, double power)
 template <typename T>
 HDI T myTanh(T x);
 template <>
+[[deprecated("use raft::tanh from raft/core/math.hpp instead")]]
 HDI float myTanh(float x)
 {
   return tanhf(x);
 }
 template <>
+[[deprecated("use raft::tanh from raft/core/math.hpp instead")]]
 HDI double myTanh(double x)
 {
-  return tanh(x);
+  return ::tanh(x);
 }
 /** @} */
 
@@ -497,14 +509,16 @@ HDI double myTanh(double x)
 template <typename T>
 HDI T myATanh(T x);
 template <>
+[[deprecated("use raft::atanh from raft/core/math.hpp instead")]]
 HDI float myATanh(float x)
 {
   return atanhf(x);
 }
 template <>
+[[deprecated("use raft::atanh from raft/core/math.hpp instead")]]
 HDI double myATanh(double x)
 {
-  return atanh(x);
+  return ::atanh(x);
 }
 /** @} */
 
@@ -526,7 +540,7 @@ struct SqrtOp {
   [[deprecated("SqrtOp is deprecated. Use sqrt_op instead.")]] HDI Type
   operator()(Type in, IdxType i = 0) const
   {
-    return mySqrt(in);
+    return raft::sqrt(in);
   }
 };
 
@@ -544,7 +558,7 @@ struct L1Op {
   [[deprecated("L1Op is deprecated. Use abs_op instead.")]] HDI Type
   operator()(Type in, IdxType i = 0) const
   {
-    return myAbs(in);
+    return raft::abs(in);
   }
 };
 
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index 8ca30a5c82..882c2af59c 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -86,6 +86,7 @@ if(BUILD_TESTS)
     CORE_TEST
     PATH
     test/core/logger.cpp
+    test/core/math_host.cpp
     test/core/operators_device.cu
     test/core/operators_host.cpp
     test/core/handle.cpp
diff --git a/cpp/test/core/math_host.cpp b/cpp/test/core/math_host.cpp
new file mode 100644
index 0000000000..255b2e8979
--- /dev/null
+++ b/cpp/test/core/math_host.cpp
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "../test_utils.h" +#include + +TEST(MathHost, Abs) +{ + // Integer abs + ASSERT_TRUE(raft::match(123, raft::abs(int8_t{-123}), raft::Compare())); + ASSERT_TRUE(raft::match(12345, raft::abs(-12345), raft::Compare())); + ASSERT_TRUE(raft::match(12345l, raft::abs(-12345l), raft::Compare())); + ASSERT_TRUE( + raft::match(123451234512345ll, raft::abs(-123451234512345ll), raft::Compare())); + // Floating-point abs + ASSERT_TRUE(raft::match(12.34f, raft::abs(-12.34f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE(raft::match(12.34, raft::abs(-12.34), raft::CompareApprox(0.000001))); +} + +TEST(MathHost, Atanh) +{ + ASSERT_TRUE( + raft::match(std::atanh(0.123f), raft::atanh(0.123f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE( + raft::match(std::atanh(0.123), raft::atanh(0.123), raft::CompareApprox(0.000001))); +} + +TEST(MathHost, Cos) +{ + ASSERT_TRUE( + raft::match(std::cos(12.34f), raft::cos(12.34f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE( + raft::match(std::cos(12.34), raft::cos(12.34), raft::CompareApprox(0.000001))); +} + +TEST(MathHost, Exp) +{ + ASSERT_TRUE( + raft::match(std::exp(12.34f), raft::exp(12.34f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE( + raft::match(std::exp(12.34), raft::exp(12.34), raft::CompareApprox(0.000001))); +} + +TEST(MathHost, Log) +{ + ASSERT_TRUE( + raft::match(std::log(12.34f), raft::log(12.34f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE( + raft::match(std::log(12.34), raft::log(12.34), raft::CompareApprox(0.000001))); +} + +TEST(MathHost, Max2) +{ + ASSERT_TRUE(raft::match(1234, raft::max(-1234, 1234), raft::Compare())); + ASSERT_TRUE(raft::match(1234u, raft::max(1234u, 123u), raft::Compare())); + ASSERT_TRUE(raft::match(1234ll, raft::max(-1234ll, 1234ll), raft::Compare())); + ASSERT_TRUE( + raft::match(1234ull, raft::max(1234ull, 123ull), raft::Compare())); + + ASSERT_TRUE(raft::match(12.34f, raft::max(-12.34f, 12.34f), raft::Compare())); + ASSERT_TRUE(raft::match(12.34, raft::max(-12.34, 12.34), raft::Compare())); + ASSERT_TRUE(raft::match(12.34, raft::max(-12.34f, 12.34), raft::CompareApprox(0.000001))); + ASSERT_TRUE(raft::match(12.34, raft::max(-12.34, 12.34f), raft::CompareApprox(0.000001))); +} + +TEST(MathHost, Max3) +{ + ASSERT_TRUE(raft::match(1234, raft::max(1234, 0, -1234), raft::Compare())); + ASSERT_TRUE(raft::match(1234, raft::max(-1234, 1234, 0), raft::Compare())); + ASSERT_TRUE(raft::match(1234, raft::max(0, -1234, 1234), raft::Compare())); + + ASSERT_TRUE( + raft::match(12.34, raft::max(12.34f, 0., -12.34), raft::CompareApprox(0.000001))); + ASSERT_TRUE( + raft::match(12.34, raft::max(-12.34, 12.34f, 0.), raft::CompareApprox(0.000001))); + ASSERT_TRUE( + raft::match(12.34, raft::max(0., -12.34, 12.34f), raft::CompareApprox(0.000001))); +} + +TEST(MathHost, Min2) +{ + ASSERT_TRUE(raft::match(-1234, raft::min(-1234, 1234), raft::Compare())); + ASSERT_TRUE(raft::match(123u, raft::min(1234u, 123u), raft::Compare())); + ASSERT_TRUE(raft::match(-1234ll, raft::min(-1234ll, 1234ll), raft::Compare())); + ASSERT_TRUE( + raft::match(123ull, raft::min(1234ull, 123ull), raft::Compare())); + + 
ASSERT_TRUE(raft::match(-12.34f, raft::min(-12.34f, 12.34f), raft::Compare())); + ASSERT_TRUE(raft::match(-12.34, raft::min(-12.34, 12.34), raft::Compare())); + ASSERT_TRUE( + raft::match(-12.34, raft::min(-12.34f, 12.34), raft::CompareApprox(0.000001))); + ASSERT_TRUE( + raft::match(-12.34, raft::min(-12.34, 12.34f), raft::CompareApprox(0.000001))); +} + +TEST(MathHost, Min3) +{ + ASSERT_TRUE(raft::match(-1234, raft::min(1234, 0, -1234), raft::Compare())); + ASSERT_TRUE(raft::match(-1234, raft::min(-1234, 1234, 0), raft::Compare())); + ASSERT_TRUE(raft::match(-1234, raft::min(0, -1234, 1234), raft::Compare())); + + ASSERT_TRUE( + raft::match(-12.34, raft::min(12.34f, 0., -12.34), raft::CompareApprox(0.000001))); + ASSERT_TRUE( + raft::match(-12.34, raft::min(-12.34, 12.34f, 0.), raft::CompareApprox(0.000001))); + ASSERT_TRUE( + raft::match(-12.34, raft::min(0., -12.34, 12.34f), raft::CompareApprox(0.000001))); +} + +TEST(MathHost, Pow) +{ + ASSERT_TRUE(raft::match( + std::pow(12.34f, 2.f), raft::pow(12.34f, 2.f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE( + raft::match(std::pow(12.34, 2.), raft::pow(12.34, 2.), raft::CompareApprox(0.000001))); +} + +TEST(MathHost, Sgn) +{ + ASSERT_TRUE(raft::match(-1, raft::sgn(-1234), raft::Compare())); + ASSERT_TRUE(raft::match(0, raft::sgn(0), raft::Compare())); + ASSERT_TRUE(raft::match(1, raft::sgn(1234), raft::Compare())); + ASSERT_TRUE(raft::match(-1, raft::sgn(-12.34f), raft::Compare())); + ASSERT_TRUE(raft::match(0, raft::sgn(0.f), raft::Compare())); + ASSERT_TRUE(raft::match(1, raft::sgn(12.34f), raft::Compare())); +} + +TEST(MathHost, Sin) +{ + ASSERT_TRUE( + raft::match(std::sin(12.34f), raft::sin(12.34f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE( + raft::match(std::sin(12.34), raft::sin(12.34), raft::CompareApprox(0.000001))); +} + +TEST(MathHost, SinCos) +{ + float xf = 12.34f; + float sf, cf; + raft::sincos(xf, &sf, &cf); + ASSERT_TRUE(raft::match(std::sin(12.34f), sf, raft::CompareApprox(0.0001f))); + ASSERT_TRUE(raft::match(std::cos(12.34f), cf, raft::CompareApprox(0.0001f))); + double xd = 12.34f; + double sd, cd; + raft::sincos(xd, &sd, &cd); + ASSERT_TRUE(raft::match(std::sin(12.34), sd, raft::CompareApprox(0.000001))); + ASSERT_TRUE(raft::match(std::cos(12.34), cd, raft::CompareApprox(0.000001))); +} + +TEST(MathHost, Sqrt) +{ + ASSERT_TRUE( + raft::match(std::sqrt(12.34f), raft::sqrt(12.34f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE( + raft::match(std::sqrt(12.34), raft::sqrt(12.34), raft::CompareApprox(0.000001))); +} + +TEST(MathHost, Tanh) +{ + ASSERT_TRUE( + raft::match(std::tanh(12.34f), raft::tanh(12.34f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE( + raft::match(std::tanh(12.34), raft::tanh(12.34), raft::CompareApprox(0.000001))); +} From 45e0d89e1d1d7ca27c9d9439b817f902d86987e3 Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Mon, 16 Jan 2023 18:45:31 +0100 Subject: [PATCH 02/13] Device-side test and generic abs --- cpp/include/raft/core/math.hpp | 20 ++- cpp/test/CMakeLists.txt | 1 + cpp/test/core/math_device.cu | 320 +++++++++++++++++++++++++++++++++ cpp/test/core/math_host.cpp | 2 +- 4 files changed, 338 insertions(+), 5 deletions(-) create mode 100644 cpp/test/core/math_device.cu diff --git a/cpp/include/raft/core/math.hpp b/cpp/include/raft/core/math.hpp index dd81fafb45..7932126e16 100644 --- a/cpp/include/raft/core/math.hpp +++ b/cpp/include/raft/core/math.hpp @@ -25,13 +25,15 @@ namespace raft { /** - * Absolute value - * - * Note: no explicit type restrictions on this one, let the compiler find the 
appropriate overloads
- * (e.g. int8_t casts to int) or fail.
+ * @defgroup Absolute value
+ * @{
  */
 template <typename T>
 constexpr RAFT_INLINE_FUNCTION auto abs(const T& x)
+  -> std::enable_if_t<std::is_same_v<T, float> || std::is_same_v<T, double> ||
+                        std::is_same_v<T, int> || std::is_same_v<T, long int> ||
+                        std::is_same_v<T, long long int>,
+                      T>
 {
 #ifdef __CUDA_ARCH__
   return ::abs(x);
@@ -39,6 +41,16 @@ constexpr RAFT_INLINE_FUNCTION auto abs(const T& x)
   return std::abs(x);
 #endif
 }
+template <typename T>
+constexpr RAFT_INLINE_FUNCTION auto abs(const T& x)
+  -> std::enable_if_t<!std::is_same_v<T, float> && !std::is_same_v<T, double> &&
+                        !std::is_same_v<T, int> && !std::is_same_v<T, long int> &&
+                        !std::is_same_v<T, long long int>,
+                      T>
+{
+  return x < T{0} ? -x : x;
+}
+/** @} */
 
 /**
  * Inverse hyperbolic tangent
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index 882c2af59c..a4b3758faa 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -86,6 +86,7 @@ if(BUILD_TESTS)
     CORE_TEST
     PATH
     test/core/logger.cpp
+    test/core/math_device.cu
     test/core/math_host.cpp
     test/core/operators_device.cu
     test/core/operators_host.cpp
diff --git a/cpp/test/core/math_device.cu b/cpp/test/core/math_device.cu
new file mode 100644
index 0000000000..a19d5bc8dd
--- /dev/null
+++ b/cpp/test/core/math_device.cu
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "../test_utils.h"
+#include <raft/core/math.hpp>
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_scalar.hpp>
+
+template <typename OutT, typename OpT, typename... Args>
+__global__ void math_eval_kernel(OutT* out, OpT op, Args... args)
+{
+  out[0] = op(std::forward<Args>(args)...);
+}
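+
+// Illustrative note (not part of the original file): math_eval below runs the
+// single-thread kernel above on the default stream and copies the result back
+// to the host, so each raft:: function can be checked from an ordinary gtest,
+// e.g. float r = math_eval(abs_test_op{}, -12.34f); evaluates raft::abs on a
+// float in device code.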
+
+template <typename OpT, typename... Args>
+auto math_eval(OpT op, Args&&... args)
+{
+  typedef decltype(op(args...)) OutT;
+  auto stream = rmm::cuda_stream_default;
+  rmm::device_scalar<OutT> result(stream);
+  math_eval_kernel<<<1, 1, 0, stream>>>(result.data(), op, std::forward<Args>(args)...);
+  return result.value(stream);
+}
+
+struct abs_test_op {
+  template <typename Type>
+  constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in) const
+  {
+    return raft::abs(in);
+  }
+};
+
+TEST(MathDevice, Abs)
+{
+  // Integer abs
+  ASSERT_TRUE(
+    raft::match(int8_t{123}, math_eval(abs_test_op{}, int8_t{-123}), raft::Compare<int8_t>()));
+  ASSERT_TRUE(raft::match(12345, math_eval(abs_test_op{}, -12345), raft::Compare<int>()));
+  ASSERT_TRUE(raft::match(12345l, math_eval(abs_test_op{}, -12345l), raft::Compare<long int>()));
+  ASSERT_TRUE(raft::match(123451234512345ll,
+                          math_eval(abs_test_op{}, -123451234512345ll),
+                          raft::Compare<long long int>()));
+  // Floating-point abs
+  ASSERT_TRUE(
+    raft::match(12.34f, math_eval(abs_test_op{}, -12.34f), raft::CompareApprox<float>(0.0001f)));
+  ASSERT_TRUE(
+    raft::match(12.34, math_eval(abs_test_op{}, -12.34), raft::CompareApprox<double>(0.000001)));
+}
+
+struct atanh_test_op {
+  template <typename Type>
+  constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in) const
+  {
+    return raft::atanh(in);
+  }
+};
+
+TEST(MathDevice, Atanh)
+{
+  ASSERT_TRUE(raft::match(
+    std::atanh(0.123f), math_eval(atanh_test_op{}, 0.123f), raft::CompareApprox<float>(0.0001f)));
+  ASSERT_TRUE(raft::match(
+    std::atanh(0.123), math_eval(atanh_test_op{}, 0.123), raft::CompareApprox<double>(0.000001)));
+}
+
+struct cos_test_op {
+  template <typename Type>
+  constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in) const
+  {
+    return raft::cos(in);
+  }
+};
+
+TEST(MathDevice, Cos)
+{
+  ASSERT_TRUE(raft::match(
+    std::cos(12.34f), math_eval(cos_test_op{}, 12.34f), raft::CompareApprox<float>(0.0001f)));
+  ASSERT_TRUE(raft::match(
+    std::cos(12.34), math_eval(cos_test_op{}, 12.34), raft::CompareApprox<double>(0.000001)));
+}
+
+struct exp_test_op {
+  template <typename Type>
+  constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in) const
+  {
+    return raft::exp(in);
+  }
+};
+
+TEST(MathDevice, Exp)
+{
+  ASSERT_TRUE(raft::match(
+    std::exp(12.34f), math_eval(exp_test_op{}, 12.34f), raft::CompareApprox<float>(0.0001f)));
+  ASSERT_TRUE(raft::match(
+    std::exp(12.34), math_eval(exp_test_op{}, 12.34), raft::CompareApprox<double>(0.000001)));
+}
+
+struct log_test_op {
+  template <typename Type>
+  constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in) const
+  {
+    return raft::log(in);
+  }
+};
+
+TEST(MathDevice, Log)
+{
+  ASSERT_TRUE(raft::match(
+    std::log(12.34f), math_eval(log_test_op{}, 12.34f), raft::CompareApprox<float>(0.0001f)));
+  ASSERT_TRUE(raft::match(
+    std::log(12.34), math_eval(log_test_op{}, 12.34), raft::CompareApprox<double>(0.000001)));
+}
+
+struct max_test_op {
+  template <typename... Args>
+  constexpr RAFT_INLINE_FUNCTION auto operator()(Args&&...
args) const + { + return raft::max(std::forward(args)...); + } +}; + +TEST(MathDevice, Max2) +{ + ASSERT_TRUE(raft::match(1234, math_eval(max_test_op{}, -1234, 1234), raft::Compare())); + ASSERT_TRUE( + raft::match(1234u, math_eval(max_test_op{}, 1234u, 123u), raft::Compare())); + ASSERT_TRUE( + raft::match(1234ll, math_eval(max_test_op{}, -1234ll, 1234ll), raft::Compare())); + ASSERT_TRUE(raft::match( + 1234ull, math_eval(max_test_op{}, 1234ull, 123ull), raft::Compare())); + + ASSERT_TRUE( + raft::match(12.34f, math_eval(max_test_op{}, -12.34f, 12.34f), raft::Compare())); + ASSERT_TRUE(raft::match(12.34, math_eval(max_test_op{}, -12.34, 12.34), raft::Compare())); + ASSERT_TRUE(raft::match( + 12.34, math_eval(max_test_op{}, -12.34f, 12.34), raft::CompareApprox(0.000001))); + ASSERT_TRUE(raft::match( + 12.34, math_eval(max_test_op{}, -12.34, 12.34f), raft::CompareApprox(0.000001))); +} + +TEST(MathDevice, Max3) +{ + ASSERT_TRUE(raft::match(1234, math_eval(max_test_op{}, 1234, 0, -1234), raft::Compare())); + ASSERT_TRUE(raft::match(1234, math_eval(max_test_op{}, -1234, 1234, 0), raft::Compare())); + ASSERT_TRUE(raft::match(1234, math_eval(max_test_op{}, 0, -1234, 1234), raft::Compare())); + + ASSERT_TRUE(raft::match( + 12.34, math_eval(max_test_op{}, 12.34f, 0., -12.34), raft::CompareApprox(0.000001))); + ASSERT_TRUE(raft::match( + 12.34, math_eval(max_test_op{}, -12.34, 12.34f, 0.), raft::CompareApprox(0.000001))); + ASSERT_TRUE(raft::match( + 12.34, math_eval(max_test_op{}, 0., -12.34, 12.34f), raft::CompareApprox(0.000001))); +} + +struct min_test_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(Args&&... args) const + { + return raft::min(std::forward(args)...); + } +}; + +TEST(MathDevice, Min2) +{ + ASSERT_TRUE(raft::match(-1234, math_eval(min_test_op{}, -1234, 1234), raft::Compare())); + ASSERT_TRUE( + raft::match(123u, math_eval(min_test_op{}, 1234u, 123u), raft::Compare())); + ASSERT_TRUE(raft::match( + -1234ll, math_eval(min_test_op{}, -1234ll, 1234ll), raft::Compare())); + ASSERT_TRUE(raft::match( + 123ull, math_eval(min_test_op{}, 1234ull, 123ull), raft::Compare())); + + ASSERT_TRUE( + raft::match(-12.34f, math_eval(min_test_op{}, -12.34f, 12.34f), raft::Compare())); + ASSERT_TRUE( + raft::match(-12.34, math_eval(min_test_op{}, -12.34, 12.34), raft::Compare())); + ASSERT_TRUE(raft::match( + -12.34, math_eval(min_test_op{}, -12.34f, 12.34), raft::CompareApprox(0.000001))); + ASSERT_TRUE(raft::match( + -12.34, math_eval(min_test_op{}, -12.34, 12.34f), raft::CompareApprox(0.000001))); +} + +TEST(MathDevice, Min3) +{ + ASSERT_TRUE(raft::match(-1234, math_eval(min_test_op{}, 1234, 0, -1234), raft::Compare())); + ASSERT_TRUE(raft::match(-1234, math_eval(min_test_op{}, -1234, 1234, 0), raft::Compare())); + ASSERT_TRUE(raft::match(-1234, math_eval(min_test_op{}, 0, -1234, 1234), raft::Compare())); + + ASSERT_TRUE(raft::match( + -12.34, math_eval(min_test_op{}, 12.34f, 0., -12.34), raft::CompareApprox(0.000001))); + ASSERT_TRUE(raft::match( + -12.34, math_eval(min_test_op{}, -12.34, 12.34f, 0.), raft::CompareApprox(0.000001))); + ASSERT_TRUE(raft::match( + -12.34, math_eval(min_test_op{}, 0., -12.34, 12.34f), raft::CompareApprox(0.000001))); +} + +struct pow_test_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& x, const Type& y) const + { + return raft::pow(x, y); + } +}; + +TEST(MathDevice, Pow) +{ + ASSERT_TRUE(raft::match(std::pow(12.34f, 2.f), + math_eval(pow_test_op{}, 12.34f, 2.f), + raft::CompareApprox(0.0001f))); + 
ASSERT_TRUE(raft::match(std::pow(12.34, 2.), + math_eval(pow_test_op{}, 12.34, 2.), + raft::CompareApprox(0.000001))); +} + +struct sgn_test_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in) const + { + return raft::sgn(in); + } +}; + +TEST(MathDevice, Sgn) +{ + ASSERT_TRUE(raft::match(-1, math_eval(sgn_test_op{}, -1234), raft::Compare())); + ASSERT_TRUE(raft::match(0, math_eval(sgn_test_op{}, 0), raft::Compare())); + ASSERT_TRUE(raft::match(1, math_eval(sgn_test_op{}, 1234), raft::Compare())); + ASSERT_TRUE(raft::match(-1, math_eval(sgn_test_op{}, -12.34f), raft::Compare())); + ASSERT_TRUE(raft::match(0, math_eval(sgn_test_op{}, 0.f), raft::Compare())); + ASSERT_TRUE(raft::match(1, math_eval(sgn_test_op{}, 12.34f), raft::Compare())); +} + +struct sin_test_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in) const + { + return raft::sin(in); + } +}; + +TEST(MathDevice, Sin) +{ + ASSERT_TRUE(raft::match( + std::sin(12.34f), math_eval(sin_test_op{}, 12.34f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE(raft::match( + std::sin(12.34), math_eval(sin_test_op{}, 12.34), raft::CompareApprox(0.000001))); +} + +struct sincos_test_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& x, Type* s, Type* c) const + { + raft::sincos(x, s, c); + return x; // unused, just to avoid creating another helper + } +}; + +TEST(MathDevice, SinCos) +{ + auto stream = rmm::cuda_stream_default; + float xf = 12.34f; + rmm::device_scalar sf(stream); + rmm::device_scalar cf(stream); + math_eval(sincos_test_op{}, xf, sf.data(), cf.data()); + ASSERT_TRUE(raft::match(std::sin(12.34f), sf.value(stream), raft::CompareApprox(0.0001f))); + ASSERT_TRUE(raft::match(std::cos(12.34f), cf.value(stream), raft::CompareApprox(0.0001f))); + double xd = 12.34f; + rmm::device_scalar sd(stream); + rmm::device_scalar cd(stream); + math_eval(sincos_test_op{}, xd, sd.data(), cd.data()); + ASSERT_TRUE(raft::match(std::sin(12.34), sd.value(stream), raft::CompareApprox(0.0001f))); + ASSERT_TRUE(raft::match(std::cos(12.34), cd.value(stream), raft::CompareApprox(0.0001f))); +} + +struct sqrt_test_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in) const + { + return raft::sqrt(in); + } +}; + +TEST(MathDevice, Sqrt) +{ + ASSERT_TRUE(raft::match( + std::sqrt(12.34f), math_eval(sqrt_test_op{}, 12.34f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE(raft::match( + std::sqrt(12.34), math_eval(sqrt_test_op{}, 12.34), raft::CompareApprox(0.000001))); +} + +struct tanh_test_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in) const + { + return raft::tanh(in); + } +}; + +TEST(MathDevice, Tanh) +{ + ASSERT_TRUE(raft::match( + std::tanh(12.34f), math_eval(tanh_test_op{}, 12.34f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE(raft::match( + std::tanh(12.34), math_eval(tanh_test_op{}, 12.34), raft::CompareApprox(0.000001))); +} diff --git a/cpp/test/core/math_host.cpp b/cpp/test/core/math_host.cpp index 255b2e8979..d23f4fc8da 100644 --- a/cpp/test/core/math_host.cpp +++ b/cpp/test/core/math_host.cpp @@ -22,7 +22,7 @@ TEST(MathHost, Abs) { // Integer abs - ASSERT_TRUE(raft::match(123, raft::abs(int8_t{-123}), raft::Compare())); + ASSERT_TRUE(raft::match(int8_t{123}, raft::abs(int8_t{-123}), raft::Compare())); ASSERT_TRUE(raft::match(12345, raft::abs(-12345), raft::Compare())); ASSERT_TRUE(raft::match(12345l, raft::abs(-12345l), raft::Compare())); ASSERT_TRUE( From d436137b061140808404d3d4414a02a6e442a13f Mon Sep 17 
00:00:00 2001 From: Louis Sugy Date: Mon, 16 Jan 2023 19:04:07 +0100 Subject: [PATCH 03/13] Replace myXxx with raft::xxx --- cpp/include/raft/distance/detail/canberra.cuh | 4 ++-- .../raft/distance/detail/chebyshev.cuh | 4 ++-- .../raft/distance/detail/correlation.cuh | 2 +- .../raft/distance/detail/euclidean.cuh | 6 ++--- .../raft/distance/detail/fused_l2_nn.cuh | 2 +- .../raft/distance/detail/hellinger.cuh | 2 +- .../raft/distance/detail/jensen_shannon.cuh | 6 ++--- .../raft/distance/detail/kl_divergence.cuh | 12 +++++----- cpp/include/raft/distance/detail/l1.cuh | 2 +- .../raft/distance/detail/minkowski.cuh | 6 ++--- cpp/include/raft/linalg/detail/lstsq.cuh | 2 +- cpp/include/raft/matrix/detail/math.cuh | 4 ++-- .../raft/random/detail/make_regression.cuh | 4 ++-- cpp/include/raft/random/detail/rng_device.cuh | 22 +++++++++---------- cpp/include/raft/stats/detail/stddev.cuh | 4 ++-- cpp/include/raft/util/cuda_utils.cuh | 4 ++-- cpp/test/distance/distance_base.cuh | 22 +++++++++---------- cpp/test/distance/fused_l2_nn.cu | 2 +- cpp/test/linalg/matrix_vector.cu | 2 +- cpp/test/linalg/norm.cu | 8 +++---- cpp/test/linalg/power.cu | 4 ++-- cpp/test/linalg/sqrt.cu | 2 +- cpp/test/neighbors/ann_utils.cuh | 2 +- cpp/test/random/rng.cu | 12 +++++----- 24 files changed, 70 insertions(+), 70 deletions(-) diff --git a/cpp/include/raft/distance/detail/canberra.cuh b/cpp/include/raft/distance/detail/canberra.cuh index 43a904edba..f17a26dc4b 100644 --- a/cpp/include/raft/distance/detail/canberra.cuh +++ b/cpp/include/raft/distance/detail/canberra.cuh @@ -73,8 +73,8 @@ static void canberraImpl(const DataT* x, // Accumulation operation lambda auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) { - const auto diff = raft::myAbs(x - y); - const auto add = raft::myAbs(x) + raft::myAbs(y); + const auto diff = raft::abs(x - y); + const auto add = raft::abs(x) + raft::abs(y); // deal with potential for 0 in denominator by // forcing 1/0 instead acc += ((add != 0) * diff / (add + (add == 0))); diff --git a/cpp/include/raft/distance/detail/chebyshev.cuh b/cpp/include/raft/distance/detail/chebyshev.cuh index 52573bd170..43b36e7921 100644 --- a/cpp/include/raft/distance/detail/chebyshev.cuh +++ b/cpp/include/raft/distance/detail/chebyshev.cuh @@ -73,8 +73,8 @@ static void chebyshevImpl(const DataT* x, // Accumulation operation lambda auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) { - const auto diff = raft::myAbs(x - y); - acc = raft::myMax(acc, diff); + const auto diff = raft::abs(x - y); + acc = raft::max(acc, diff); }; // epilogue operation lambda for final value calculation diff --git a/cpp/include/raft/distance/detail/correlation.cuh b/cpp/include/raft/distance/detail/correlation.cuh index 9bdbbf112c..849e79679a 100644 --- a/cpp/include/raft/distance/detail/correlation.cuh +++ b/cpp/include/raft/distance/detail/correlation.cuh @@ -125,7 +125,7 @@ static void correlationImpl(const DataT* x, auto Q_denom = k * regx2n[i] - (regxn[i] * regxn[i]); auto R_denom = k * regy2n[j] - (regyn[j] * regyn[j]); - acc[i][j] = 1 - (numer / raft::mySqrt(Q_denom * R_denom)); + acc[i][j] = 1 - (numer / raft::sqrt(Q_denom * R_denom)); } } }; diff --git a/cpp/include/raft/distance/detail/euclidean.cuh b/cpp/include/raft/distance/detail/euclidean.cuh index 4184810fff..d508894682 100644 --- a/cpp/include/raft/distance/detail/euclidean.cuh +++ b/cpp/include/raft/distance/detail/euclidean.cuh @@ -34,7 +34,7 @@ struct L2ExpandedOp { __device__ AccT operator()(DataT& aNorm, const DataT& bNorm, DataT& 
accVal) const noexcept { AccT outVal = aNorm + bNorm - DataT(2.0) * accVal; - return sqrt ? raft::mySqrt(outVal) : outVal; + return sqrt ? raft::sqrt(outVal) : outVal; } __device__ AccT operator()(DataT aData) const noexcept { return aData; } @@ -130,7 +130,7 @@ void euclideanExpImpl(const DataT* x, for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) { #pragma unroll for (int j = 0; j < KPolicy::AccColsPerTh; ++j) { - acc[i][j] = raft::mySqrt(acc[i][j]); + acc[i][j] = raft::sqrt(acc[i][j]); } } } @@ -350,7 +350,7 @@ void euclideanUnExpImpl(const DataT* x, for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) { #pragma unroll for (int j = 0; j < KPolicy::AccColsPerTh; ++j) { - acc[i][j] = raft::mySqrt(acc[i][j]); + acc[i][j] = raft::sqrt(acc[i][j]); } } } diff --git a/cpp/include/raft/distance/detail/fused_l2_nn.cuh b/cpp/include/raft/distance/detail/fused_l2_nn.cuh index c9750df8ad..13a9d17024 100644 --- a/cpp/include/raft/distance/detail/fused_l2_nn.cuh +++ b/cpp/include/raft/distance/detail/fused_l2_nn.cuh @@ -175,7 +175,7 @@ __global__ __launch_bounds__(P::Nthreads, 2) void fusedL2NNkernel(OutT* min, #pragma unroll for (int j = 0; j < P::AccColsPerTh; ++j) { auto acc_ij = acc[i][j]; - acc[i][j] = acc_ij > DataT{0} ? raft::mySqrt(acc_ij) : DataT{0}; + acc[i][j] = acc_ij > DataT{0} ? raft::sqrt(acc_ij) : DataT{0}; } } } diff --git a/cpp/include/raft/distance/detail/hellinger.cuh b/cpp/include/raft/distance/detail/hellinger.cuh index 51f462ab36..e32540f947 100644 --- a/cpp/include/raft/distance/detail/hellinger.cuh +++ b/cpp/include/raft/distance/detail/hellinger.cuh @@ -105,7 +105,7 @@ static void hellingerImpl(const DataT* x, // Adjust to replace NaN in sqrt with 0 if input to sqrt is negative const auto finalVal = (1 - acc[i][j]); const auto rectifier = (!signbit(finalVal)); - acc[i][j] = raft::mySqrt(rectifier * finalVal); + acc[i][j] = raft::sqrt(rectifier * finalVal); } } }; diff --git a/cpp/include/raft/distance/detail/jensen_shannon.cuh b/cpp/include/raft/distance/detail/jensen_shannon.cuh index 92ee071cf5..ebc8dddc90 100644 --- a/cpp/include/raft/distance/detail/jensen_shannon.cuh +++ b/cpp/include/raft/distance/detail/jensen_shannon.cuh @@ -78,11 +78,11 @@ static void jensenShannonImpl(const DataT* x, auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) { const DataT m = 0.5f * (x + y); const bool m_zero = (m == 0); - const auto logM = (!m_zero) * raft::myLog(m + m_zero); + const auto logM = (!m_zero) * raft::log(m + m_zero); const bool x_zero = (x == 0); const bool y_zero = (y == 0); - acc += (-x * (logM - raft::myLog(x + x_zero))) + (-y * (logM - raft::myLog(y + y_zero))); + acc += (-x * (logM - raft::log(x + x_zero))) + (-y * (logM - raft::log(y + y_zero))); }; // epilogue operation lambda for final value calculation @@ -95,7 +95,7 @@ static void jensenShannonImpl(const DataT* x, for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) { #pragma unroll for (int j = 0; j < KPolicy::AccColsPerTh; ++j) { - acc[i][j] = raft::mySqrt(0.5 * acc[i][j]); + acc[i][j] = raft::sqrt(0.5 * acc[i][j]); } } }; diff --git a/cpp/include/raft/distance/detail/kl_divergence.cuh b/cpp/include/raft/distance/detail/kl_divergence.cuh index 4c0c4b6ace..031d4a481e 100644 --- a/cpp/include/raft/distance/detail/kl_divergence.cuh +++ b/cpp/include/raft/distance/detail/kl_divergence.cuh @@ -81,10 +81,10 @@ static void klDivergenceImpl(const DataT* x, auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) { if (isRowMajor) { const bool x_zero = (x == 0); - acc += x * (raft::myLog(x + x_zero) - y); + 
acc += x * (raft::log(x + x_zero) - y); } else { const bool y_zero = (y == 0); - acc += y * (raft::myLog(y + y_zero) - x); + acc += y * (raft::log(y + y_zero) - x); } }; @@ -92,23 +92,23 @@ static void klDivergenceImpl(const DataT* x, if (isRowMajor) { const bool x_zero = (x == 0); const bool y_zero = (y == 0); - acc += x * (raft::myLog(x + x_zero) - (!y_zero) * raft::myLog(y + y_zero)); + acc += x * (raft::log(x + x_zero) - (!y_zero) * raft::log(y + y_zero)); } else { const bool y_zero = (y == 0); const bool x_zero = (x == 0); - acc += y * (raft::myLog(y + y_zero) - (!x_zero) * raft::myLog(x + x_zero)); + acc += y * (raft::log(y + y_zero) - (!x_zero) * raft::log(x + x_zero)); } }; auto unaryOp_lambda = [] __device__(DataT input) { const bool x_zero = (input == 0); - return (!x_zero) * raft::myLog(input + x_zero); + return (!x_zero) * raft::log(input + x_zero); }; auto unaryOp_lambda_reverse = [] __device__(DataT input) { // reverse previous log (x) back to x using (e ^ log(x)) const bool x_zero = (input == 0); - return (!x_zero) * raft::myExp(input); + return (!x_zero) * raft::exp(input); }; // epilogue operation lambda for final value calculation diff --git a/cpp/include/raft/distance/detail/l1.cuh b/cpp/include/raft/distance/detail/l1.cuh index 87893bab7c..bf10651b60 100644 --- a/cpp/include/raft/distance/detail/l1.cuh +++ b/cpp/include/raft/distance/detail/l1.cuh @@ -71,7 +71,7 @@ static void l1Impl(const DataT* x, // Accumulation operation lambda auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) { - const auto diff = raft::myAbs(x - y); + const auto diff = raft::abs(x - y); acc += diff; }; diff --git a/cpp/include/raft/distance/detail/minkowski.cuh b/cpp/include/raft/distance/detail/minkowski.cuh index bda83babf1..58105fbc0b 100644 --- a/cpp/include/raft/distance/detail/minkowski.cuh +++ b/cpp/include/raft/distance/detail/minkowski.cuh @@ -74,8 +74,8 @@ void minkowskiUnExpImpl(const DataT* x, // Accumulation operation lambda auto core_lambda = [p] __device__(AccT & acc, DataT & x, DataT & y) { - const auto diff = raft::myAbs(x - y); - acc += raft::myPow(diff, p); + const auto diff = raft::abs(x - y); + acc += raft::pow(diff, p); }; // epilogue operation lambda for final value calculation @@ -89,7 +89,7 @@ void minkowskiUnExpImpl(const DataT* x, for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) { #pragma unroll for (int j = 0; j < KPolicy::AccColsPerTh; ++j) { - acc[i][j] = raft::myPow(acc[i][j], one_over_p); + acc[i][j] = raft::pow(acc[i][j], one_over_p); } } }; diff --git a/cpp/include/raft/linalg/detail/lstsq.cuh b/cpp/include/raft/linalg/detail/lstsq.cuh index 1273956b21..5fa9f9f957 100644 --- a/cpp/include/raft/linalg/detail/lstsq.cuh +++ b/cpp/include/raft/linalg/detail/lstsq.cuh @@ -104,7 +104,7 @@ struct DivideByNonZero { operator()(const math_t a, const math_t b) const { - return raft::myAbs(b) >= eps ? a / b : a; + return raft::abs(b) >= eps ? 
a / b : a; } }; diff --git a/cpp/include/raft/matrix/detail/math.cuh b/cpp/include/raft/matrix/detail/math.cuh index c559da3942..8fb805cf61 100644 --- a/cpp/include/raft/matrix/detail/math.cuh +++ b/cpp/include/raft/matrix/detail/math.cuh @@ -278,7 +278,7 @@ void matrixVectorBinaryDivSkipZero(Type* data, rowMajor, bcastAlongRows, [] __device__(Type a, Type b) { - if (raft::myAbs(b) < Type(1e-10)) + if (raft::abs(b) < Type(1e-10)) return Type(0); else return a / b; @@ -294,7 +294,7 @@ void matrixVectorBinaryDivSkipZero(Type* data, rowMajor, bcastAlongRows, [] __device__(Type a, Type b) { - if (raft::myAbs(b) < Type(1e-10)) + if (raft::abs(b) < Type(1e-10)) return a; else return a / b; diff --git a/cpp/include/raft/random/detail/make_regression.cuh b/cpp/include/raft/random/detail/make_regression.cuh index cb0949c458..e1fd7da3fe 100644 --- a/cpp/include/raft/random/detail/make_regression.cuh +++ b/cpp/include/raft/random/detail/make_regression.cuh @@ -44,8 +44,8 @@ static __global__ void _singular_profile_kernel(DataT* out, IdxT n, DataT tail_s IdxT tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid < n) { DataT sval = static_cast(tid) / rank; - DataT low_rank = ((DataT)1.0 - tail_strength) * raft::myExp(-sval * sval); - DataT tail = tail_strength * raft::myExp((DataT)-0.1 * sval); + DataT low_rank = ((DataT)1.0 - tail_strength) * raft::exp(-sval * sval); + DataT tail = tail_strength * raft::exp((DataT)-0.1 * sval); out[tid] = low_rank + tail; } } diff --git a/cpp/include/raft/random/detail/rng_device.cuh b/cpp/include/raft/random/detail/rng_device.cuh index 6c75a4fa78..5909396a50 100644 --- a/cpp/include/raft/random/detail/rng_device.cuh +++ b/cpp/include/raft/random/detail/rng_device.cuh @@ -143,10 +143,10 @@ DI void box_muller_transform(Type& val1, Type& val2, Type sigma1, Type mu1, Type { constexpr Type twoPi = Type(2.0) * Type(3.141592654); constexpr Type minus2 = -Type(2.0); - Type R = raft::mySqrt(minus2 * raft::myLog(val1)); + Type R = raft::sqrt(minus2 * raft::log(val1)); Type theta = twoPi * val2; Type s, c; - raft::mySinCos(theta, s, c); + raft::sincos(theta, &s, &c); val1 = R * c * sigma1 + mu1; val2 = R * s * sigma2 + mu2; } @@ -323,7 +323,7 @@ DI void custom_next( gen.next(res); } while (res == OutType(0.0)); - *val = params.mu - params.beta * raft::myLog(-raft::myLog(res)); + *val = params.mu - params.beta * raft::log(-raft::log(res)); } template @@ -340,8 +340,8 @@ DI void custom_next(GenType& gen, gen.next(res2); box_muller_transform(res1, res2, params.sigma, params.mu); - *val = raft::myExp(res1); - *(val + 1) = raft::myExp(res2); + *val = raft::exp(res1); + *(val + 1) = raft::exp(res2); } template @@ -358,7 +358,7 @@ DI void custom_next(GenType& gen, } while (res == OutType(0.0)); constexpr OutType one = (OutType)1.0; - *val = params.mu - params.scale * raft::myLog(one / res - one); + *val = params.mu - params.scale * raft::log(one / res - one); } template @@ -371,7 +371,7 @@ DI void custom_next(GenType& gen, OutType res; gen.next(res); constexpr OutType one = (OutType)1.0; - *val = -raft::myLog(one - res) / params.lambda; + *val = -raft::log(one - res) / params.lambda; } template @@ -386,7 +386,7 @@ DI void custom_next(GenType& gen, constexpr OutType one = (OutType)1.0; constexpr OutType two = (OutType)2.0; - *val = raft::mySqrt(-two * raft::myLog(one - res)) * params.sigma; + *val = raft::sqrt(-two * raft::log(one - res)) * params.sigma; } template @@ -409,9 +409,9 @@ DI void custom_next(GenType& gen, // The <= comparison here means, number of samples going in `if` 
branch are more by 1 than `else` // branch. However it does not matter as for 0.5 both branches evaluate to same result. if (res <= oneHalf) { - out = params.mu + params.scale * raft::myLog(two * res); + out = params.mu + params.scale * raft::log(two * res); } else { - out = params.mu - params.scale * raft::myLog(two * (one - res)); + out = params.mu - params.scale * raft::log(two * (one - res)); } *val = out; } @@ -424,7 +424,7 @@ DI void custom_next( gen.next(res); params.inIdxPtr[idx] = idx; constexpr OutType one = (OutType)1.0; - auto exp = -raft::myLog(one - res); + auto exp = -raft::log(one - res); if (params.wts != nullptr) { *val = exp / params.wts[idx]; } else { diff --git a/cpp/include/raft/stats/detail/stddev.cuh b/cpp/include/raft/stats/detail/stddev.cuh index ccea2ea5da..dc1a125de1 100644 --- a/cpp/include/raft/stats/detail/stddev.cuh +++ b/cpp/include/raft/stats/detail/stddev.cuh @@ -63,7 +63,7 @@ __global__ void stddevKernelColMajor( thread_data += diff * diff; } Type acc = BlockReduce(temp_storage).Sum(thread_data); - if (threadIdx.x == 0) { std[blockIdx.x] = raft::mySqrt(acc / N); } + if (threadIdx.x == 0) { std[blockIdx.x] = raft::sqrt(acc / N); } } template @@ -126,7 +126,7 @@ void stddev(Type* std, std, mu, D, - [ratio] __device__(Type a, Type b) { return raft::mySqrt(a * ratio - b * b); }, + [ratio] __device__(Type a, Type b) { return raft::sqrt(a * ratio - b * b); }, stream); } else { stddevKernelColMajor<<>>(std, data, mu, D, N); diff --git a/cpp/include/raft/util/cuda_utils.cuh b/cpp/include/raft/util/cuda_utils.cuh index 2d515cbca8..66b14d7099 100644 --- a/cpp/include/raft/util/cuda_utils.cuh +++ b/cpp/include/raft/util/cuda_utils.cuh @@ -303,7 +303,7 @@ HDI double myMin(double x, double y) template DI T myAtomicMin(T* address, T val) { - myAtomicReduce(address, val, myMin); + myAtomicReduce(address, val, raft::min_op{}); return *address; } @@ -317,7 +317,7 @@ DI T myAtomicMin(T* address, T val) template DI T myAtomicMax(T* address, T val) { - myAtomicReduce(address, val, myMax); + myAtomicReduce(address, val, raft::max_op{}); return *address; } diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh index cbfd97ebc6..fedbee919d 100644 --- a/cpp/test/distance/distance_base.cuh +++ b/cpp/test/distance/distance_base.cuh @@ -52,7 +52,7 @@ __global__ void naiveDistanceKernel(DataType* dist, } if (type == raft::distance::DistanceType::L2SqrtExpanded || type == raft::distance::DistanceType::L2SqrtUnexpanded) - acc = raft::mySqrt(acc); + acc = raft::sqrt(acc); int outidx = isRowMajor ? midx * n + nidx : midx + m * nidx; dist[outidx] = acc; } @@ -79,9 +79,9 @@ __global__ void naiveL1_Linf_CanberraDistanceKernel(DataType* dist, auto b = y[yidx]; auto diff = (a > b) ? (a - b) : (b - a); if (type == raft::distance::DistanceType::Linf) { - acc = raft::myMax(acc, diff); + acc = raft::max(acc, diff); } else if (type == raft::distance::DistanceType::Canberra) { - const auto add = raft::myAbs(a) + raft::myAbs(b); + const auto add = raft::abs(a) + raft::abs(b); // deal with potential for 0 in denominator by // forcing 1/0 instead acc += ((add != 0) * diff / (add + (add == 0))); @@ -119,7 +119,7 @@ __global__ void naiveCosineDistanceKernel( int outidx = isRowMajor ? 
midx * n + nidx : midx + m * nidx; // Use 1.0 - (cosine similarity) to calc the distance - dist[outidx] = (DataType)1.0 - acc_ab / (raft::mySqrt(acc_a) * raft::mySqrt(acc_b)); + dist[outidx] = (DataType)1.0 - acc_ab / (raft::sqrt(acc_a) * raft::sqrt(acc_b)); } template @@ -137,7 +137,7 @@ __global__ void naiveHellingerDistanceKernel( int yidx = isRowMajor ? i + nidx * k : i * n + nidx; auto a = x[xidx]; auto b = y[yidx]; - acc_ab += raft::mySqrt(a) * raft::mySqrt(b); + acc_ab += raft::sqrt(a) * raft::sqrt(b); } int outidx = isRowMajor ? midx * n + nidx : midx + m * nidx; @@ -145,7 +145,7 @@ __global__ void naiveHellingerDistanceKernel( // Adjust to replace NaN in sqrt with 0 if input to sqrt is negative acc_ab = 1 - acc_ab; auto rectifier = (!signbit(acc_ab)); - dist[outidx] = raft::mySqrt(rectifier * acc_ab); + dist[outidx] = raft::sqrt(rectifier * acc_ab); } template @@ -167,11 +167,11 @@ __global__ void naiveLpUnexpDistanceKernel(DataType* dist, int yidx = isRowMajor ? i + nidx * k : i * n + nidx; auto a = x[xidx]; auto b = y[yidx]; - auto diff = raft::myAbs(a - b); - acc += raft::myPow(diff, p); + auto diff = raft::abs(a - b); + acc += raft::pow(diff, p); } auto one_over_p = 1 / p; - acc = raft::myPow(acc, one_over_p); + acc = raft::pow(acc, one_over_p); int outidx = isRowMajor ? midx * n + nidx : midx + m * nidx; dist[outidx] = acc; } @@ -222,7 +222,7 @@ __global__ void naiveJensenShannonDistanceKernel( acc += (-a * (!p_zero * log(p + p_zero))) + (-b * (!q_zero * log(q + q_zero))); } - acc = raft::mySqrt(0.5f * acc); + acc = raft::sqrt(0.5f * acc); int outidx = isRowMajor ? midx * n + nidx : midx + m * nidx; dist[outidx] = acc; } @@ -297,7 +297,7 @@ __global__ void naiveCorrelationDistanceKernel( auto Q_denom = k * a_sq_norm - (a_norm * a_norm); auto R_denom = k * b_sq_norm - (b_norm * b_norm); - acc = 1 - (numer / raft::mySqrt(Q_denom * R_denom)); + acc = 1 - (numer / raft::sqrt(Q_denom * R_denom)); int outidx = isRowMajor ? midx * n + nidx : midx + m * nidx; dist[outidx] = acc; diff --git a/cpp/test/distance/fused_l2_nn.cu b/cpp/test/distance/fused_l2_nn.cu index e746a2382d..54de12307a 100644 --- a/cpp/test/distance/fused_l2_nn.cu +++ b/cpp/test/distance/fused_l2_nn.cu @@ -60,7 +60,7 @@ __global__ void naiveKernel(raft::KeyValuePair* min, auto diff = midx >= m || nidx >= n ? DataT(0) : x[xidx] - y[yidx]; acc += diff * diff; } - if (Sqrt) { acc = raft::mySqrt(acc); } + if (Sqrt) { acc = raft::sqrt(acc); } ReduceOpT redOp; typedef cub::WarpReduce> WarpReduce; __shared__ typename WarpReduce::TempStorage temp[NWARPS]; diff --git a/cpp/test/linalg/matrix_vector.cu b/cpp/test/linalg/matrix_vector.cu index 7018e1da96..14dba23b14 100644 --- a/cpp/test/linalg/matrix_vector.cu +++ b/cpp/test/linalg/matrix_vector.cu @@ -116,7 +116,7 @@ void naive_matrix_vector_op_launch(const raft::handle_t& handle, } }; auto operation_bin_div_skip_zero = [] __device__(T mat_element, T vec_element) { - if (raft::myAbs(vec_element) < T(1e-10)) + if (raft::abs(vec_element) < T(1e-10)) return T(0); else return mat_element / vec_element; diff --git a/cpp/test/linalg/norm.cu b/cpp/test/linalg/norm.cu index 94540b9ff6..89d9cecaed 100644 --- a/cpp/test/linalg/norm.cu +++ b/cpp/test/linalg/norm.cu @@ -56,10 +56,10 @@ __global__ void naiveRowNormKernel( if (type == L2Norm) { acc += data[rowStart * D + i] * data[rowStart * D + i]; } else { - acc += raft::myAbs(data[rowStart * D + i]); + acc += raft::abs(data[rowStart * D + i]); } } - dots[rowStart] = do_sqrt ? raft::mySqrt(acc) : acc; + dots[rowStart] = do_sqrt ? 
raft::sqrt(acc) : acc; } } @@ -131,10 +131,10 @@ __global__ void naiveColNormKernel( Type acc = 0; for (IdxT i = 0; i < N; i++) { Type v = data[colID + i * D]; - acc += type == L2Norm ? v * v : raft::myAbs(v); + acc += type == L2Norm ? v * v : raft::abs(v); } - dots[colID] = do_sqrt ? raft::mySqrt(acc) : acc; + dots[colID] = do_sqrt ? raft::sqrt(acc) : acc; } template diff --git a/cpp/test/linalg/power.cu b/cpp/test/linalg/power.cu index 54c2e2a7aa..215bf1b184 100644 --- a/cpp/test/linalg/power.cu +++ b/cpp/test/linalg/power.cu @@ -27,7 +27,7 @@ template __global__ void naivePowerElemKernel(Type* out, const Type* in1, const Type* in2, int len) { int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < len) { out[idx] = raft::myPow(in1[idx], in2[idx]); } + if (idx < len) { out[idx] = raft::pow(in1[idx], in2[idx]); } } template @@ -43,7 +43,7 @@ template __global__ void naivePowerScalarKernel(Type* out, const Type* in1, const Type in2, int len) { int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < len) { out[idx] = raft::myPow(in1[idx], in2); } + if (idx < len) { out[idx] = raft::pow(in1[idx], in2); } } template diff --git a/cpp/test/linalg/sqrt.cu b/cpp/test/linalg/sqrt.cu index 9008313b58..2b330779b8 100644 --- a/cpp/test/linalg/sqrt.cu +++ b/cpp/test/linalg/sqrt.cu @@ -27,7 +27,7 @@ template __global__ void naiveSqrtElemKernel(Type* out, const Type* in1, int len) { int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < len) { out[idx] = raft::mySqrt(in1[idx]); } + if (idx < len) { out[idx] = raft::sqrt(in1[idx]); } } template diff --git a/cpp/test/neighbors/ann_utils.cuh b/cpp/test/neighbors/ann_utils.cuh index 05fe6ab92d..18d9dfbeef 100644 --- a/cpp/test/neighbors/ann_utils.cuh +++ b/cpp/test/neighbors/ann_utils.cuh @@ -131,7 +131,7 @@ __global__ void naive_distance_kernel(EvalT* dist, } if (type == raft::distance::DistanceType::L2SqrtExpanded || type == raft::distance::DistanceType::L2SqrtUnexpanded) - acc = raft::mySqrt(acc); + acc = raft::sqrt(acc); dist[midx * n + nidx] = acc; } } diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu index bdce79b76e..5122c20c5e 100644 --- a/cpp/test/random/rng.cu +++ b/cpp/test/random/rng.cu @@ -145,8 +145,8 @@ class RngTest : public ::testing::TestWithParam> { case RNG_LogNormal: { auto var = params.end * params.end; auto mu = params.start; - meanvar[0] = raft::myExp(mu + var * T(0.5)); - meanvar[1] = (raft::myExp(var) - T(1.0)) * raft::myExp(T(2.0) * mu + var); + meanvar[0] = raft::exp(mu + var * T(0.5)); + meanvar[1] = (raft::exp(var) - T(1.0)) * raft::exp(T(2.0) * mu + var); break; } case RNG_Uniform: @@ -169,7 +169,7 @@ class RngTest : public ::testing::TestWithParam> { meanvar[1] = meanvar[0] * meanvar[0]; break; case RNG_Rayleigh: - meanvar[0] = params.start * raft::mySqrt(T(3.1415 / 2.0)); + meanvar[0] = params.start * raft::sqrt(T(3.1415 / 2.0)); meanvar[1] = ((T(4.0) - T(3.1415)) / T(2.0)) * params.start * params.start; break; case RNG_Laplace: @@ -239,8 +239,8 @@ class RngMdspanTest : public ::testing::TestWithParam> { case RNG_LogNormal: { auto var = params.end * params.end; auto mu = params.start; - meanvar[0] = raft::myExp(mu + var * T(0.5)); - meanvar[1] = (raft::myExp(var) - T(1.0)) * raft::myExp(T(2.0) * mu + var); + meanvar[0] = raft::exp(mu + var * T(0.5)); + meanvar[1] = (raft::exp(var) - T(1.0)) * raft::exp(T(2.0) * mu + var); break; } case RNG_Uniform: @@ -263,7 +263,7 @@ class RngMdspanTest : public ::testing::TestWithParam> { meanvar[1] = meanvar[0] * meanvar[0]; break; case RNG_Rayleigh: - 
meanvar[0] = params.start * raft::mySqrt(T(3.1415 / 2.0)); + meanvar[0] = params.start * raft::sqrt(T(3.1415 / 2.0)); meanvar[1] = ((T(4.0) - T(3.1415)) / T(2.0)) * params.start * params.start; break; case RNG_Laplace: From 5ab403b5ef7350bbbe7424386bb1ac2b02a1cfd0 Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Mon, 16 Jan 2023 19:29:49 +0100 Subject: [PATCH 04/13] Replace std or intrinsics with raft version when appropriate --- cpp/include/raft/core/math.hpp | 28 ++++++++++++++++ cpp/include/raft/core/operators.hpp | 6 ++-- .../raft/matrix/detail/linewise_op.cuh | 4 +-- cpp/include/raft/matrix/detail/math.cuh | 4 +-- .../sparse/distance/detail/l2_distance.cuh | 10 +++--- .../sparse/distance/detail/lp_distance.cuh | 4 +-- .../spatial/knn/detail/ball_cover/common.cuh | 2 +- .../spatial/knn/detail/haversine_distance.cuh | 8 ++--- .../raft/spectral/detail/spectral_util.cuh | 2 +- cpp/test/core/math_device.cu | 32 +++++++++++++++++++ cpp/test/core/math_host.cpp | 16 ++++++++++ cpp/test/matrix/math.cu | 2 +- 12 files changed, 96 insertions(+), 22 deletions(-) diff --git a/cpp/include/raft/core/math.hpp b/cpp/include/raft/core/math.hpp index 7932126e16..ee0ef26cb4 100644 --- a/cpp/include/raft/core/math.hpp +++ b/cpp/include/raft/core/math.hpp @@ -52,6 +52,34 @@ constexpr RAFT_INLINE_FUNCTION auto abs(const T& x) } /** @} */ +/** + * Inverse cosine + */ +template +constexpr RAFT_INLINE_FUNCTION auto acos(const T& x) + -> std::enable_if_t || std::is_same_v, T> +{ +#ifdef __CUDA_ARCH__ + return ::acos(x); +#else + return std::acos(x); +#endif +} + +/** + * Inverse sine + */ +template +constexpr RAFT_INLINE_FUNCTION auto asin(const T& x) + -> std::enable_if_t || std::is_same_v, T> +{ +#ifdef __CUDA_ARCH__ + return ::asin(x); +#else + return std::asin(x); +#endif +} + /** * Inverse hyperbolic tangent */ diff --git a/cpp/include/raft/core/operators.hpp b/cpp/include/raft/core/operators.hpp index 398354df46..ec26de8d39 100644 --- a/cpp/include/raft/core/operators.hpp +++ b/cpp/include/raft/core/operators.hpp @@ -77,7 +77,7 @@ struct sqrt_op { template constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in, UnusedArgs...) const { - return std::sqrt(in); + return raft::sqrt(in); } }; @@ -93,7 +93,7 @@ struct abs_op { template constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in, UnusedArgs...) 
const { - return std::abs(in); + return raft::abs(in); } }; @@ -150,7 +150,7 @@ struct pow_op { template constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& a, const Type& b) const { - return std::pow(a, b); + return raft::pow(a, b); } }; diff --git a/cpp/include/raft/matrix/detail/linewise_op.cuh b/cpp/include/raft/matrix/detail/linewise_op.cuh index 605726bea6..de3e5a62c3 100644 --- a/cpp/include/raft/matrix/detail/linewise_op.cuh +++ b/cpp/include/raft/matrix/detail/linewise_op.cuh @@ -343,9 +343,7 @@ __global__ void __launch_bounds__(MaxOffset, 2) template constexpr size_t maxSizeOf() { - size_t maxSize = 0; - ((maxSize = std::max(maxSize, sizeof(Types))), ...); - return maxSize; + return raft::max(sizeof(Types)...); } /** diff --git a/cpp/include/raft/matrix/detail/math.cuh b/cpp/include/raft/matrix/detail/math.cuh index 8fb805cf61..0aa7cce470 100644 --- a/cpp/include/raft/matrix/detail/math.cuh +++ b/cpp/include/raft/matrix/detail/math.cuh @@ -87,10 +87,10 @@ void seqRoot(math_t* in, if (a < math_t(0)) { return math_t(0); } else { - return sqrt(a * scalar); + return raft::sqrt(a * scalar); } } else { - return sqrt(a * scalar); + return raft::sqrt(a * scalar); } }, stream); diff --git a/cpp/include/raft/sparse/distance/detail/l2_distance.cuh b/cpp/include/raft/sparse/distance/detail/l2_distance.cuh index 3c852235df..40051cc853 100644 --- a/cpp/include/raft/sparse/distance/detail/l2_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/l2_distance.cuh @@ -112,7 +112,7 @@ __global__ void compute_correlation_warp_kernel(value_t* __restrict__ C, value_t Q_denom = n * Q_l2 - (Q_l1 * Q_l1); value_t R_denom = n * R_l2 - (R_l1 * R_l1); - value_t val = 1 - (numer / sqrt(Q_denom * R_denom)); + value_t val = 1 - (numer / raft::sqrt(Q_denom * R_denom)); // correct for small instabilities C[(size_t)i * n_cols + j] = val * (fabs(val) >= 0.0001); @@ -292,7 +292,7 @@ class l2_sqrt_expanded_distances_t : public l2_expanded_distances_tconfig_->a_nrows * this->config_->b_nrows, [] __device__(value_t input) { int neg = input < 0 ? 
-1 : 1; - return sqrt(abs(input) * neg); + return raft::sqrt(abs(input) * neg); }, this->config_->handle.get_stream()); } @@ -379,7 +379,7 @@ class cosine_expanded_distances_t : public distances_t { config_->b_nrows, config_->handle.get_stream(), [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) { - value_t norms = sqrt(q_norm) * sqrt(r_norm); + value_t norms = raft::sqrt(q_norm) * raft::sqrt(r_norm); // deal with potential for 0 in denominator by forcing 0/1 instead value_t cos = ((norms != 0) * dot) / ((norms == 0) + norms); @@ -429,7 +429,7 @@ class hellinger_expanded_distances_t : public distances_t { out_dists, *config_, coo_rows.data(), - [] __device__(value_t a, value_t b) { return sqrt(a) * sqrt(b); }, + [] __device__(value_t a, value_t b) { return raft::sqrt(a) * raft::sqrt(b); }, raft::add_op(), raft::atomic_add_op()); @@ -440,7 +440,7 @@ class hellinger_expanded_distances_t : public distances_t { [=] __device__(value_t input) { // Adjust to replace NaN in sqrt with 0 if input to sqrt is negative bool rectifier = (1 - input) > 0; - return sqrt(rectifier * (1 - input)); + return raft::sqrt(rectifier * (1 - input)); }, config_->handle.get_stream()); } diff --git a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh index a973aebbab..d25d7637f0 100644 --- a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh @@ -132,7 +132,7 @@ class l2_sqrt_unexpanded_distances_t : public l2_unexpanded_distances_tconfig_->a_nrows * this->config_->b_nrows, [] __device__(value_t input) { int neg = input < 0 ? -1 : 1; - return sqrt(abs(input) * neg); + return raft::sqrt(abs(input) * neg); }, this->config_->handle.get_stream()); } @@ -274,7 +274,7 @@ class jensen_shannon_unexpanded_distances_t : public distances_t { out_dists, out_dists, config_->a_nrows * config_->b_nrows, - [=] __device__(value_t input) { return sqrt(0.5 * input); }, + [=] __device__(value_t input) { return raft::sqrt(0.5 * input); }, config_->handle.get_stream()); } diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover/common.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover/common.cuh index b09cf0da10..e138e582d0 100644 --- a/cpp/include/raft/spatial/knn/detail/ball_cover/common.cuh +++ b/cpp/include/raft/spatial/knn/detail/ball_cover/common.cuh @@ -71,7 +71,7 @@ struct EuclideanFunc : public DistFunc { sum_sq += diff * diff; } - return sqrt(sum_sq); + return raft::sqrt(sum_sq); } }; diff --git a/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh b/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh index e073841dd3..9cecc0adf4 100644 --- a/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh +++ b/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh @@ -32,11 +32,11 @@ namespace detail { template DI value_t compute_haversine(value_t x1, value_t y1, value_t x2, value_t y2) { - value_t sin_0 = sin(0.5 * (x1 - y1)); - value_t sin_1 = sin(0.5 * (x2 - y2)); - value_t rdist = sin_0 * sin_0 + cos(x1) * cos(y1) * sin_1 * sin_1; + value_t sin_0 = raft::sin(0.5 * (x1 - y1)); + value_t sin_1 = raft::sin(0.5 * (x2 - y2)); + value_t rdist = sin_0 * sin_0 + raft::cos(x1) * raft::cos(y1) * sin_1 * sin_1; - return 2 * asin(sqrt(rdist)); + return 2 * raft::asin(raft::sqrt(rdist)); } /** diff --git a/cpp/include/raft/spectral/detail/spectral_util.cuh b/cpp/include/raft/spectral/detail/spectral_util.cuh index 3a0ad1f96f..3849362dd6 100644 --- 
a/cpp/include/raft/spectral/detail/spectral_util.cuh +++ b/cpp/include/raft/spectral/detail/spectral_util.cuh @@ -72,7 +72,7 @@ static __global__ void scale_obs_kernel(index_type_t m, index_type_t n, value_ty // scale by alpha alpha = __shfl_sync(warp_full_mask(), alpha, blockDim.x - 1, blockDim.x); - alpha = std::sqrt(alpha); + alpha = raft::sqrt(alpha); for (j = threadIdx.y + blockIdx.y * blockDim.y; j < n; j += blockDim.y * gridDim.y) { for (i = threadIdx.x; i < m; i += blockDim.x) { // blockDim.x=32 index = i + j * m; diff --git a/cpp/test/core/math_device.cu b/cpp/test/core/math_device.cu index a19d5bc8dd..6240d46ee4 100644 --- a/cpp/test/core/math_device.cu +++ b/cpp/test/core/math_device.cu @@ -62,6 +62,38 @@ TEST(MathDevice, Abs) raft::match(12.34, math_eval(abs_test_op{}, -12.34), raft::CompareApprox(0.000001))); } +struct acos_test_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in) const + { + return raft::acos(in); + } +}; + +TEST(MathDevice, Acos) +{ + ASSERT_TRUE(raft::match( + std::acos(0.123f), math_eval(acos_test_op{}, 0.123f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE(raft::match( + std::acos(0.123), math_eval(acos_test_op{}, 0.123), raft::CompareApprox(0.000001))); +} + +struct asin_test_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in) const + { + return raft::asin(in); + } +}; + +TEST(MathDevice, Asin) +{ + ASSERT_TRUE(raft::match( + std::asin(0.123f), math_eval(asin_test_op{}, 0.123f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE(raft::match( + std::asin(0.123), math_eval(asin_test_op{}, 0.123), raft::CompareApprox(0.000001))); +} + struct atanh_test_op { template constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in) const diff --git a/cpp/test/core/math_host.cpp b/cpp/test/core/math_host.cpp index d23f4fc8da..237b9fffc5 100644 --- a/cpp/test/core/math_host.cpp +++ b/cpp/test/core/math_host.cpp @@ -32,6 +32,22 @@ TEST(MathHost, Abs) ASSERT_TRUE(raft::match(12.34, raft::abs(-12.34), raft::CompareApprox(0.000001))); } +TEST(MathHost, Acos) +{ + ASSERT_TRUE( + raft::match(std::acos(0.123f), raft::acos(0.123f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE( + raft::match(std::acos(0.123), raft::acos(0.123), raft::CompareApprox(0.000001))); +} + +TEST(MathHost, Asin) +{ + ASSERT_TRUE( + raft::match(std::asin(0.123f), raft::asin(0.123f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE( + raft::match(std::asin(0.123), raft::asin(0.123), raft::CompareApprox(0.000001))); +} + TEST(MathHost, Atanh) { ASSERT_TRUE( diff --git a/cpp/test/matrix/math.cu b/cpp/test/matrix/math.cu index f2c1a6249c..20bd5458af 100644 --- a/cpp/test/matrix/math.cu +++ b/cpp/test/matrix/math.cu @@ -51,7 +51,7 @@ template __global__ void naiveSqrtKernel(Type* in, Type* out, int len) { int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < len) { out[idx] = std::sqrt(in[idx]); } + if (idx < len) { out[idx] = raft::sqrt(in[idx]); } } template From 683f5b60ff6bd1c0a292508548ce093510ac209c Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Tue, 17 Jan 2023 12:24:55 +0100 Subject: [PATCH 05/13] Update copyright year --- cpp/include/raft/core/math.hpp | 2 +- cpp/include/raft/distance/detail/correlation.cuh | 2 +- cpp/include/raft/distance/detail/euclidean.cuh | 2 +- cpp/include/raft/distance/detail/fused_l2_nn.cuh | 2 +- cpp/include/raft/distance/detail/hellinger.cuh | 2 +- cpp/include/raft/distance/detail/jensen_shannon.cuh | 2 +- cpp/include/raft/distance/detail/kl_divergence.cuh | 2 +- cpp/include/raft/distance/detail/minkowski.cuh | 
2 +- cpp/include/raft/linalg/detail/lstsq.cuh | 2 +- cpp/include/raft/matrix/detail/linewise_op.cuh | 2 +- cpp/include/raft/matrix/detail/math.cuh | 2 +- cpp/include/raft/random/detail/make_regression.cuh | 2 +- cpp/include/raft/random/detail/rng_device.cuh | 2 +- cpp/include/raft/sparse/distance/detail/l2_distance.cuh | 2 +- cpp/include/raft/sparse/distance/detail/lp_distance.cuh | 2 +- cpp/include/raft/spatial/knn/detail/ball_cover/common.cuh | 2 +- cpp/include/raft/spectral/detail/spectral_util.cuh | 2 +- cpp/include/raft/stats/detail/stddev.cuh | 2 +- cpp/include/raft/util/cuda_utils.cuh | 2 +- cpp/test/core/math_device.cu | 2 +- cpp/test/core/math_host.cpp | 2 +- cpp/test/linalg/matrix_vector.cu | 2 +- cpp/test/linalg/norm.cu | 2 +- cpp/test/linalg/power.cu | 2 +- cpp/test/linalg/sqrt.cu | 2 +- cpp/test/matrix/math.cu | 2 +- cpp/test/neighbors/ann_utils.cuh | 2 +- cpp/test/random/rng.cu | 2 +- 28 files changed, 28 insertions(+), 28 deletions(-) diff --git a/cpp/include/raft/core/math.hpp b/cpp/include/raft/core/math.hpp index ee0ef26cb4..cb27a1856c 100644 --- a/cpp/include/raft/core/math.hpp +++ b/cpp/include/raft/core/math.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/detail/correlation.cuh b/cpp/include/raft/distance/detail/correlation.cuh index 849e79679a..f7fe3678e6 100644 --- a/cpp/include/raft/distance/detail/correlation.cuh +++ b/cpp/include/raft/distance/detail/correlation.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/detail/euclidean.cuh b/cpp/include/raft/distance/detail/euclidean.cuh index d508894682..1a2db63f5c 100644 --- a/cpp/include/raft/distance/detail/euclidean.cuh +++ b/cpp/include/raft/distance/detail/euclidean.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/detail/fused_l2_nn.cuh b/cpp/include/raft/distance/detail/fused_l2_nn.cuh index 13a9d17024..447359ffe6 100644 --- a/cpp/include/raft/distance/detail/fused_l2_nn.cuh +++ b/cpp/include/raft/distance/detail/fused_l2_nn.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/detail/hellinger.cuh b/cpp/include/raft/distance/detail/hellinger.cuh index e32540f947..13507fe84f 100644 --- a/cpp/include/raft/distance/detail/hellinger.cuh +++ b/cpp/include/raft/distance/detail/hellinger.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/raft/distance/detail/jensen_shannon.cuh b/cpp/include/raft/distance/detail/jensen_shannon.cuh index ebc8dddc90..6489abd8ee 100644 --- a/cpp/include/raft/distance/detail/jensen_shannon.cuh +++ b/cpp/include/raft/distance/detail/jensen_shannon.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/detail/kl_divergence.cuh b/cpp/include/raft/distance/detail/kl_divergence.cuh index 031d4a481e..95ac87664f 100644 --- a/cpp/include/raft/distance/detail/kl_divergence.cuh +++ b/cpp/include/raft/distance/detail/kl_divergence.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/detail/minkowski.cuh b/cpp/include/raft/distance/detail/minkowski.cuh index 58105fbc0b..42af8cd281 100644 --- a/cpp/include/raft/distance/detail/minkowski.cuh +++ b/cpp/include/raft/distance/detail/minkowski.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/linalg/detail/lstsq.cuh b/cpp/include/raft/linalg/detail/lstsq.cuh index 5fa9f9f957..f0cf300e2f 100644 --- a/cpp/include/raft/linalg/detail/lstsq.cuh +++ b/cpp/include/raft/linalg/detail/lstsq.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/matrix/detail/linewise_op.cuh b/cpp/include/raft/matrix/detail/linewise_op.cuh index de3e5a62c3..2f000f5bc2 100644 --- a/cpp/include/raft/matrix/detail/linewise_op.cuh +++ b/cpp/include/raft/matrix/detail/linewise_op.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/matrix/detail/math.cuh b/cpp/include/raft/matrix/detail/math.cuh index 0aa7cce470..f5c33d1cf6 100644 --- a/cpp/include/raft/matrix/detail/math.cuh +++ b/cpp/include/raft/matrix/detail/math.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/random/detail/make_regression.cuh b/cpp/include/raft/random/detail/make_regression.cuh index e1fd7da3fe..057196cd74 100644 --- a/cpp/include/raft/random/detail/make_regression.cuh +++ b/cpp/include/raft/random/detail/make_regression.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/raft/random/detail/rng_device.cuh b/cpp/include/raft/random/detail/rng_device.cuh index 5909396a50..9aee47c387 100644 --- a/cpp/include/raft/random/detail/rng_device.cuh +++ b/cpp/include/raft/random/detail/rng_device.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/sparse/distance/detail/l2_distance.cuh b/cpp/include/raft/sparse/distance/detail/l2_distance.cuh index 40051cc853..2f165b3ff2 100644 --- a/cpp/include/raft/sparse/distance/detail/l2_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/l2_distance.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh index d25d7637f0..f67109afbc 100644 --- a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover/common.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover/common.cuh index e138e582d0..0a6718f5a5 100644 --- a/cpp/include/raft/spatial/knn/detail/ball_cover/common.cuh +++ b/cpp/include/raft/spatial/knn/detail/ball_cover/common.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/spectral/detail/spectral_util.cuh b/cpp/include/raft/spectral/detail/spectral_util.cuh index 3849362dd6..5991e71ec6 100644 --- a/cpp/include/raft/spectral/detail/spectral_util.cuh +++ b/cpp/include/raft/spectral/detail/spectral_util.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/stats/detail/stddev.cuh b/cpp/include/raft/stats/detail/stddev.cuh index dc1a125de1..2f7e22ca8a 100644 --- a/cpp/include/raft/stats/detail/stddev.cuh +++ b/cpp/include/raft/stats/detail/stddev.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/util/cuda_utils.cuh b/cpp/include/raft/util/cuda_utils.cuh index 66b14d7099..c7dcf8d047 100644 --- a/cpp/include/raft/util/cuda_utils.cuh +++ b/cpp/include/raft/util/cuda_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/core/math_device.cu b/cpp/test/core/math_device.cu index 6240d46ee4..ff4b343d9e 100644 --- a/cpp/test/core/math_device.cu +++ b/cpp/test/core/math_device.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/core/math_host.cpp b/cpp/test/core/math_host.cpp index 237b9fffc5..5808905713 100644 --- a/cpp/test/core/math_host.cpp +++ b/cpp/test/core/math_host.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/linalg/matrix_vector.cu b/cpp/test/linalg/matrix_vector.cu index 14dba23b14..abacd60668 100644 --- a/cpp/test/linalg/matrix_vector.cu +++ b/cpp/test/linalg/matrix_vector.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/linalg/norm.cu b/cpp/test/linalg/norm.cu index 89d9cecaed..90cfbd8f89 100644 --- a/cpp/test/linalg/norm.cu +++ b/cpp/test/linalg/norm.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/linalg/power.cu b/cpp/test/linalg/power.cu index 215bf1b184..5cb63a5697 100644 --- a/cpp/test/linalg/power.cu +++ b/cpp/test/linalg/power.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/linalg/sqrt.cu b/cpp/test/linalg/sqrt.cu index 2b330779b8..93150ca77d 100644 --- a/cpp/test/linalg/sqrt.cu +++ b/cpp/test/linalg/sqrt.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/matrix/math.cu b/cpp/test/matrix/math.cu index 20bd5458af..9dcbfc8899 100644 --- a/cpp/test/matrix/math.cu +++ b/cpp/test/matrix/math.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/neighbors/ann_utils.cuh b/cpp/test/neighbors/ann_utils.cuh index 18d9dfbeef..1a9399cccf 100644 --- a/cpp/test/neighbors/ann_utils.cuh +++ b/cpp/test/neighbors/ann_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu index 5122c20c5e..0bf494b624 100644 --- a/cpp/test/random/rng.cu +++ b/cpp/test/random/rng.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * Copyright (c) 2018-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 42fe0c08ca2fecab004cb3b22f7efe00e31e7354 Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Tue, 17 Jan 2023 12:32:16 +0100 Subject: [PATCH 06/13] Fix copyright years --- cpp/include/raft/distance/detail/jensen_shannon.cuh | 2 +- cpp/include/raft/distance/detail/kl_divergence.cuh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/distance/detail/jensen_shannon.cuh b/cpp/include/raft/distance/detail/jensen_shannon.cuh index f702ab0082..f96da01b87 100644 --- a/cpp/include/raft/distance/detail/jensen_shannon.cuh +++ b/cpp/include/raft/distance/detail/jensen_shannon.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/distance/detail/kl_divergence.cuh b/cpp/include/raft/distance/detail/kl_divergence.cuh index 6475cf9097..7ebeaf4de9 100644 --- a/cpp/include/raft/distance/detail/kl_divergence.cuh +++ b/cpp/include/raft/distance/detail/kl_divergence.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
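The deprecation shims reformatted in the next patch exist so that callers can move off the old my* wrappers in cuda_utils.cuh incrementally. For illustration, a minimal sketch of such a migration, assuming a caller shaped like the naive test kernels earlier in this series (the kernel below is hypothetical and not part of any patch):

// Hypothetical caller, shown only to illustrate the intended migration from
// the deprecated cuda_utils.cuh wrappers to the raft/core/math.hpp functions.
#include <raft/core/math.hpp>

template <typename T>
__global__ void scaled_norm_kernel(T* out, const T* in, int len)
{
  int i = threadIdx.x + blockIdx.x * blockDim.x;
  // Before this series: raft::myExp(raft::mySqrt(raft::myAbs(in[i]))),
  // which now emits [[deprecated]] warnings. The replacements dispatch to
  // the CUDA Math API under __CUDA_ARCH__ and to std:: on the host, so the
  // same expression also compiles in host-side code.
  if (i < len) { out[i] = raft::exp(raft::sqrt(raft::abs(in[i]))); }
}
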
From c93359be21f9a23de2fcfdf697798726665c9a07 Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Tue, 17 Jan 2023 13:33:41 +0100 Subject: [PATCH 07/13] Clang format --- cpp/include/raft/util/cuda_utils.cuh | 76 ++++++++++++---------------- 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/cpp/include/raft/util/cuda_utils.cuh b/cpp/include/raft/util/cuda_utils.cuh index c7dcf8d047..5be9dc999a 100644 --- a/cpp/include/raft/util/cuda_utils.cuh +++ b/cpp/include/raft/util/cuda_utils.cuh @@ -260,14 +260,14 @@ DI double myAtomicMax(double* address, double val) template HDI T myMax(T x, T y); template <> -[[deprecated("use raft::max from raft/core/math.hpp instead")]] -HDI float myMax(float x, float y) +[[deprecated("use raft::max from raft/core/math.hpp instead")]] HDI float myMax(float x, + float y) { return fmaxf(x, y); } template <> -[[deprecated("use raft::max from raft/core/math.hpp instead")]] -HDI double myMax(double x, double y) +[[deprecated("use raft::max from raft/core/math.hpp instead")]] HDI double myMax(double x, + double y) { return fmax(x, y); } @@ -280,14 +280,14 @@ HDI double myMax(double x, double y) template HDI T myMin(T x, T y); template <> -[[deprecated("use raft::min from raft/core/math.hpp instead")]] -HDI float myMin(float x, float y) +[[deprecated("use raft::min from raft/core/math.hpp instead")]] HDI float myMin(float x, + float y) { return fminf(x, y); } template <> -[[deprecated("use raft::min from raft/core/math.hpp instead")]] -HDI double myMin(double x, double y) +[[deprecated("use raft::min from raft/core/math.hpp instead")]] HDI double myMin(double x, + double y) { return fmin(x, y); } @@ -328,14 +328,12 @@ DI T myAtomicMax(T* address, T val) template HDI T myExp(T x); template <> -[[deprecated("use raft::exp from raft/core/math.hpp instead")]] -HDI float myExp(float x) +[[deprecated("use raft::exp from raft/core/math.hpp instead")]] HDI float myExp(float x) { return expf(x); } template <> -[[deprecated("use raft::exp from raft/core/math.hpp instead")]] -HDI double myExp(double x) +[[deprecated("use raft::exp from raft/core/math.hpp instead")]] HDI double myExp(double x) { return ::exp(x); } @@ -366,14 +364,12 @@ inline __device__ double myInf() template HDI T myLog(T x); template <> -[[deprecated("use raft::log from raft/core/math.hpp instead")]] -HDI float myLog(float x) +[[deprecated("use raft::log from raft/core/math.hpp instead")]] HDI float myLog(float x) { return logf(x); } template <> -[[deprecated("use raft::log from raft/core/math.hpp instead")]] -HDI double myLog(double x) +[[deprecated("use raft::log from raft/core/math.hpp instead")]] HDI double myLog(double x) { return ::log(x); } @@ -386,14 +382,12 @@ HDI double myLog(double x) template HDI T mySqrt(T x); template <> -[[deprecated("use raft::sqrt from raft/core/math.hpp instead")]] -HDI float mySqrt(float x) +[[deprecated("use raft::sqrt from raft/core/math.hpp instead")]] HDI float mySqrt(float x) { return sqrtf(x); } template <> -[[deprecated("use raft::sqrt from raft/core/math.hpp instead")]] -HDI double mySqrt(double x) +[[deprecated("use raft::sqrt from raft/core/math.hpp instead")]] HDI double mySqrt(double x) { return ::sqrt(x); } @@ -406,14 +400,16 @@ HDI double mySqrt(double x) template DI void mySinCos(T x, T& s, T& c); template <> -[[deprecated("use raft::sincos from raft/core/math.hpp instead")]] -DI void mySinCos(float x, float& s, float& c) +[[deprecated("use raft::sincos from raft/core/math.hpp instead")]] DI void mySinCos(float x, + float& s, + float& c) { sincosf(x, &s, 
&c); } template <> -[[deprecated("use raft::sincos from raft/core/math.hpp instead")]] -DI void mySinCos(double x, double& s, double& c) +[[deprecated("use raft::sincos from raft/core/math.hpp instead")]] DI void mySinCos(double x, + double& s, + double& c) { ::sincos(x, &s, &c); } @@ -426,14 +422,12 @@ DI void mySinCos(double x, double& s, double& c) template DI T mySin(T x); template <> -[[deprecated("use raft::sin from raft/core/math.hpp instead")]] -DI float mySin(float x) +[[deprecated("use raft::sin from raft/core/math.hpp instead")]] DI float mySin(float x) { return sinf(x); } template <> -[[deprecated("use raft::sin from raft/core/math.hpp instead")]] -DI double mySin(double x) +[[deprecated("use raft::sin from raft/core/math.hpp instead")]] DI double mySin(double x) { return ::sin(x); } @@ -449,14 +443,12 @@ DI T myAbs(T x) return x < 0 ? -x : x; } template <> -[[deprecated("use raft::abs from raft/core/math.hpp instead")]] -DI float myAbs(float x) +[[deprecated("use raft::abs from raft/core/math.hpp instead")]] DI float myAbs(float x) { return fabsf(x); } template <> -[[deprecated("use raft::abs from raft/core/math.hpp instead")]] -DI double myAbs(double x) +[[deprecated("use raft::abs from raft/core/math.hpp instead")]] DI double myAbs(double x) { return fabs(x); } @@ -469,14 +461,14 @@ DI double myAbs(double x) template HDI T myPow(T x, T power); template <> -[[deprecated("use raft::pow from raft/core/math.hpp instead")]] -HDI float myPow(float x, float power) +[[deprecated("use raft::pow from raft/core/math.hpp instead")]] HDI float myPow(float x, + float power) { return powf(x, power); } template <> -[[deprecated("use raft::pow from raft/core/math.hpp instead")]] -HDI double myPow(double x, double power) +[[deprecated("use raft::pow from raft/core/math.hpp instead")]] HDI double myPow(double x, + double power) { return ::pow(x, power); } @@ -489,14 +481,12 @@ HDI double myPow(double x, double power) template HDI T myTanh(T x); template <> -[[deprecated("use raft::tanh from raft/core/math.hpp instead")]] -HDI float myTanh(float x) +[[deprecated("use raft::tanh from raft/core/math.hpp instead")]] HDI float myTanh(float x) { return tanhf(x); } template <> -[[deprecated("use raft::tanh from raft/core/math.hpp instead")]] -HDI double myTanh(double x) +[[deprecated("use raft::tanh from raft/core/math.hpp instead")]] HDI double myTanh(double x) { return ::tanh(x); } @@ -509,14 +499,12 @@ HDI double myTanh(double x) template HDI T myATanh(T x); template <> -[[deprecated("use raft::atanh from raft/core/math.hpp instead")]] -HDI float myATanh(float x) +[[deprecated("use raft::atanh from raft/core/math.hpp instead")]] HDI float myATanh(float x) { return atanhf(x); } template <> -[[deprecated("use raft::atanh from raft/core/math.hpp instead")]] -HDI double myATanh(double x) +[[deprecated("use raft::atanh from raft/core/math.hpp instead")]] HDI double myATanh(double x) { return ::atanh(x); } From b92e5b4b5685375e9baefdab56844f06f7d41189 Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Tue, 17 Jan 2023 18:15:39 +0100 Subject: [PATCH 08/13] Add math.hpp include to operators.hpp --- cpp/include/raft/core/operators.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/include/raft/core/operators.hpp b/cpp/include/raft/core/operators.hpp index ec26de8d39..125cc5ae9d 100644 --- a/cpp/include/raft/core/operators.hpp +++ b/cpp/include/raft/core/operators.hpp @@ -23,6 +23,7 @@ #include #include +#include namespace raft { From 9b606e1d10495d54a56cb39adb303746501f102f Mon Sep 17 00:00:00 2001 
From: Louis Sugy Date: Wed, 18 Jan 2023 11:53:56 +0100 Subject: [PATCH 09/13] Remove enable_if to allow for implicit conversion + add one-arg overload for min and max to simplify use with variadic arguments --- cpp/include/raft/core/math.hpp | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/cpp/include/raft/core/math.hpp b/cpp/include/raft/core/math.hpp index cb27a1856c..99e27a3372 100644 --- a/cpp/include/raft/core/math.hpp +++ b/cpp/include/raft/core/math.hpp @@ -57,7 +57,6 @@ constexpr RAFT_INLINE_FUNCTION auto abs(const T& x) */ template constexpr RAFT_INLINE_FUNCTION auto acos(const T& x) - -> std::enable_if_t || std::is_same_v, T> { #ifdef __CUDA_ARCH__ return ::acos(x); @@ -71,7 +70,6 @@ constexpr RAFT_INLINE_FUNCTION auto acos(const T& x) */ template constexpr RAFT_INLINE_FUNCTION auto asin(const T& x) - -> std::enable_if_t || std::is_same_v, T> { #ifdef __CUDA_ARCH__ return ::asin(x); @@ -85,7 +83,6 @@ constexpr RAFT_INLINE_FUNCTION auto asin(const T& x) */ template constexpr RAFT_INLINE_FUNCTION auto atanh(const T& x) - -> std::enable_if_t || std::is_same_v, T> { #ifdef __CUDA_ARCH__ return ::atanh(x); @@ -99,7 +96,6 @@ constexpr RAFT_INLINE_FUNCTION auto atanh(const T& x) */ template constexpr RAFT_INLINE_FUNCTION auto cos(const T& x) - -> std::enable_if_t || std::is_same_v, T> { #ifdef __CUDA_ARCH__ return ::cos(x); @@ -113,7 +109,6 @@ constexpr RAFT_INLINE_FUNCTION auto cos(const T& x) */ template constexpr RAFT_INLINE_FUNCTION auto exp(const T& x) - -> std::enable_if_t || std::is_same_v, T> { #ifdef __CUDA_ARCH__ return ::exp(x); @@ -127,7 +122,6 @@ constexpr RAFT_INLINE_FUNCTION auto exp(const T& x) */ template constexpr RAFT_INLINE_FUNCTION auto log(const T& x) - -> std::enable_if_t || std::is_same_v, T> { #ifdef __CUDA_ARCH__ return ::log(x); @@ -181,11 +175,19 @@ constexpr RAFT_INLINE_FUNCTION auto max(const T1& x, const T2& y) #endif } +/** Many-argument overload to avoid verbose nested calls or use with variadic arguments */ template constexpr RAFT_INLINE_FUNCTION auto max(const T1& x, const T2& y, Args&&... args) { return raft::max(x, raft::max(y, std::forward(args)...)); } + +/** One-argument overload for convenience when using with variadic arguments */ +template +constexpr RAFT_INLINE_FUNCTION auto max(const T& x) +{ + return x; +} /** @} */ /** @@ -233,11 +235,19 @@ constexpr RAFT_INLINE_FUNCTION auto min(const T1& x, const T2& y) #endif } +/** Many-argument overload to avoid verbose nested calls or use with variadic arguments */ template constexpr RAFT_INLINE_FUNCTION auto min(const T1& x, const T2& y, Args&&... args) { return raft::min(x, raft::min(y, std::forward(args)...)); } + +/** One-argument overload for convenience when using with variadic arguments */ +template +constexpr RAFT_INLINE_FUNCTION auto min(const T& x) +{ + return x; +} /** @} */ /** @@ -245,7 +255,6 @@ constexpr RAFT_INLINE_FUNCTION auto min(const T1& x, const T2& y, Args&&... 
args */ template constexpr RAFT_INLINE_FUNCTION auto pow(const T& x, const T& y) - -> std::enable_if_t || std::is_same_v, T> { #ifdef __CUDA_ARCH__ return ::pow(x, y); @@ -268,7 +277,6 @@ constexpr RAFT_INLINE_FUNCTION auto sgn(const T val) -> int */ template constexpr RAFT_INLINE_FUNCTION auto sin(const T& x) - -> std::enable_if_t || std::is_same_v, T> { #ifdef __CUDA_ARCH__ return ::sin(x); @@ -298,7 +306,6 @@ constexpr RAFT_INLINE_FUNCTION */ template constexpr RAFT_INLINE_FUNCTION auto sqrt(const T& x) - -> std::enable_if_t || std::is_same_v, T> { #ifdef __CUDA_ARCH__ return ::sqrt(x); @@ -312,7 +319,6 @@ constexpr RAFT_INLINE_FUNCTION auto sqrt(const T& x) */ template constexpr RAFT_INLINE_FUNCTION auto tanh(const T& x) - -> std::enable_if_t || std::is_same_v, T> { #ifdef __CUDA_ARCH__ return ::tanh(x); From 60f789ddae3887f6c51df8920304e46454c1feee Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Wed, 18 Jan 2023 12:07:08 +0100 Subject: [PATCH 10/13] Use raft::min in raft::min_op and raft::max in raft::max_op --- cpp/include/raft/core/operators.hpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/cpp/include/raft/core/operators.hpp b/cpp/include/raft/core/operators.hpp index 125cc5ae9d..5f8956b1ff 100644 --- a/cpp/include/raft/core/operators.hpp +++ b/cpp/include/raft/core/operators.hpp @@ -156,20 +156,18 @@ struct pow_op { }; struct min_op { - template - constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& a, const Type& b) const + template + constexpr RAFT_INLINE_FUNCTION auto operator()(Args&&... args) const { - if (a > b) { return b; } - return a; + return raft::min(std::forward(args)...); } }; struct max_op { - template - constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& a, const Type& b) const + template + constexpr RAFT_INLINE_FUNCTION auto operator()(Args&&... 
args) const { - if (b > a) { return b; } - return a; + return raft::max(std::forward(args)...); } }; From 75274bda2e7364307c06220083695938f2bd3110 Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Wed, 18 Jan 2023 13:38:36 +0100 Subject: [PATCH 11/13] Heterogeneous types in raft::pow and switch functions to pass-by-value when the types are expected to be small --- cpp/include/raft/core/math.hpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/include/raft/core/math.hpp b/cpp/include/raft/core/math.hpp index 99e27a3372..b1fb62d9e4 100644 --- a/cpp/include/raft/core/math.hpp +++ b/cpp/include/raft/core/math.hpp @@ -29,7 +29,7 @@ namespace raft { * @{ */ template -constexpr RAFT_INLINE_FUNCTION auto abs(const T& x) +constexpr RAFT_INLINE_FUNCTION auto abs(T x) -> std::enable_if_t || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v, @@ -42,7 +42,7 @@ constexpr RAFT_INLINE_FUNCTION auto abs(const T& x) #endif } template -constexpr RAFT_INLINE_FUNCTION auto abs(const T& x) +constexpr RAFT_INLINE_FUNCTION auto abs(T x) -> std::enable_if_t && !std::is_same_v && !std::is_same_v && !std::is_same_v && !std::is_same_v, @@ -56,7 +56,7 @@ constexpr RAFT_INLINE_FUNCTION auto abs(const T& x) * Inverse cosine */ template -constexpr RAFT_INLINE_FUNCTION auto acos(const T& x) +constexpr RAFT_INLINE_FUNCTION auto acos(T x) { #ifdef __CUDA_ARCH__ return ::acos(x); @@ -69,7 +69,7 @@ constexpr RAFT_INLINE_FUNCTION auto acos(const T& x) * Inverse sine */ template -constexpr RAFT_INLINE_FUNCTION auto asin(const T& x) +constexpr RAFT_INLINE_FUNCTION auto asin(T x) { #ifdef __CUDA_ARCH__ return ::asin(x); @@ -82,7 +82,7 @@ constexpr RAFT_INLINE_FUNCTION auto asin(const T& x) * Inverse hyperbolic tangent */ template -constexpr RAFT_INLINE_FUNCTION auto atanh(const T& x) +constexpr RAFT_INLINE_FUNCTION auto atanh(T x) { #ifdef __CUDA_ARCH__ return ::atanh(x); @@ -95,7 +95,7 @@ constexpr RAFT_INLINE_FUNCTION auto atanh(const T& x) * Cosine */ template -constexpr RAFT_INLINE_FUNCTION auto cos(const T& x) +constexpr RAFT_INLINE_FUNCTION auto cos(T x) { #ifdef __CUDA_ARCH__ return ::cos(x); @@ -108,7 +108,7 @@ constexpr RAFT_INLINE_FUNCTION auto cos(const T& x) * Exponential function */ template -constexpr RAFT_INLINE_FUNCTION auto exp(const T& x) +constexpr RAFT_INLINE_FUNCTION auto exp(T x) { #ifdef __CUDA_ARCH__ return ::exp(x); @@ -121,7 +121,7 @@ constexpr RAFT_INLINE_FUNCTION auto exp(const T& x) * Natural logarithm */ template -constexpr RAFT_INLINE_FUNCTION auto log(const T& x) +constexpr RAFT_INLINE_FUNCTION auto log(T x) { #ifdef __CUDA_ARCH__ return ::log(x); @@ -253,8 +253,8 @@ constexpr RAFT_INLINE_FUNCTION auto min(const T& x) /** * Power */ -template -constexpr RAFT_INLINE_FUNCTION auto pow(const T& x, const T& y) +template +constexpr RAFT_INLINE_FUNCTION auto pow(T1 x, T2 y) { #ifdef __CUDA_ARCH__ return ::pow(x, y); @@ -267,7 +267,7 @@ constexpr RAFT_INLINE_FUNCTION auto pow(const T& x, const T& y) * Sign */ template -constexpr RAFT_INLINE_FUNCTION auto sgn(const T val) -> int +constexpr RAFT_INLINE_FUNCTION auto sgn(T val) -> int { return (T(0) < val) - (val < T(0)); } @@ -276,7 +276,7 @@ constexpr RAFT_INLINE_FUNCTION auto sgn(const T val) -> int * Sine */ template -constexpr RAFT_INLINE_FUNCTION auto sin(const T& x) +constexpr RAFT_INLINE_FUNCTION auto sin(T x) { #ifdef __CUDA_ARCH__ return ::sin(x); @@ -305,7 +305,7 @@ constexpr RAFT_INLINE_FUNCTION * Square root */ template -constexpr RAFT_INLINE_FUNCTION auto sqrt(const T& x) 
+constexpr RAFT_INLINE_FUNCTION auto sqrt(T x) { #ifdef __CUDA_ARCH__ return ::sqrt(x); @@ -318,7 +318,7 @@ constexpr RAFT_INLINE_FUNCTION auto sqrt(const T& x) * Hyperbolic tangent */ template -constexpr RAFT_INLINE_FUNCTION auto tanh(const T& x) +constexpr RAFT_INLINE_FUNCTION auto tanh(T x) { #ifdef __CUDA_ARCH__ return ::tanh(x); From 8d8618eab2a266f9ec6c1df8144f6c0029e9f5b8 Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Wed, 18 Jan 2023 14:03:52 +0100 Subject: [PATCH 12/13] Functions using the CUDA Math API cannot be constexpr --- cpp/include/raft/core/math.hpp | 37 +++++++++---------- cpp/include/raft/core/operators.hpp | 10 ++--- .../raft/matrix/detail/linewise_op.cuh | 4 +- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/cpp/include/raft/core/math.hpp b/cpp/include/raft/core/math.hpp index b1fb62d9e4..2a584e5977 100644 --- a/cpp/include/raft/core/math.hpp +++ b/cpp/include/raft/core/math.hpp @@ -29,7 +29,7 @@ namespace raft { * @{ */ template -constexpr RAFT_INLINE_FUNCTION auto abs(T x) +RAFT_INLINE_FUNCTION auto abs(T x) -> std::enable_if_t || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v, @@ -56,7 +56,7 @@ constexpr RAFT_INLINE_FUNCTION auto abs(T x) * Inverse cosine */ template -constexpr RAFT_INLINE_FUNCTION auto acos(T x) +RAFT_INLINE_FUNCTION auto acos(T x) { #ifdef __CUDA_ARCH__ return ::acos(x); @@ -69,7 +69,7 @@ constexpr RAFT_INLINE_FUNCTION auto acos(T x) * Inverse sine */ template -constexpr RAFT_INLINE_FUNCTION auto asin(T x) +RAFT_INLINE_FUNCTION auto asin(T x) { #ifdef __CUDA_ARCH__ return ::asin(x); @@ -82,7 +82,7 @@ constexpr RAFT_INLINE_FUNCTION auto asin(T x) * Inverse hyperbolic tangent */ template -constexpr RAFT_INLINE_FUNCTION auto atanh(T x) +RAFT_INLINE_FUNCTION auto atanh(T x) { #ifdef __CUDA_ARCH__ return ::atanh(x); @@ -95,7 +95,7 @@ constexpr RAFT_INLINE_FUNCTION auto atanh(T x) * Cosine */ template -constexpr RAFT_INLINE_FUNCTION auto cos(T x) +RAFT_INLINE_FUNCTION auto cos(T x) { #ifdef __CUDA_ARCH__ return ::cos(x); @@ -108,7 +108,7 @@ constexpr RAFT_INLINE_FUNCTION auto cos(T x) * Exponential function */ template -constexpr RAFT_INLINE_FUNCTION auto exp(T x) +RAFT_INLINE_FUNCTION auto exp(T x) { #ifdef __CUDA_ARCH__ return ::exp(x); @@ -121,7 +121,7 @@ constexpr RAFT_INLINE_FUNCTION auto exp(T x) * Natural logarithm */ template -constexpr RAFT_INLINE_FUNCTION auto log(T x) +RAFT_INLINE_FUNCTION auto log(T x) { #ifdef __CUDA_ARCH__ return ::log(x); @@ -144,7 +144,7 @@ constexpr RAFT_INLINE_FUNCTION auto log(T x) * @{ */ template -constexpr RAFT_INLINE_FUNCTION auto max(const T1& x, const T2& y) +RAFT_INLINE_FUNCTION auto max(const T1& x, const T2& y) { #ifdef __CUDA_ARCH__ // Combinations of types supported by the CUDA Math API @@ -177,7 +177,7 @@ constexpr RAFT_INLINE_FUNCTION auto max(const T1& x, const T2& y) /** Many-argument overload to avoid verbose nested calls or use with variadic arguments */ template -constexpr RAFT_INLINE_FUNCTION auto max(const T1& x, const T2& y, Args&&... args) +RAFT_INLINE_FUNCTION auto max(const T1& x, const T2& y, Args&&... 
args) { return raft::max(x, raft::max(y, std::forward(args)...)); } @@ -204,7 +204,7 @@ constexpr RAFT_INLINE_FUNCTION auto max(const T& x) * @{ */ template -constexpr RAFT_INLINE_FUNCTION auto min(const T1& x, const T2& y) +RAFT_INLINE_FUNCTION auto min(const T1& x, const T2& y) { #ifdef __CUDA_ARCH__ // Combinations of types supported by the CUDA Math API @@ -237,7 +237,7 @@ constexpr RAFT_INLINE_FUNCTION auto min(const T1& x, const T2& y) /** Many-argument overload to avoid verbose nested calls or use with variadic arguments */ template -constexpr RAFT_INLINE_FUNCTION auto min(const T1& x, const T2& y, Args&&... args) +RAFT_INLINE_FUNCTION auto min(const T1& x, const T2& y, Args&&... args) { return raft::min(x, raft::min(y, std::forward(args)...)); } @@ -254,7 +254,7 @@ constexpr RAFT_INLINE_FUNCTION auto min(const T& x) * Power */ template -constexpr RAFT_INLINE_FUNCTION auto pow(T1 x, T2 y) +RAFT_INLINE_FUNCTION auto pow(T1 x, T2 y) { #ifdef __CUDA_ARCH__ return ::pow(x, y); @@ -267,7 +267,7 @@ constexpr RAFT_INLINE_FUNCTION auto pow(T1 x, T2 y) * Sign */ template -constexpr RAFT_INLINE_FUNCTION auto sgn(T val) -> int +RAFT_INLINE_FUNCTION auto sgn(T val) -> int { return (T(0) < val) - (val < T(0)); } @@ -276,7 +276,7 @@ constexpr RAFT_INLINE_FUNCTION auto sgn(T val) -> int * Sine */ template -constexpr RAFT_INLINE_FUNCTION auto sin(T x) +RAFT_INLINE_FUNCTION auto sin(T x) { #ifdef __CUDA_ARCH__ return ::sin(x); @@ -289,9 +289,8 @@ constexpr RAFT_INLINE_FUNCTION auto sin(T x) * Sine and cosine */ template -constexpr RAFT_INLINE_FUNCTION - std::enable_if_t || std::is_same_v> - sincos(const T& x, T* s, T* c) +RAFT_INLINE_FUNCTION std::enable_if_t || std::is_same_v> sincos( + const T& x, T* s, T* c) { #ifdef __CUDA_ARCH__ ::sincos(x, s, c); @@ -305,7 +304,7 @@ constexpr RAFT_INLINE_FUNCTION * Square root */ template -constexpr RAFT_INLINE_FUNCTION auto sqrt(T x) +RAFT_INLINE_FUNCTION auto sqrt(T x) { #ifdef __CUDA_ARCH__ return ::sqrt(x); @@ -318,7 +317,7 @@ constexpr RAFT_INLINE_FUNCTION auto sqrt(T x) * Hyperbolic tangent */ template -constexpr RAFT_INLINE_FUNCTION auto tanh(T x) +RAFT_INLINE_FUNCTION auto tanh(T x) { #ifdef __CUDA_ARCH__ return ::tanh(x); diff --git a/cpp/include/raft/core/operators.hpp b/cpp/include/raft/core/operators.hpp index 5f8956b1ff..de27c2b271 100644 --- a/cpp/include/raft/core/operators.hpp +++ b/cpp/include/raft/core/operators.hpp @@ -76,7 +76,7 @@ struct value_op { struct sqrt_op { template - constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in, UnusedArgs...) const + RAFT_INLINE_FUNCTION auto operator()(const Type& in, UnusedArgs...) const { return raft::sqrt(in); } @@ -92,7 +92,7 @@ struct nz_op { struct abs_op { template - constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in, UnusedArgs...) const + RAFT_INLINE_FUNCTION auto operator()(const Type& in, UnusedArgs...) const { return raft::abs(in); } @@ -149,7 +149,7 @@ struct div_checkzero_op { struct pow_op { template - constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& a, const Type& b) const + RAFT_INLINE_FUNCTION auto operator()(const Type& a, const Type& b) const { return raft::pow(a, b); } @@ -157,7 +157,7 @@ struct pow_op { struct min_op { template - constexpr RAFT_INLINE_FUNCTION auto operator()(Args&&... args) const + RAFT_INLINE_FUNCTION auto operator()(Args&&... args) const { return raft::min(std::forward(args)...); } @@ -165,7 +165,7 @@ struct min_op { struct max_op { template - constexpr RAFT_INLINE_FUNCTION auto operator()(Args&&... 
args) const + RAFT_INLINE_FUNCTION auto operator()(Args&&... args) const { return raft::max(std::forward(args)...); } diff --git a/cpp/include/raft/matrix/detail/linewise_op.cuh b/cpp/include/raft/matrix/detail/linewise_op.cuh index 2f000f5bc2..7440f0ebce 100644 --- a/cpp/include/raft/matrix/detail/linewise_op.cuh +++ b/cpp/include/raft/matrix/detail/linewise_op.cuh @@ -343,7 +343,9 @@ __global__ void __launch_bounds__(MaxOffset, 2) template constexpr size_t maxSizeOf() { - return raft::max(sizeof(Types)...); + size_t maxSize = 0; + ((maxSize = std::max(maxSize, sizeof(Types))), ...); + return maxSize; } /** From 8a437dadbc382f1c38e8c6debb3cb3196e3b3b84 Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Fri, 20 Jan 2023 14:12:26 +0100 Subject: [PATCH 13/13] Documentation improvements --- cpp/include/raft/core/math.hpp | 125 +++++++++++++++------------------ 1 file changed, 58 insertions(+), 67 deletions(-) diff --git a/cpp/include/raft/core/math.hpp b/cpp/include/raft/core/math.hpp index 2a584e5977..c5f08b84b7 100644 --- a/cpp/include/raft/core/math.hpp +++ b/cpp/include/raft/core/math.hpp @@ -25,7 +25,7 @@ namespace raft { /** - * @defgroup Absolute value + * @defgroup Absolute Absolute value * @{ */ template @@ -53,8 +53,10 @@ constexpr RAFT_INLINE_FUNCTION auto abs(T x) /** @} */ /** - * Inverse cosine + * @defgroup Trigonometry Trigonometry functions + * @{ */ +/** Inverse cosine */ template RAFT_INLINE_FUNCTION auto acos(T x) { @@ -65,9 +67,7 @@ RAFT_INLINE_FUNCTION auto acos(T x) #endif } -/** - * Inverse sine - */ +/** Inverse sine */ template RAFT_INLINE_FUNCTION auto asin(T x) { @@ -78,9 +78,7 @@ RAFT_INLINE_FUNCTION auto asin(T x) #endif } -/** - * Inverse hyperbolic tangent - */ +/** Inverse hyperbolic tangent */ template RAFT_INLINE_FUNCTION auto atanh(T x) { @@ -91,9 +89,7 @@ RAFT_INLINE_FUNCTION auto atanh(T x) #endif } -/** - * Cosine - */ +/** Cosine */ template RAFT_INLINE_FUNCTION auto cos(T x) { @@ -104,9 +100,47 @@ RAFT_INLINE_FUNCTION auto cos(T x) #endif } +/** Sine */ +template +RAFT_INLINE_FUNCTION auto sin(T x) +{ +#ifdef __CUDA_ARCH__ + return ::sin(x); +#else + return std::sin(x); +#endif +} + +/** Sine and cosine */ +template +RAFT_INLINE_FUNCTION std::enable_if_t || std::is_same_v> sincos( + const T& x, T* s, T* c) +{ +#ifdef __CUDA_ARCH__ + ::sincos(x, s, c); +#else + *s = std::sin(x); + *c = std::cos(x); +#endif +} + +/** Hyperbolic tangent */ +template +RAFT_INLINE_FUNCTION auto tanh(T x) +{ +#ifdef __CUDA_ARCH__ + return ::tanh(x); +#else + return std::tanh(x); +#endif +} +/** @} */ + /** - * Exponential function + * @defgroup Exponential Exponential and logarithm + * @{ */ +/** Exponential function */ template RAFT_INLINE_FUNCTION auto exp(T x) { @@ -117,9 +151,7 @@ RAFT_INLINE_FUNCTION auto exp(T x) #endif } -/** - * Natural logarithm - */ +/** Natural logarithm */ template RAFT_INLINE_FUNCTION auto log(T x) { @@ -129,9 +161,10 @@ RAFT_INLINE_FUNCTION auto log(T x) return std::log(x); #endif } +/** @} */ /** - * @defgroup Maximum of two or more values. + * @defgroup Maximum Maximum of two or more values. * * The CUDA Math API has overloads for all combinations of float/double. We provide similar * functionality while wrapping around std::max, which only supports arguments of the same type. @@ -191,7 +224,7 @@ constexpr RAFT_INLINE_FUNCTION auto max(const T& x) /** @} */ /** - * @defgroup Minimum of two or more values. + * @defgroup Minimum Minimum of two or more values. * * The CUDA Math API has overloads for all combinations of float/double. 
We provide similar * functionality while wrapping around std::min, which only supports arguments of the same type. @@ -251,8 +284,10 @@ constexpr RAFT_INLINE_FUNCTION auto min(const T& x) /** @} */ /** - * Power + * @defgroup Power Power and root functions + * @{ */ +/** Power */ template RAFT_INLINE_FUNCTION auto pow(T1 x, T2 y) { @@ -263,46 +298,7 @@ RAFT_INLINE_FUNCTION auto pow(T1 x, T2 y) #endif } -/** - * Sign - */ -template -RAFT_INLINE_FUNCTION auto sgn(T val) -> int -{ - return (T(0) < val) - (val < T(0)); -} - -/** - * Sine - */ -template -RAFT_INLINE_FUNCTION auto sin(T x) -{ -#ifdef __CUDA_ARCH__ - return ::sin(x); -#else - return std::sin(x); -#endif -} - -/** - * Sine and cosine - */ -template -RAFT_INLINE_FUNCTION std::enable_if_t || std::is_same_v> sincos( - const T& x, T* s, T* c) -{ -#ifdef __CUDA_ARCH__ - ::sincos(x, s, c); -#else - *s = std::sin(x); - *c = std::cos(x); -#endif -} - -/** - * Square root - */ +/** Square root */ template RAFT_INLINE_FUNCTION auto sqrt(T x) { @@ -312,18 +308,13 @@ RAFT_INLINE_FUNCTION auto sqrt(T x) return std::sqrt(x); #endif } +/** @} */ -/** - * Hyperbolic tangent - */ +/** Sign */ template -RAFT_INLINE_FUNCTION auto tanh(T x) +RAFT_INLINE_FUNCTION auto sgn(T val) -> int { -#ifdef __CUDA_ARCH__ - return ::tanh(x); -#else - return std::tanh(x); -#endif + return (T(0) < val) - (val < T(0)); } } // namespace raft