rapidsai · rapids-bot · Sep 30, 2022 · Jul 8, 2022 · Jul 13, 2022 · Jul 13, 2022
diff --git a/cpp/include/raft/core/cudart_utils.hpp b/cpp/include/raft/core/cudart_utils.hpp
@@ -30,6 +30,7 @@
 #include <rmm/mr/device/per_device_resource.hpp>
 #include <rmm/mr/device/pool_memory_resource.hpp>
 
+#include <cuda.h>
 #include <cuda_runtime.h>
 
 #include <chrono>

diff --git a/cpp/include/raft/core/mdarray.hpp b/cpp/include/raft/core/mdarray.hpp
@@ -639,6 +639,7 @@ using device_scalar_view = device_mdspan<ElementType, scalar_extent<IndexType>>;
  * @brief Shorthand for 1-dim host mdspan.
  * @tparam ElementType the data type of the vector elements
  * @tparam IndexType the index type of the extents
+ * @tparam LayoutPolicy policy for strides and layout ordering
  */
 template <typename ElementType,
           typename IndexType    = std::uint32_t,

@@ -25,6 +25,8 @@
 
 #include "detail/add.cuh"
 
+#include <raft/core/mdarray.hpp>
+
 namespace raft {
 namespace linalg {
 
@@ -46,7 +48,7 @@ using detail::adds_scalar;
  * @param stream cuda stream where to launch work
  */
 template <typename InT, typename OutT = InT, typename IdxType = int>
-void addScalar(OutT* out, const InT* in, InT scalar, IdxType len, cudaStream_t stream)
+void addScalar(OutT* out, const InT* in, const InT scalar, IdxType len, cudaStream_t stream)
 {
   detail::addScalar(out, in, scalar, len, stream);
 }
@@ -72,24 +74,150 @@ void add(OutT* out, const InT* in1, const InT* in2, IdxType len, cudaStream_t st
 
 /** Substract single value pointed by singleScalarDev parameter in device memory from inDev[i] and
  * write result to outDev[i]
- * @tparam math_t data-type upon which the math operation will be performed
+ * @tparam InT     input data-type. Also the data-type upon which the math ops
+ *                 will be performed
+ * @tparam OutT    output data-type
  * @tparam IdxType Integer type used to for addressing
  * @param outDev the output buffer
  * @param inDev the input buffer
  * @param singleScalarDev pointer to the scalar located in device memory
  * @param len number of elements in the input and output buffer
  * @param stream cuda stream
  */
-template <typename math_t, typename IdxType = int>
-void addDevScalar(math_t* outDev,
-                  const math_t* inDev,
-                  const math_t* singleScalarDev,
-                  IdxType len,
-                  cudaStream_t stream)
+template <typename InT, typename OutT = InT, typename IdxType = int>
+void addDevScalar(
+  OutT* outDev, const InT* inDev, const InT* singleScalarDev, IdxType len, cudaStream_t stream)
 {
   detail::addDevScalar(outDev, inDev, singleScalarDev, len, stream);
 }
 
+/**
+ * @defgroup add Addition Arithmetic
+ * @{
+ */
+
+/**
+ * @brief Elementwise add operation on two inputs
+ * @tparam InType    Input Type raft::device_mdspan
+ * @tparam OutType   Output Type raft::device_mdspan
+ * @param[in] handle raft::handle_t; the work is launched on handle.get_stream()
+ * @param[in] in1    First Input, must be contiguous
+ * @param[in] in2    Second Input, must be contiguous and the same size as in1
+ * @param[out] out   Output, must be contiguous and the same size as the inputs
+ */
+template <typename InType,
+          typename OutType,
+          typename = raft::enable_if_device_mdspan<OutType, InType>>
+void add(const raft::handle_t& handle, const InType in1, const InType in2, OutType out)
+{
+  using in_elem_t  = typename InType::value_type;
+  using out_elem_t = typename OutType::value_type;
+
+  RAFT_EXPECTS(out.is_exhaustive(), "Output must be contiguous");
+  RAFT_EXPECTS(in1.is_exhaustive(), "Input 1 must be contiguous");
+  RAFT_EXPECTS(in2.is_exhaustive(), "Input 2 must be contiguous");
+  RAFT_EXPECTS(out.size() == in1.size() && in1.size() == in2.size(),
+               "Size mismatch between Output and Inputs");
+
+  // Forward to the pointer-based overload. The index type is chosen from the
+  // element count: 64-bit only when the count does not fit in a 32-bit unsigned
+  // integer, 32-bit otherwise.
+  const auto n_elems = out.size();
+  if (n_elems > std::numeric_limits<std::uint32_t>::max()) {
+    add<in_elem_t, out_elem_t, std::uint64_t>(
+      out.data_handle(), in1.data_handle(), in2.data_handle(),
+      static_cast<std::uint64_t>(n_elems), handle.get_stream());
+  } else {
+    add<in_elem_t, out_elem_t, std::uint32_t>(
+      out.data_handle(), in1.data_handle(), in2.data_handle(),
+      static_cast<std::uint32_t>(n_elems), handle.get_stream());
+  }
+}
+
+/**
+ * @brief Elementwise addition of a device-resident scalar to the input
+ * @tparam InType    Input Type raft::device_mdspan
+ * @tparam OutType   Output Type raft::device_mdspan
+ * @tparam ScalarIdxType Index Type of scalar
+ * @param[in] handle raft::handle_t; the work is launched on handle.get_stream()
+ * @param[in] in     Input, must be contiguous
+ * @param[out] out   Output, must be contiguous and the same size as in
+ * @param[in] scalar raft::device_scalar_view
+ */
+template <typename InType,
+          typename OutType,
+          typename ScalarIdxType,
+          typename = raft::enable_if_device_mdspan<OutType, InType>>
+void add_scalar(const raft::handle_t& handle,
+                InType in,
+                OutType out,
+                raft::device_scalar_view<const typename InType::value_type, ScalarIdxType> scalar)
+{
+  using in_elem_t  = typename InType::value_type;
+  using out_elem_t = typename OutType::value_type;
+
+  RAFT_EXPECTS(out.is_exhaustive(), "Output must be contiguous");
+  RAFT_EXPECTS(in.is_exhaustive(), "Input must be contiguous");
+  RAFT_EXPECTS(out.size() == in.size(), "Size mismatch between Output and Input");
+
+  // The scalar is forwarded as a pointer and is never dereferenced here. The
+  // index type is chosen from the element count: 64-bit only when the count does
+  // not fit in a 32-bit unsigned integer, 32-bit otherwise.
+  const auto n_elems = out.size();
+  if (n_elems > std::numeric_limits<std::uint32_t>::max()) {
+    addDevScalar<in_elem_t, out_elem_t, std::uint64_t>(
+      out.data_handle(), in.data_handle(), scalar.data_handle(),
+      static_cast<std::uint64_t>(n_elems), handle.get_stream());
+  } else {
+    addDevScalar<in_elem_t, out_elem_t, std::uint32_t>(
+      out.data_handle(), in.data_handle(), scalar.data_handle(),
+      static_cast<std::uint32_t>(n_elems), handle.get_stream());
+  }
+}
+
+/**
+ * @brief Elementwise addition of a host-resident scalar to the input
+ * @tparam InType    Input Type raft::device_mdspan
+ * @tparam OutType   Output Type raft::device_mdspan
+ * @tparam ScalarIdxType Index Type of scalar
+ * @param[in] handle raft::handle_t; the work is launched on handle.get_stream()
+ * @param[in] in     Input, must be contiguous
+ * @param[out] out   Output, must be contiguous and the same size as in
+ * @param[in] scalar raft::host_scalar_view
+ */
+template <typename InType,
+          typename OutType,
+          typename ScalarIdxType,
+          typename = raft::enable_if_device_mdspan<OutType, InType>>
+void add_scalar(const raft::handle_t& handle,
+                const InType in,
+                OutType out,
+                raft::host_scalar_view<const typename InType::value_type, ScalarIdxType> scalar)
+{
+  using in_elem_t  = typename InType::value_type;
+  using out_elem_t = typename OutType::value_type;
+
+  RAFT_EXPECTS(out.is_exhaustive(), "Output must be contiguous");
+  RAFT_EXPECTS(in.is_exhaustive(), "Input must be contiguous");
+  RAFT_EXPECTS(out.size() == in.size(), "Size mismatch between Output and Input");
+
+  // The scalar is dereferenced here, on the host, and passed on by value. The
+  // index type is chosen from the element count: 64-bit only when the count does
+  // not fit in a 32-bit unsigned integer, 32-bit otherwise.
+  const auto n_elems = out.size();
+  if (n_elems > std::numeric_limits<std::uint32_t>::max()) {
+    addScalar<in_elem_t, out_elem_t, std::uint64_t>(
+      out.data_handle(), in.data_handle(), *scalar.data_handle(),
+      static_cast<std::uint64_t>(n_elems), handle.get_stream());
+  } else {
+    addScalar<in_elem_t, out_elem_t, std::uint32_t>(
+      out.data_handle(), in.data_handle(), *scalar.data_handle(),
+      static_cast<std::uint32_t>(n_elems), handle.get_stream());
+  }
+}
+
+/** @} */  // end of group add
+
 };  // end namespace linalg
 };  // end namespace raft
 

diff --git a/cpp/include/raft/linalg/apply.hpp b/cpp/include/raft/linalg/apply.hpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+namespace raft::linalg {
+
+/**
+ * @brief Selects whether a reduction/broadcast operation is performed along a
+ *        matrix's rows (ALONG_ROWS) or along its columns (ALONG_COLUMNS).
+ *        Scoped enum: enumerators must be qualified, e.g. `Apply::ALONG_ROWS`.
+ */
+enum class Apply { ALONG_ROWS, ALONG_COLUMNS };
+
+}  // end namespace raft::linalg
@@ -50,6 +50,79 @@ void axpy(const raft::handle_t& handle,
   detail::axpy<T, DevicePointerMode>(handle, n, alpha, x, incx, y, incy, stream);
 }
 
+/**
+ * @defgroup axpy axpy
+ * @{
+ */
+
+/**
+ * @brief axpy: computes y = alpha * x + y, with alpha given as a device scalar view
+ * @tparam MdspanType  Type raft::device_mdspan
+ * @tparam ScalarIdxType Index Type of scalar (deduced from alpha)
+ * @param [in] handle raft::handle_t; the work is launched on handle.get_stream()
+ * @param [in] alpha raft::device_scalar_view
+ * @param [in] x Input vector, must have the same size as y
+ * @param [in] incx stride between consecutive elements of x
+ * @param [inout] y Output vector
+ * @param [in] incy stride between consecutive elements of y
+ */
+template <typename MdspanType,
+          typename ScalarIdxType,
+          typename = raft::enable_if_device_mdspan<MdspanType>>
+void axpy(const raft::handle_t& handle,
+          raft::device_scalar_view<typename MdspanType::element_type, ScalarIdxType> alpha,
+          const MdspanType x,
+          const int incx,
+          MdspanType y,
+          const int incy)
+{
+  RAFT_EXPECTS(y.size() == x.size(), "Size mismatch between Output and Input");
+  // `true` is the DevicePointerMode template argument: alpha is passed as a device pointer.
+  axpy<typename MdspanType::element_type, true>(handle,
+                                                y.size(),
+                                                alpha.data_handle(),
+                                                x.data_handle(),
+                                                incx,
+                                                y.data_handle(),
+                                                incy,
+                                                handle.get_stream());
+}
+
+/**
+ * @brief axpy: computes y = alpha * x + y, with alpha given as a host scalar view
+ * @tparam MdspanType  Type raft::device_mdspan
+ * @tparam ScalarIdxType Index Type of scalar (deduced from alpha)
+ * @param [in] handle raft::handle_t; the work is launched on handle.get_stream()
+ * @param [in] alpha raft::host_scalar_view
+ * @param [in] x Input vector, must have the same size as y
+ * @param [in] incx stride between consecutive elements of x
+ * @param [inout] y Output vector
+ * @param [in] incy stride between consecutive elements of y
+ */
+template <typename MdspanType,
+          typename ScalarIdxType,
+          typename = raft::enable_if_device_mdspan<MdspanType>>
+void axpy(const raft::handle_t& handle,
+          raft::host_scalar_view<const typename MdspanType::value_type, ScalarIdxType> alpha,
+          MdspanType x,
+          const int incx,
+          MdspanType y,
+          const int incy)
+{
+  RAFT_EXPECTS(y.size() == x.size(), "Size mismatch between Output and Input");
+  // `false` is the DevicePointerMode template argument: alpha is passed as a host pointer.
+  axpy<typename MdspanType::value_type, false>(handle,
+                                               y.size(),
+                                               alpha.data_handle(),
+                                               x.data_handle(),
+                                               incx,
+                                               y.data_handle(),
+                                               incy,
+                                               handle.get_stream());
+}
+
+/** @} */  // end of group axpy
+
 }  // namespace raft::linalg
 
 #endif
@@ -20,6 +20,7 @@
 
 #include "detail/binary_op.cuh"
 
+#include <raft/core/mdarray.hpp>
 #include <raft/cuda_utils.cuh>
 
 namespace raft {
@@ -52,6 +53,52 @@ void binaryOp(
   detail::binaryOp(out, in1, in2, len, op, stream);
 }
 
+/**
+ * @defgroup binary_op Element-Wise Binary Operation
+ * @{
+ */
+
+/**
+ * @brief Apply an element-wise binary operation to two inputs
+ * @tparam InType Input Type raft::device_mdspan
+ * @tparam Lambda the device-lambda performing the actual operation
+ * @tparam OutType Output Type raft::device_mdspan
+ * @tparam TPB threads-per-block in the final kernel launched
+ * @param[in] handle raft::handle_t; the work is launched on handle.get_stream()
+ * @param[in] in1 First input, must be contiguous
+ * @param[in] in2 Second input, must be contiguous and the same size as in1
+ * @param[out] out Output, must be contiguous and the same size as the inputs
+ * @param[in] op the device-lambda
+ * @note op is forwarded to the pointer-based binaryOp, instantiated with the element types
+ *       InType::value_type and OutType::value_type (not the mdspan types themselves).
+ */
+template <typename InType,
+          typename Lambda,
+          typename OutType,
+          int TPB  = 256,
+          typename = raft::enable_if_device_mdspan<InType, OutType>>
+void binary_op(const raft::handle_t& handle, InType in1, InType in2, OutType out, Lambda op)
+{
+  using in_elem_t  = typename InType::value_type;
+  using out_elem_t = typename OutType::value_type;
+
+  RAFT_EXPECTS(out.is_exhaustive(), "Output must be contiguous");
+  RAFT_EXPECTS(in1.is_exhaustive(), "Input 1 must be contiguous");
+  RAFT_EXPECTS(in2.is_exhaustive(), "Input 2 must be contiguous");
+  RAFT_EXPECTS(out.size() == in1.size() && in1.size() == in2.size(),
+               "Size mismatch between Output and Inputs");
+
+  if (out.size() > std::numeric_limits<std::uint32_t>::max()) {
+    binaryOp<in_elem_t, Lambda, out_elem_t, std::uint64_t, TPB>(
+      out.data_handle(), in1.data_handle(), in2.data_handle(), out.size(), op, handle.get_stream());
+  } else {
+    binaryOp<in_elem_t, Lambda, out_elem_t, std::uint32_t, TPB>(
+      out.data_handle(), in1.data_handle(), in2.data_handle(), out.size(), op, handle.get_stream());
+  }
+}
+
+/** @} */  // end of group binary_op
+
 };  // end namespace linalg
 };  // end namespace raft