Add tests for raft::matrix #937

Merged
merged 18 commits into from
Oct 27, 2022
Changes from 15 commits
1 change: 1 addition & 0 deletions cpp/include/raft/core/device_mdarray.hpp
@@ -16,6 +16,7 @@

#pragma once

#include <cstdint>
#include <raft/core/detail/device_mdarray.hpp>
#include <raft/core/device_mdspan.hpp>
#include <raft/core/mdarray.hpp>
1 change: 1 addition & 0 deletions cpp/include/raft/core/device_mdspan.hpp
@@ -16,6 +16,7 @@

#pragma once

#include <cstdint>
#include <raft/core/detail/host_device_accessor.hpp>
#include <raft/core/mdspan.hpp>

1 change: 1 addition & 0 deletions cpp/include/raft/core/host_mdarray.hpp
@@ -16,6 +16,7 @@

#pragma once

#include <cstdint>
#include <raft/core/host_mdspan.hpp>

#include <raft/core/detail/host_mdarray.hpp>
1 change: 1 addition & 0 deletions cpp/include/raft/core/host_mdspan.hpp
@@ -16,6 +16,7 @@

#pragma once

#include <cstdint>
#include <raft/core/mdspan.hpp>

#include <raft/core/detail/host_device_accessor.hpp>
2 changes: 1 addition & 1 deletion cpp/include/raft/core/mdspan_types.hpp
@@ -47,7 +47,7 @@ using vector_extent = std::experimental::extents<IndexType, dynamic_extent>;
template <typename IndexType>
using matrix_extent = std::experimental::extents<IndexType, dynamic_extent, dynamic_extent>;

template <typename IndexType = std::uint32_t>
template <typename IndexType>
using scalar_extent = std::experimental::extents<IndexType, 1>;

/**
184 changes: 184 additions & 0 deletions cpp/include/raft/linalg/matrix_vector.cuh
@@ -0,0 +1,184 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/core/device_mdspan.hpp>
#include <raft/matrix/detail/matrix.cuh>
#include <raft/util/input_validation.hpp>

namespace raft::linalg {

/**
* @brief multiply each row or column of the matrix by the vector, skipping zeros in the vector
* @param[in] handle: raft handle for managing library resources
* @param[inout] data: input matrix, results are in-place
* @param[in] vec: input vector
* @param[in] bcast_along_rows: whether to broadcast the vector along the rows (true) or the
* columns (false) of the matrix
*/
template <typename Type, typename IdxType>
void binary_mult_skip_zero(const raft::handle_t& handle,
raft::device_matrix_view<Type, IdxType> data,
raft::device_vector_view<const Type> vec,
bool bcast_along_rows)
{
bool row_major = raft::is_row_major(data);

IdxType vec_size = bcast_along_rows ? data.extent(1) : data.extent(0);

RAFT_EXPECTS(
vec.extent(0) == vec_size,
"If `bcast_along_rows==true`, vector size must equal number of columns in the matrix. "
"If `bcast_along_rows==false`, vector size must equal number of rows in the matrix.");

matrix::detail::matrixVectorBinaryMultSkipZero<Type, IdxType>(data.data_handle(),
vec.data_handle(),
data.extent(0),
data.extent(1),
row_major,
bcast_along_rows,
handle.get_stream());
}

/**
* @brief divide each row or column of the matrix by the vector
* @param[in] handle: raft handle for managing library resources
* @param[inout] data: input matrix, results are in-place
* @param[in] vec: input vector
* @param[in] bcast_along_rows: whether to broadcast the vector along the rows (true) or the
* columns (false) of the matrix
*/
template <typename Type, typename IdxType>
void binary_div(const raft::handle_t& handle,
raft::device_matrix_view<Type, IdxType> data,
raft::device_vector_view<const Type> vec,
bool bcast_along_rows)
{
bool row_major = raft::is_row_major(data);

IdxType vec_size = bcast_along_rows ? data.extent(1) : data.extent(0);

RAFT_EXPECTS(
vec.extent(0) == vec_size,
"If `bcast_along_rows==true`, vector size must equal number of columns in the matrix. "
"If `bcast_along_rows==false`, vector size must equal number of rows in the matrix.");

matrix::detail::matrixVectorBinaryDiv<Type, IdxType>(data.data_handle(),
vec.data_handle(),
data.extent(0),
data.extent(1),
row_major,
bcast_along_rows,
handle.get_stream());
}

/**
* @brief divide each row or column of the matrix by the vector, skipping zeros in the vector
* @param[in] handle: raft handle for managing library resources
* @param[inout] data: input matrix, results are in-place
* @param[in] vec: input vector
* @param[in] bcast_along_rows: whether to broadcast the vector along the rows (true) or the
* columns (false) of the matrix
* @param[in] return_zero: when true, entries whose vector value is below threshold produce zero;
* when false, the original matrix value is kept
*/
template <typename Type, typename IdxType>
void binary_div_skip_zero(const raft::handle_t& handle,
raft::device_matrix_view<Type, IdxType> data,
raft::device_vector_view<const Type> vec,
bool bcast_along_rows,
bool return_zero = false)
{
bool row_major = raft::is_row_major(data);

IdxType vec_size = bcast_along_rows ? data.extent(1) : data.extent(0);

RAFT_EXPECTS(
vec.extent(0) == vec_size,
"If `bcast_along_rows==true`, vector size must equal number of columns in the matrix. "
"If `bcast_along_rows==false`, vector size must equal number of rows in the matrix.");

matrix::detail::matrixVectorBinaryDivSkipZero<Type, IdxType>(data.data_handle(),
vec.data_handle(),
data.extent(0),
data.extent(1),
row_major,
bcast_along_rows,
handle.get_stream(),
return_zero);
}

/**
* @brief add the vector to each row or column of the matrix
* @param[in] handle: raft handle for managing library resources
* @param[inout] data: input matrix, results are in-place
* @param[in] vec: input vector
* @param[in] bcast_along_rows: whether to broadcast the vector along the rows (true) or the
* columns (false) of the matrix
*/
template <typename Type, typename IdxType>
void binary_add(const raft::handle_t& handle,
raft::device_matrix_view<Type, IdxType> data,
raft::device_vector_view<const Type> vec,
bool bcast_along_rows)
{
bool row_major = raft::is_row_major(data);

IdxType vec_size = bcast_along_rows ? data.extent(1) : data.extent(0);

RAFT_EXPECTS(
vec.extent(0) == vec_size,
"If `bcast_along_rows==true`, vector size must equal number of columns in the matrix. "
"If `bcast_along_rows==false`, vector size must equal number of rows in the matrix.");

matrix::detail::matrixVectorBinaryAdd<Type, IdxType>(data.data_handle(),
vec.data_handle(),
data.extent(0),
data.extent(1),
row_major,
bcast_along_rows,
handle.get_stream());
}

/**
* @brief subtract the vector from each row or column of the matrix
* @param[in] handle: raft handle for managing library resources
* @param[inout] data: input matrix, results are in-place
* @param[in] vec: input vector
* @param[in] bcast_along_rows: whether to broadcast the vector along the rows (true) or the
* columns (false) of the matrix
*/
template <typename Type, typename IdxType>
void binary_sub(const raft::handle_t& handle,
raft::device_matrix_view<Type, IdxType> data,
raft::device_vector_view<const Type> vec,
bool bcast_along_rows)
{
bool row_major = raft::is_row_major(data);

IdxType vec_size = bcast_along_rows ? data.extent(1) : data.extent(0);

RAFT_EXPECTS(
vec.extent(0) == vec_size,
"If `bcast_along_rows==true`, vector size must equal number of columns in the matrix. "
"If `bcast_along_rows==false`, vector size must equal number of rows in the matrix.");

matrix::detail::matrixVectorBinarySub<Type, IdxType>(data.data_handle(),
vec.data_handle(),
data.extent(0),
data.extent(1),
row_major,
bcast_along_rows,
handle.get_stream());
}

} // namespace raft::linalg
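
A minimal call-site sketch for the new wrappers (hypothetical, not part of this diff; it assumes the raft::make_device_matrix / make_device_vector factories from core and the usual handle header). With bcast_along_rows == true the vector is applied across each row, so its length must equal the number of columns:

```cpp
#include <raft/core/device_mdarray.hpp>
#include <raft/core/handle.hpp>
#include <raft/linalg/matrix_vector.cuh>

void matrix_vector_example()
{
  raft::handle_t handle;

  // 4x3 row-major matrix and a length-3 vector (one entry per column).
  auto data = raft::make_device_matrix<float, int>(handle, 4, 3);
  auto vec  = raft::make_device_vector<float>(handle, 3);

  // ... fill `data` and `vec` on handle.get_stream() ...

  // In-place: each row of `data` is multiplied element-wise by `vec`,
  // except where `vec` is zero (those entries are left unchanged).
  raft::linalg::binary_mult_skip_zero(handle, data.view(), vec.view(), true);

  // With bcast_along_rows == false, vec.extent(0) would have to equal data.extent(0).
}
```
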
41 changes: 41 additions & 0 deletions cpp/include/raft/matrix/argmax.cuh
@@ -0,0 +1,41 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/core/device_mdspan.hpp>
#include <raft/matrix/detail/math.cuh>
#include <raft/matrix/matrix.cuh>

namespace raft::matrix {

/**
* @brief Argmax: find the col idx with maximum value for each row
* @param handle: raft handle
* @param in: input matrix of size (n_rows, n_cols)
* @param out: output vector of size n_rows
*/
template <typename math_t, typename idx_t, typename matrix_idx_t>
void argmax(const raft::handle_t& handle,
raft::device_matrix_view<const math_t, matrix_idx_t, row_major> in,
raft::device_vector_view<idx_t, matrix_idx_t> out)
{
RAFT_EXPECTS(out.extent(0) == in.extent(0),
"Size of output vector must equal number of rows in input matrix.");
detail::argmax(
in.data_handle(), in.extent(0), in.extent(1), out.data_handle(), handle.get_stream());
}
} // namespace raft::matrix
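
For reference, a hypothetical call site (a sketch, assuming the core mdarray factories and make_device_matrix_view); the output holds one column index per row, matching the RAFT_EXPECTS above:

```cpp
#include <raft/core/device_mdarray.hpp>
#include <raft/core/device_mdspan.hpp>
#include <raft/core/handle.hpp>
#include <raft/matrix/argmax.cuh>

void argmax_example()
{
  raft::handle_t handle;

  // 4x3 row-major input; the result holds one argmax (column index) per row.
  auto in  = raft::make_device_matrix<float, int, raft::row_major>(handle, 4, 3);
  auto out = raft::make_device_vector<int, int>(handle, 4);

  // ... fill `in` ...

  // argmax takes a const view, so wrap the mutable storage in one.
  auto in_const = raft::make_device_matrix_view<const float, int, raft::row_major>(
    in.data_handle(), in.extent(0), in.extent(1));
  raft::matrix::argmax(handle, in_const, out.view());
}
```
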
58 changes: 43 additions & 15 deletions cpp/include/raft/matrix/detail/matrix.cuh
@@ -116,15 +116,14 @@ void rowReverse(m_t* inout, idx_t n_rows, idx_t n_cols, cudaStream_t stream)

thrust::for_each(
rmm::exec_policy(stream), counting, counting + (size / 2), [=] __device__(idx_t idx) {
idx_t dest_row = idx % m;
idx_t dest_col = idx / m;
idx_t dest_row = idx % (m / 2);
idx_t dest_col = idx / (m / 2);
idx_t src_row = (m - dest_row) - 1;
idx_t src_col = dest_col;

m_t temp = (m_t)d_q_reversed[idx];
d_q_reversed[idx] = d_q[src_col * m + src_row];
d_q[src_col * m + src_row] = temp;
m_t temp = (m_t)d_q_reversed[dest_col * m + dest_row];
d_q_reversed[dest_col * m + dest_row] = d_q[src_col * m + src_row];
d_q[src_col * m + src_row] = temp;
});
}

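As a sanity check on the corrected indexing, a standalone host-side sketch (not library code) that enumerates the swap pairs: each idx now maps into the top half of a column and swaps with its mirror row, so no pair is visited twice, and the middle row of an odd-height matrix stays put.

```cpp
#include <cstdio>

// Enumerate the swap pairs produced by the fixed rowReverse index math
// for a column-major m x n matrix.
int main()
{
  const int m = 5, n = 2;  // odd m: row 2 is its own mirror and is skipped
  for (int idx = 0; idx < (m / 2) * n; ++idx) {
    const int dest_row = idx % (m / 2);
    const int dest_col = idx / (m / 2);
    const int src_row  = (m - dest_row) - 1;
    std::printf("swap (%d,%d) <-> (%d,%d)\n", dest_row, dest_col, src_row, dest_col);
  }
  return 0;
}
```
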
@@ -170,7 +169,7 @@ void printHost(const m_t* in, idx_t n_rows, idx_t n_cols)
*/
template <typename m_t, typename idx_t = int>
__global__ void slice(
m_t* src_d, idx_t m, idx_t n, m_t* dst_d, idx_t x1, idx_t y1, idx_t x2, idx_t y2)
const m_t* src_d, idx_t m, idx_t n, m_t* dst_d, idx_t x1, idx_t y1, idx_t x2, idx_t y2)
Member review comment:

I'm a little concerned that changing these is going to ultimately break the existing APIs. We could cast away the constness for now (and create a GitHub issue for it) or test this PR in cuml to make sure it doesn't break anything.

{
idx_t idx = threadIdx.x + blockDim.x * blockIdx.x;
idx_t dm = x2 - x1, dn = y2 - y1;
@@ -182,7 +181,7 @@
}

template <typename m_t, typename idx_t = int>
void sliceMatrix(m_t* in,
void sliceMatrix(const m_t* in,
idx_t n_rows,
idx_t n_cols,
m_t* out,
@@ -207,7 +206,7 @@ void sliceMatrix(m_t* in,
* @param k: min(n_rows, n_cols)
*/
template <typename m_t, typename idx_t = int>
__global__ void getUpperTriangular(m_t* src, m_t* dst, idx_t n_rows, idx_t n_cols, idx_t k)
__global__ void getUpperTriangular(const m_t* src, m_t* dst, idx_t n_rows, idx_t n_cols, idx_t k)
{
idx_t idx = threadIdx.x + blockDim.x * blockIdx.x;
idx_t m = n_rows, n = n_cols;
@@ -218,7 +217,7 @@ __global__ void getUpperTriangular(m_t* src, m_t* dst, idx_t n_rows, idx_t n_col
}

template <typename m_t, typename idx_t = int>
void copyUpperTriangular(m_t* src, m_t* dst, idx_t n_rows, idx_t n_cols, cudaStream_t stream)
void copyUpperTriangular(const m_t* src, m_t* dst, idx_t n_rows, idx_t n_cols, cudaStream_t stream)
{
idx_t m = n_rows, n = n_cols;
idx_t k = std::min(m, n);
@@ -236,23 +235,48 @@ void copyUpperTriangular(m_t* src, m_t* dst, idx_t n_rows, idx_t n_cols, cudaStr
* @param k: dimensionality
*/
template <typename m_t, typename idx_t = int>
__global__ void copyVectorToMatrixDiagonal(m_t* vec, m_t* matrix, idx_t m, idx_t n, idx_t k)
__global__ void copyVectorToMatrixDiagonal(const m_t* vec, m_t* matrix, idx_t m, idx_t n, idx_t k)
{
idx_t idx = threadIdx.x + blockDim.x * blockIdx.x;

if (idx < k) { matrix[idx + idx * m] = vec[idx]; }
}

/**
* @brief Copy matrix diagonal to vector
* @param vec: vector of length k = min(n_rows, n_cols)
* @param matrix: matrix of size n_rows x n_cols
* @param m: number of rows of the matrix
* @param n: number of columns of the matrix
* @param k: dimensionality
*/
template <typename m_t, typename idx_t = int>
__global__ void copyVectorFromMatrixDiagonal(m_t* vec, const m_t* matrix, idx_t m, idx_t n, idx_t k)
{
idx_t idx = threadIdx.x + blockDim.x * blockIdx.x;

if (idx < k) { vec[idx] = matrix[idx + idx * m]; }
}

template <typename m_t, typename idx_t = int>
void initializeDiagonalMatrix(
m_t* vec, m_t* matrix, idx_t n_rows, idx_t n_cols, cudaStream_t stream)
const m_t* vec, m_t* matrix, idx_t n_rows, idx_t n_cols, cudaStream_t stream)
{
idx_t k = std::min(n_rows, n_cols);
dim3 block(64);
dim3 grid((k + block.x - 1) / block.x);
copyVectorToMatrixDiagonal<<<grid, block, 0, stream>>>(vec, matrix, n_rows, n_cols, k);
}

template <typename m_t, typename idx_t = int>
void getDiagonalMatrix(m_t* vec, const m_t* matrix, idx_t n_rows, idx_t n_cols, cudaStream_t stream)
{
idx_t k = std::min(n_rows, n_cols);
dim3 block(64);
dim3 grid((k + block.x - 1) / block.x);
copyVectorFromMatrixDiagonal<<<grid, block, 0, stream>>>(vec, matrix, n_rows, n_cols, k);
}

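A hypothetical round trip through the two helpers above (detail namespace, so subject to change; names and fill code are assumptions): write a vector onto the diagonal of a zeroed matrix, then read it back.

```cpp
#include <raft/core/handle.hpp>
#include <raft/matrix/detail/matrix.cuh>

#include <rmm/device_uvector.hpp>

void diagonal_round_trip(const raft::handle_t& handle)
{
  auto stream = handle.get_stream();
  const int n_rows = 4, n_cols = 3;  // k = min(n_rows, n_cols) = 3
  rmm::device_uvector<float> vec(3, stream);
  rmm::device_uvector<float> mat(n_rows * n_cols, stream);
  rmm::device_uvector<float> back(3, stream);

  // ... fill `vec`, zero-initialize `mat` ...

  // Scatter `vec` onto the diagonal of the column-major matrix ...
  raft::matrix::detail::initializeDiagonalMatrix(vec.data(), mat.data(), n_rows, n_cols, stream);
  // ... and gather it back out; `back` should now equal `vec`.
  raft::matrix::detail::getDiagonalMatrix(back.data(), mat.data(), n_rows, n_cols, stream);
}
```
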
/**
* @brief Calculate the inverse of the diagonal of a square matrix
* element-wise and in place
@@ -275,11 +299,15 @@ void getDiagonalInverseMatrix(m_t* in, idx_t len, cudaStream_t stream)
}

template <typename m_t, typename idx_t = int>
m_t getL2Norm(const raft::handle_t& handle, m_t* in, idx_t size, cudaStream_t stream)
m_t getL2Norm(const raft::handle_t& handle, const m_t* in, idx_t size, cudaStream_t stream)
{
cublasHandle_t cublasH = handle.get_cublas_handle();
m_t normval = 0;
RAFT_CUBLAS_TRY(raft::linalg::detail::cublasnrm2(cublasH, size, in, 1, &normval, stream));
RAFT_EXPECTS(
std::is_integral_v<idx_t> && (std::size_t)size <= (std::size_t)std::numeric_limits<int>::max(),
"Index type not supported");
RAFT_CUBLAS_TRY(
raft::linalg::detail::cublasnrm2(cublasH, static_cast<int>(size), in, 1, &normval, stream));
return normval;
}

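The new RAFT_EXPECTS guards the narrowing cast: cublasnrm2 takes an int count, so a wider index type must be range-checked before the cast. The same pattern in isolation (a sketch with hypothetical names, not library code):

```cpp
#include <cstddef>
#include <limits>
#include <stdexcept>
#include <type_traits>

// Range-check a wide index before narrowing it to cuBLAS's `int` count.
template <typename idx_t>
int checked_int_count(idx_t size)
{
  static_assert(std::is_integral_v<idx_t>, "Index type not supported");
  if (static_cast<std::size_t>(size) > static_cast<std::size_t>(std::numeric_limits<int>::max())) {
    throw std::overflow_error("vector length exceeds cuBLAS int range");
  }
  return static_cast<int>(size);
}
```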