From 2ed073ca9637b193f047e192f2ffce5a85d838f1 Mon Sep 17 00:00:00 2001
From: fis
Date: Wed, 13 Jul 2022 04:45:53 +0800
Subject: [PATCH 01/15] Implement matrix transpose with mdspan.

* Implement a transpose function that works on both column- and row-major matrices.
---
 cpp/include/raft/linalg/detail/transpose.cuh |  1 -
 cpp/include/raft/linalg/transpose.cuh        | 79 +++++++++++++++++++-
 cpp/test/linalg/transpose.cu                 | 33 ++++++++
 3 files changed, 111 insertions(+), 2 deletions(-)

diff --git a/cpp/include/raft/linalg/detail/transpose.cuh b/cpp/include/raft/linalg/detail/transpose.cuh
index c09b7a2450..cea7bdf5e1 100644
--- a/cpp/include/raft/linalg/detail/transpose.cuh
+++ b/cpp/include/raft/linalg/detail/transpose.cuh
@@ -78,7 +78,6 @@ void transpose(math_t* inout, int n, cudaStream_t stream)
     }
   });
 }
-
 };  // end namespace detail
 };  // end namespace linalg
 };  // end namespace raft
diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh
index a9ada5125a..9d0d68049d 100644
--- a/cpp/include/raft/linalg/transpose.cuh
+++ b/cpp/include/raft/linalg/transpose.cuh
@@ -19,6 +19,7 @@
 #pragma once
 
 #include "detail/transpose.cuh"
+#include <raft/core/mdarray.hpp>
 
 namespace raft {
 namespace linalg {
@@ -55,7 +56,83 @@ void transpose(math_t* inout, int n, cudaStream_t stream)
   detail::transpose(inout, n, stream);
 }
 
+/**
+ * @brief Transpose a contiguous matrix. The output have same layout policy as input.
+ *
+ * @param in Input matrix, the storage should be contiguous.
+ * @param out Output matrix, storage is pre-allocated by caller and should be contiguous.
+ */
+template <typename T, typename LayoutPolicy>
+std::enable_if_t<std::is_floating_point_v<T> &&
+                   (std::is_same_v<LayoutPolicy, layout_c_contiguous> ||
+                    std::is_same_v<LayoutPolicy, layout_f_contiguous>),
+                 void>
+transpose(handle_t const& handle,
+          device_matrix_view<T, LayoutPolicy> in,
+          device_matrix_view<T, LayoutPolicy> out)
+{
+  ASSERT(out.extent(0) == in.extent(1), "Invalid shape for transpose.");
+  ASSERT(out.extent(1) == in.extent(0), "Invalid shape for transpose.");
+  ASSERT(in.is_contiguous(), "Invalid format for transpose input.");
+  ASSERT(out.is_contiguous(), "Invalid format for transpose output.");
+
+  size_t out_n_rows = in.extent(1);
+  size_t out_n_cols = in.extent(0);
+
+  T constexpr kOne  = 1;
+  T constexpr kZero = 0;
+  if constexpr (std::is_same_v<LayoutPolicy, layout_c_contiguous>) {
+    CUBLAS_TRY(detail::cublasgeam(handle.get_cublas_handle(),
+                                  CUBLAS_OP_T,
+                                  CUBLAS_OP_N,
+                                  out_n_cols,
+                                  out_n_rows,
+                                  &kOne,
+                                  in.data(),
+                                  in.stride(0),
+                                  &kZero,
+                                  static_cast<T*>(nullptr),
+                                  out.stride(0),
+                                  out.data(),
+                                  out.stride(0),
+                                  handle.get_stream()));
+  } else if (std::is_same_v<LayoutPolicy, layout_f_contiguous>) {
+    CUBLAS_TRY(detail::cublasgeam(handle.get_cublas_handle(),
+                                  CUBLAS_OP_T,
+                                  CUBLAS_OP_N,
+                                  out_n_rows,
+                                  out_n_cols,
+                                  &kOne,
+                                  in.data(),
+                                  in.stride(1),
+                                  &kZero,
+                                  static_cast<T*>(nullptr),
+                                  out.stride(1),
+                                  out.data(),
+                                  out.stride(1),
+                                  handle.get_stream()));
+  } else {
+    ASSERT(false, "Unknown layout.");
+  }
+}
+
+/**
+ * @brief Transpose a contiguous matrix. The output have same layout policy as input.
+ *
+ * @param in Input matrix, the storage should be contiguous.
+ */
+template <typename T, typename LayoutPolicy>
+auto transpose(handle_t const& handle, device_matrix_view<T, LayoutPolicy> in)
+  -> std::enable_if_t<std::is_floating_point_v<T> &&
+                        (std::is_same_v<LayoutPolicy, layout_c_contiguous> ||
+                         std::is_same_v<LayoutPolicy, layout_f_contiguous>),
+                      device_matrix<T, LayoutPolicy>>
+{
+  auto out = make_device_matrix<T, LayoutPolicy>(handle, in.extent(1), in.extent(0));
+  transpose(handle, in, out.view());
+  return out;
+}
 };  // end namespace linalg
 };  // end namespace raft
 
-#endif
\ No newline at end of file
+#endif
diff --git a/cpp/test/linalg/transpose.cu b/cpp/test/linalg/transpose.cu
index 3bb30c9f33..64b41d1146 100644
--- a/cpp/test/linalg/transpose.cu
+++ b/cpp/test/linalg/transpose.cu
@@ -113,5 +113,38 @@ INSTANTIATE_TEST_SUITE_P(TransposeTests, TransposeTestValF, ::testing::ValuesIn(
 
 INSTANTIATE_TEST_SUITE_P(TransposeTests, TransposeTestValD, ::testing::ValuesIn(inputsd2));
 
+
+template <typename T, typename LayoutPolicy>
+void test_transpose_with_mdspan()
+{
+  handle_t handle;
+  auto v = make_device_matrix<T, LayoutPolicy>(handle, 32, 3);
+  T k{0};
+  for (size_t i = 0; i < v.extent(0); ++i) {
+    for (size_t j = 0; j < v.extent(1); ++j) {
+      v(i, j) = k++;
+    }
+  }
+  auto out = transpose(handle, v.view());
+  static_assert(std::is_same_v<LayoutPolicy, typename decltype(out)::layout_type>);
+  ASSERT_EQ(out.extent(0), v.extent(1));
+  ASSERT_EQ(out.extent(1), v.extent(0));
+
+  k = 0;
+  for (size_t i = 0; i < out.extent(1); ++i) {
+    for (size_t j = 0; j < out.extent(0); ++j) {
+      ASSERT_EQ(out(j, i), k++);
+    }
+  }
+}
+
+TEST(TransposeTest, MDSpan)
+{
+  test_transpose_with_mdspan<float, layout_c_contiguous>();
+  test_transpose_with_mdspan<double, layout_c_contiguous>();
+
+  test_transpose_with_mdspan<float, layout_f_contiguous>();
+  test_transpose_with_mdspan<double, layout_f_contiguous>();
+}
 }  // end namespace linalg
 }  // end namespace raft

From 709273970ca8411abc2d8a15582b39bba71e5994 Mon Sep 17 00:00:00 2001
From: fis
Date: Wed, 13 Jul 2022 05:05:54 +0800
Subject: [PATCH 02/15] cleanup.

---
 cpp/include/raft/linalg/transpose.cuh | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh
index 9d0d68049d..ca2c1305b7 100644
--- a/cpp/include/raft/linalg/transpose.cuh
+++ b/cpp/include/raft/linalg/transpose.cuh
@@ -63,18 +63,18 @@ void transpose(math_t* inout, int n, cudaStream_t stream)
  * @param out Output matrix, storage is pre-allocated by caller and should be contiguous.
  */
 template <typename T, typename LayoutPolicy>
-std::enable_if_t<std::is_floating_point_v<T> &&
-                   (std::is_same_v<LayoutPolicy, layout_c_contiguous> ||
-                    std::is_same_v<LayoutPolicy, layout_f_contiguous>),
-                 void>
-transpose(handle_t const& handle,
-          device_matrix_view<T, LayoutPolicy> in,
-          device_matrix_view<T, LayoutPolicy> out)
+auto transpose(handle_t const& handle,
+               device_matrix_view<T, LayoutPolicy> in,
+               device_matrix_view<T, LayoutPolicy> out)
+  -> std::enable_if_t<std::is_floating_point_v<T> &&
+                        (std::is_same_v<LayoutPolicy, layout_c_contiguous> ||
+                         std::is_same_v<LayoutPolicy, layout_f_contiguous>),
+                      void>
 {
-  ASSERT(out.extent(0) == in.extent(1), "Invalid shape for transpose.");
-  ASSERT(out.extent(1) == in.extent(0), "Invalid shape for transpose.");
-  ASSERT(in.is_contiguous(), "Invalid format for transpose input.");
-  ASSERT(out.is_contiguous(), "Invalid format for transpose output.");
+  RAFT_EXPECTS(out.extent(0) == in.extent(1), "Invalid shape for transpose.");
+  RAFT_EXPECTS(out.extent(1) == in.extent(0), "Invalid shape for transpose.");
+  RAFT_EXPECTS(in.is_contiguous(), "Invalid format for transpose input.");
+  RAFT_EXPECTS(out.is_contiguous(), "Invalid format for transpose output.");
 
   size_t out_n_rows = in.extent(1);
   size_t out_n_cols = in.extent(0);
@@ -112,7 +112,7 @@ transpose(handle_t const& handle,
                                   out.stride(1),
                                   handle.get_stream()));
   } else {
-    ASSERT(false, "Unknown layout.");
+    RAFT_EXPECTS(false, "Unknown layout.");
   }
 }
 
@@ -120,6 +120,8 @@ transpose(handle_t const& handle,
  * @brief Transpose a contiguous matrix. The output have same layout policy as input.
  *
  * @param in Input matrix, the storage should be contiguous.
+ *
+ * @return The transposed matrix
  */
 template <typename T, typename LayoutPolicy>
 auto transpose(handle_t const& handle, device_matrix_view<T, LayoutPolicy> in)

From abd660fa43d490ec350ae0bfe8da95ef9484b6dc Mon Sep 17 00:00:00 2001
From: fis
Date: Wed, 13 Jul 2022 05:08:04 +0800
Subject: [PATCH 03/15] constexpr.

---
 cpp/include/raft/linalg/transpose.cuh | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh
index ca2c1305b7..3fdfec3281 100644
--- a/cpp/include/raft/linalg/transpose.cuh
+++ b/cpp/include/raft/linalg/transpose.cuh
@@ -76,8 +76,8 @@ auto transpose(handle_t const& handle,
   RAFT_EXPECTS(in.is_contiguous(), "Invalid format for transpose input.");
   RAFT_EXPECTS(out.is_contiguous(), "Invalid format for transpose output.");
 
-  size_t out_n_rows = in.extent(1);
-  size_t out_n_cols = in.extent(0);
+  auto out_n_rows = in.extent(1);
+  auto out_n_cols = in.extent(0);
 
   T constexpr kOne  = 1;
   T constexpr kZero = 0;
@@ -96,7 +96,8 @@ auto transpose(handle_t const& handle,
                                   out.data(),
                                   out.stride(0),
                                   handle.get_stream()));
-  } else if (std::is_same_v<LayoutPolicy, layout_f_contiguous>) {
+  } else {
+    static_assert(std::is_same_v<LayoutPolicy, layout_f_contiguous>);
     CUBLAS_TRY(detail::cublasgeam(handle.get_cublas_handle(),
                                   CUBLAS_OP_T,
                                   CUBLAS_OP_N,
@@ -111,8 +112,6 @@ auto transpose(handle_t const& handle,
                                   out.data(),
                                   out.stride(1),
                                   handle.get_stream()));
-  } else {
-    RAFT_EXPECTS(false, "Unknown layout.");
   }
 }

From 09b5ce30443f0e6fe06c16d0d0692a1c167e0352 Mon Sep 17 00:00:00 2001
From: fis
Date: Wed, 13 Jul 2022 05:14:19 +0800
Subject: [PATCH 04/15] Lint.

---
 cpp/test/linalg/transpose.cu | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cpp/test/linalg/transpose.cu b/cpp/test/linalg/transpose.cu
index 64b41d1146..e9866d3c9e 100644
--- a/cpp/test/linalg/transpose.cu
+++ b/cpp/test/linalg/transpose.cu
@@ -113,7 +113,6 @@ INSTANTIATE_TEST_SUITE_P(TransposeTests, TransposeTestValF, ::testing::ValuesIn(
 
 INSTANTIATE_TEST_SUITE_P(TransposeTests, TransposeTestValD, ::testing::ValuesIn(inputsd2));
 
-
 template <typename T, typename LayoutPolicy>
 void test_transpose_with_mdspan()
 {

From 625e3a4977391e77e4331ab5c6783c59a404a2ad Mon Sep 17 00:00:00 2001
From: fis
Date: Wed, 13 Jul 2022 06:38:33 +0800
Subject: [PATCH 05/15] Doxygen.

---
 cpp/include/raft/linalg/transpose.cuh | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh
index 3fdfec3281..027b5bc2b1 100644
--- a/cpp/include/raft/linalg/transpose.cuh
+++ b/cpp/include/raft/linalg/transpose.cuh
@@ -59,8 +59,13 @@ void transpose(math_t* inout, int n, cudaStream_t stream)
 /**
  * @brief Transpose a contiguous matrix. The output have same layout policy as input.
  *
- * @param in Input matrix, the storage should be contiguous.
- * @param out Output matrix, storage is pre-allocated by caller and should be contiguous.
+ * @tparam T Data type of the input matrix elements
+ * @tparam LayoutPolicy Layout type of the input matrix, should be either
+ *         layout_c_contiguous or layout_f_contiguous
+ *
+ * @param[in] handle raft handle for managing expensive cuda resources
+ * @param[in] in Input matrix, the storage should be contiguous.
+ * @param[out] out Output matrix, storage is pre-allocated by caller and should be contiguous.
  */
 template <typename T, typename LayoutPolicy>
 auto transpose(handle_t const& handle,
@@ -118,7 +123,12 @@ transpose(handle_t const& handle,
 /**
  * @brief Transpose a contiguous matrix. The output have same layout policy as input.
  *
- * @param in Input matrix, the storage should be contiguous.
+ * @tparam T Data type of the input matrix elements
+ * @tparam LayoutPolicy Layout type of the input matrix, should be either
+ *         layout_c_contiguous or layout_f_contiguous
+ *
+ * @param[in] handle raft handle for managing expensive cuda resources
+ * @param[in] in Input matrix, the storage should be contiguous.
  *
  * @return The transposed matrix
  */

From c96b4f5aa39f92c8726bbda8576f2e64993e4a5e Mon Sep 17 00:00:00 2001
From: fis
Date: Wed, 13 Jul 2022 06:42:28 +0800
Subject: [PATCH 06/15] Doxygen.

---
 cpp/include/raft/linalg/transpose.cuh | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh
index 027b5bc2b1..7eda2c7ca0 100644
--- a/cpp/include/raft/linalg/transpose.cuh
+++ b/cpp/include/raft/linalg/transpose.cuh
@@ -57,15 +57,16 @@ void transpose(math_t* inout, int n, cudaStream_t stream)
 }
 
 /**
- * @brief Transpose a contiguous matrix. The output have same layout policy as input.
+ * @brief Transpose a contiguous matrix. The output has same layout policy as the input.
  *
- * @tparam T Data type of the input matrix elements
+ * @tparam T Data type of input matrix element.
  * @tparam LayoutPolicy Layout type of the input matrix, should be either
- *         layout_c_contiguous or layout_f_contiguous
+ *         `layout_c_contiguous` or `layout_f_contiguous`.
  *
- * @param[in] handle raft handle for managing expensive cuda resources
- * @param[in] in Input matrix, the storage should be contiguous.
- * @param[out] out Output matrix, storage is pre-allocated by caller and should be contiguous.
+ * @param[in] handle raft handle for managing expensive cuda resources.
+ * @param[in] in Input matrix, the storage should be contiguous.
+ * @param[out] out Output matrix, storage is pre-allocated by caller and should be
+ *             contiguous.
  */
 template <typename T, typename LayoutPolicy>
 auto transpose(handle_t const& handle,
@@ -122,14 +122,14 @@ auto transpose(handle_t const& handle,
 }
 
 /**
- * @brief Transpose a contiguous matrix. The output have same layout policy as input.
+ * @brief Transpose a contiguous matrix. The output has same layout policy as the input.
  *
- * @tparam T Data type of the input matrix elements
+ * @tparam T Data type of input matrix elements.
  * @tparam LayoutPolicy Layout type of the input matrix, should be either
- *         layout_c_contiguous or layout_f_contiguous
+ *         `layout_c_contiguous` or `layout_f_contiguous`.
  *
- * @param[in] handle raft handle for managing expensive cuda resources
- * @param[in] in Input matrix, the storage should be contiguous.
+ * @param[in] handle raft handle for managing expensive cuda resources.
+ * @param[in] in Input matrix, the storage should be contiguous.
  *
- * @return The transposed matrix
+ * @return The transposed matrix.
  */

From 3fc035586d8ac9c78b075b07e87616853b1fb7de Mon Sep 17 00:00:00 2001
From: fis
Date: Fri, 15 Jul 2022 12:07:13 +0800
Subject: [PATCH 07/15] Support sub-matrix.
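
The transpose functions now also accept views with layout_stride, so a
submatrix of a larger matrix can be transposed directly. A minimal usage
sketch, mirroring the new test (the extents and submatrix bounds here are
illustrative only, not part of this change):

    raft::handle_t handle;
    auto big = raft::make_device_matrix<float, raft::layout_c_contiguous>(handle, 32, 33);
    // Take a strided 10x9 window of the 32x33 matrix; the view has layout_stride.
    auto sub = raft::detail::stdex::submdspan(
      big.view(), std::make_tuple(3ul, 13ul), std::make_tuple(2ul, 11ul));
    // The result keeps the input's (strided) layout policy.
    auto transposed = raft::linalg::transpose(handle, sub);
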
--- cpp/include/raft/core/mdarray.hpp | 10 +- cpp/include/raft/linalg/detail/transpose.cuh | 51 ++++++++ cpp/include/raft/linalg/transpose.cuh | 115 +++++++++++-------- cpp/test/linalg/transpose.cu | 43 +++++++ 4 files changed, 165 insertions(+), 54 deletions(-) diff --git a/cpp/include/raft/core/mdarray.hpp b/cpp/include/raft/core/mdarray.hpp index a4f6ca67b1..f92fb6b3fa 100644 --- a/cpp/include/raft/core/mdarray.hpp +++ b/cpp/include/raft/core/mdarray.hpp @@ -33,7 +33,7 @@ namespace raft { /** - * @\brief Dimensions extents for raft::host_mdspan or raft::device_mdspan + * @brief Dimensions extents for raft::host_mdspan or raft::device_mdspan */ template using extents = std::experimental::extents; @@ -56,6 +56,11 @@ using layout_f_contiguous = layout_left; using col_major = layout_left; /** @} */ +/** + * @brief Strided layout for non-contiguous memory. + */ +using detail::stdex::layout_stride; + /** * @defgroup Common mdarray/mdspan extent types. The rank is known at compile time, each dimension * is known at run time (dynamic_extent in each dimension). @@ -409,8 +414,7 @@ class mdarray auto operator()(IndexType&&... indices) -> std::enable_if_t && ...) && - std::is_constructible_v && - std::is_constructible_v, + std::is_constructible_v, /* device policy is not default constructible due to requirement for CUDA stream. */ /* std::is_default_constructible_v */ diff --git a/cpp/include/raft/linalg/detail/transpose.cuh b/cpp/include/raft/linalg/detail/transpose.cuh index cea7bdf5e1..516d9f04c2 100644 --- a/cpp/include/raft/linalg/detail/transpose.cuh +++ b/cpp/include/raft/linalg/detail/transpose.cuh @@ -18,6 +18,7 @@ #include "cublas_wrappers.hpp" +#include #include #include #include @@ -78,6 +79,56 @@ void transpose(math_t* inout, int n, cudaStream_t stream) } }); } + +template +void transpose_row_major_impl(handle_t const& handle, + device_matrix_view in, + device_matrix_view out) +{ + auto out_n_rows = in.extent(1); + auto out_n_cols = in.extent(0); + T constexpr kOne = 1; + T constexpr kZero = 0; + CUBLAS_TRY(cublasgeam(handle.get_cublas_handle(), + CUBLAS_OP_T, + CUBLAS_OP_N, + out_n_cols, + out_n_rows, + &kOne, + in.data(), + in.stride(0), + &kZero, + static_cast(nullptr), + out.stride(0), + out.data(), + out.stride(0), + handle.get_stream())); +} + +template +void transpose_col_major_impl(handle_t const& handle, + device_matrix_view in, + device_matrix_view out) +{ + auto out_n_rows = in.extent(1); + auto out_n_cols = in.extent(0); + T constexpr kOne = 1; + T constexpr kZero = 0; + CUBLAS_TRY(cublasgeam(handle.get_cublas_handle(), + CUBLAS_OP_T, + CUBLAS_OP_N, + out_n_rows, + out_n_cols, + &kOne, + in.data(), + in.stride(1), + &kZero, + static_cast(nullptr), + out.stride(1), + out.data(), + out.stride(1), + handle.get_stream())); +} }; // end namespace detail }; // end namespace linalg }; // end namespace raft diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh index 7eda2c7ca0..c4b04ea990 100644 --- a/cpp/include/raft/linalg/transpose.cuh +++ b/cpp/include/raft/linalg/transpose.cuh @@ -57,84 +57,55 @@ void transpose(math_t* inout, int n, cudaStream_t stream) } /** - * @brief Transpose a contiguous matrix. The output has same layout policy as the input. + * @brief Transpose a matrix. The output has same layout policy as the input. * * @tparam T Data type of input matrix element. - * @tparam LayoutPolicy Layout type of the input matrix, should be either - * `layout_c_contiguous` or `layout_f_contiguous`. 
+ * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can
+ *         be a submatrix of a larger matrix. Arbitrary stride is not supported.
  *
  * @param[in] handle raft handle for managing expensive cuda resources.
- * @param[in] in Input matrix, the storage should be contiguous.
- * @param[out] out Output matrix, storage is pre-allocated by caller and should be
- *             contiguous.
+ * @param[in] in Input matrix.
+ * @param[out] out Output matrix, storage is pre-allocated by caller.
  */
 template <typename T, typename LayoutPolicy>
 auto transpose(handle_t const& handle,
                device_matrix_view<T, LayoutPolicy> in,
                device_matrix_view<T, LayoutPolicy> out)
-  -> std::enable_if_t<std::is_floating_point_v<T> &&
-                        (std::is_same_v<LayoutPolicy, layout_c_contiguous> ||
-                         std::is_same_v<LayoutPolicy, layout_f_contiguous>),
-                      void>
+  -> std::enable_if_t<std::is_floating_point_v<T>, void>
 {
   RAFT_EXPECTS(out.extent(0) == in.extent(1), "Invalid shape for transpose.");
   RAFT_EXPECTS(out.extent(1) == in.extent(0), "Invalid shape for transpose.");
-  RAFT_EXPECTS(in.is_contiguous(), "Invalid format for transpose input.");
-  RAFT_EXPECTS(out.is_contiguous(), "Invalid format for transpose output.");
-
-  auto out_n_rows = in.extent(1);
-  auto out_n_cols = in.extent(0);
 
-  T constexpr kOne  = 1;
-  T constexpr kZero = 0;
   if constexpr (std::is_same_v<LayoutPolicy, layout_c_contiguous>) {
-    CUBLAS_TRY(detail::cublasgeam(handle.get_cublas_handle(),
-                                  CUBLAS_OP_T,
-                                  CUBLAS_OP_N,
-                                  out_n_cols,
-                                  out_n_rows,
-                                  &kOne,
-                                  in.data(),
-                                  in.stride(0),
-                                  &kZero,
-                                  static_cast<T*>(nullptr),
-                                  out.stride(0),
-                                  out.data(),
-                                  out.stride(0),
-                                  handle.get_stream()));
-  } else {
-    static_assert(std::is_same_v<LayoutPolicy, layout_f_contiguous>);
-    CUBLAS_TRY(detail::cublasgeam(handle.get_cublas_handle(),
-                                  CUBLAS_OP_T,
-                                  CUBLAS_OP_N,
-                                  out_n_rows,
-                                  out_n_cols,
-                                  &kOne,
-                                  in.data(),
-                                  in.stride(1),
-                                  &kZero,
-                                  static_cast<T*>(nullptr),
-                                  out.stride(1),
-                                  out.data(),
-                                  out.stride(1),
-                                  handle.get_stream()));
+    detail::transpose_row_major_impl(handle, in, out);
+  } else if (std::is_same_v<LayoutPolicy, layout_f_contiguous>) {
+    detail::transpose_col_major_impl(handle, in, out);
+  } else {
+    RAFT_EXPECTS(in.stride(0) == 1 || in.stride(1) == 1, "Unsupported matrix layout.");
+    if (in.stride(1) == 1) {
+      // row-major submatrix
+      detail::transpose_row_major_impl(handle, in, out);
+    } else {
+      // col-major submatrix
+      detail::transpose_col_major_impl(handle, in, out);
+    }
   }
 }
 
 /**
- * @brief Transpose a contiguous matrix. The output has same layout policy as the input.
+ * @brief Transpose a matrix. The output has same layout policy as the input.
  *
  * @tparam T Data type of input matrix elements.
- * @tparam LayoutPolicy Layout type of the input matrix, should be either
- *         `layout_c_contiguous` or `layout_f_contiguous`.
+ * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can
+ *         be a submatrix of a larger matrix. Arbitrary stride is not supported.
+ * + * @param[in] handle raft handle for managing expensive cuda resources. + * @param[in] in Input matrix. + * + * @return The transposed matrix. + */ +template +[[nodiscard]] auto transpose(handle_t const& handle, + device_matrix_view in) + -> std::enable_if_t, + device_matrix> +{ + using extent_type = raft::extents; + extent_type exts{in.extent(1), in.extent(0)}; + using policy_type = typename raft::device_matrix::container_policy_type; + policy_type policy(handle.get_stream()); + + RAFT_EXPECTS(in.stride(0) == 1 || in.stride(1) == 1, "Unsupported matrix layout."); + if (in.stride(1) == 1) { + // row-major submatrix + std::array strides{in.extent(0), 1}; + auto layout = layout_stride::mapping{exts, strides}; + raft::device_matrix out{layout, policy}; + transpose(handle, in, out.view()); + return out; + } else { + RAFT_EXPECTS(in.stride(0) == 1, "Unsupported layout type."); + // col-major submatrix + std::array strides{1, in.extent(1)}; + auto layout = layout_stride::mapping{exts, strides}; + raft::device_matrix out{layout, policy}; + transpose(handle, in, out.view()); + return out; + } +} }; // end namespace linalg }; // end namespace raft diff --git a/cpp/test/linalg/transpose.cu b/cpp/test/linalg/transpose.cu index e9866d3c9e..642149b8ae 100644 --- a/cpp/test/linalg/transpose.cu +++ b/cpp/test/linalg/transpose.cu @@ -113,6 +113,7 @@ INSTANTIATE_TEST_SUITE_P(TransposeTests, TransposeTestValF, ::testing::ValuesIn( INSTANTIATE_TEST_SUITE_P(TransposeTests, TransposeTestValD, ::testing::ValuesIn(inputsd2)); +namespace { template void test_transpose_with_mdspan() { @@ -136,6 +137,7 @@ void test_transpose_with_mdspan() } } } +} // namespace TEST(TransposeTest, MDSpan) { @@ -145,5 +147,46 @@ TEST(TransposeTest, MDSpan) test_transpose_with_mdspan(); test_transpose_with_mdspan(); } + +namespace { +template +void test_transpose_submatrix() +{ + handle_t handle; + auto v = make_device_matrix(handle, 32, 33); + T k{0}; + size_t row_beg{3}, row_end{13}, col_beg{2}, col_end{11}; + for (size_t i = row_beg; i < row_end; ++i) { + for (size_t j = col_beg; j < col_end; ++j) { + v(i, j) = k++; + } + } + + auto vv = v.view(); + auto submat = raft::detail::stdex::submdspan( + vv, std::make_tuple(row_beg, row_end), std::make_tuple(col_beg, col_end)); + static_assert(std::is_same_v); + + auto out = transpose(handle, submat); + ASSERT_EQ(out.extent(0), submat.extent(1)); + ASSERT_EQ(out.extent(1), submat.extent(0)); + + k = 0; + for (size_t i = 0; i < out.extent(1); ++i) { + for (size_t j = 0; j < out.extent(0); ++j) { + ASSERT_EQ(out(j, i), k++); + } + } +} +} // namespace + +TEST(TransposeTest, SubMatrix) +{ + test_transpose_submatrix(); + test_transpose_submatrix(); + + test_transpose_submatrix(); + test_transpose_submatrix(); +} } // end namespace linalg } // end namespace raft From 28080ffb26b872f148cf79ca8a5f0a19721ffd59 Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 15 Jul 2022 12:15:09 +0800 Subject: [PATCH 08/15] Remove prefix. --- cpp/include/raft/linalg/transpose.cuh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh index c4b04ea990..090ce9cf9c 100644 --- a/cpp/include/raft/linalg/transpose.cuh +++ b/cpp/include/raft/linalg/transpose.cuh @@ -129,10 +129,8 @@ template * @return The transposed matrix. 
*/ template -[[nodiscard]] auto transpose(handle_t const& handle, - device_matrix_view in) - -> std::enable_if_t, - device_matrix> +[[nodiscard]] auto transpose(handle_t const& handle, device_matrix_view in) + -> std::enable_if_t, device_matrix> { using extent_type = raft::extents; extent_type exts{in.extent(1), in.extent(0)}; From 3bfc146e7ac139dcc333f7a95837347de76c4968 Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 15 Jul 2022 17:13:25 +0800 Subject: [PATCH 09/15] Cleanup. --- cpp/include/raft/linalg/detail/transpose.cuh | 12 ++++++------ cpp/include/raft/linalg/transpose.cuh | 8 +++----- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/cpp/include/raft/linalg/detail/transpose.cuh b/cpp/include/raft/linalg/detail/transpose.cuh index 516d9f04c2..16f42f8ccb 100644 --- a/cpp/include/raft/linalg/detail/transpose.cuh +++ b/cpp/include/raft/linalg/detail/transpose.cuh @@ -80,10 +80,10 @@ void transpose(math_t* inout, int n, cudaStream_t stream) }); } -template +template void transpose_row_major_impl(handle_t const& handle, - device_matrix_view in, - device_matrix_view out) + device_matrix_view in, + device_matrix_view out) { auto out_n_rows = in.extent(1); auto out_n_cols = in.extent(0); @@ -105,10 +105,10 @@ void transpose_row_major_impl(handle_t const& handle, handle.get_stream())); } -template +template void transpose_col_major_impl(handle_t const& handle, - device_matrix_view in, - device_matrix_view out) + device_matrix_view in, + device_matrix_view out) { auto out_n_rows = in.extent(1); auto out_n_cols = in.extent(0); diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh index 090ce9cf9c..c1c906ad5b 100644 --- a/cpp/include/raft/linalg/transpose.cuh +++ b/cpp/include/raft/linalg/transpose.cuh @@ -132,8 +132,7 @@ template [[nodiscard]] auto transpose(handle_t const& handle, device_matrix_view in) -> std::enable_if_t, device_matrix> { - using extent_type = raft::extents; - extent_type exts{in.extent(1), in.extent(0)}; + matrix_extent exts{in.extent(1), in.extent(0)}; using policy_type = typename raft::device_matrix::container_policy_type; policy_type policy(handle.get_stream()); @@ -141,15 +140,14 @@ template if (in.stride(1) == 1) { // row-major submatrix std::array strides{in.extent(0), 1}; - auto layout = layout_stride::mapping{exts, strides}; + auto layout = layout_stride::mapping{exts, strides}; raft::device_matrix out{layout, policy}; transpose(handle, in, out.view()); return out; } else { - RAFT_EXPECTS(in.stride(0) == 1, "Unsupported layout type."); // col-major submatrix std::array strides{1, in.extent(1)}; - auto layout = layout_stride::mapping{exts, strides}; + auto layout = layout_stride::mapping{exts, strides}; raft::device_matrix out{layout, policy}; transpose(handle, in, out.view()); return out; From 50a9eff59aefcb78931ee4d7c30120b7b9ac14a0 Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 26 Jul 2022 13:11:31 +0800 Subject: [PATCH 10/15] Move some functions into tests. --- cpp/include/raft/linalg/transpose.cuh | 62 ------------------------- cpp/test/linalg/transpose.cu | 67 +++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 62 deletions(-) diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh index c1c906ad5b..ab93125715 100644 --- a/cpp/include/raft/linalg/transpose.cuh +++ b/cpp/include/raft/linalg/transpose.cuh @@ -91,68 +91,6 @@ auto transpose(handle_t const& handle, } } } - -/** - * @brief Transpose a matrix. The output has same layout policy as the input. 
- * - * @tparam T Data type of input matrix elements. - * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can - * be a submatrix of a larger matrix. Arbitrary stride is not supported. - * - * @param[in] handle raft handle for managing expensive cuda resources. - * @param[in] in Input matrix. - * - * @return The transposed matrix. - */ -template -[[nodiscard]] auto transpose(handle_t const& handle, device_matrix_view in) - -> std::enable_if_t && - (std::is_same_v || - std::is_same_v), - device_matrix> -{ - auto out = make_device_matrix(handle, in.extent(1), in.extent(0)); - transpose(handle, in, out.view()); - return out; -} - -/** - * @brief Transpose a matrix. The output has same layout policy as the input. - * - * @tparam T Data type of input matrix elements. - * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can - * be a submatrix of a larger matrix. Arbitrary stride is not supported. - * - * @param[in] handle raft handle for managing expensive cuda resources. - * @param[in] in Input matrix. - * - * @return The transposed matrix. - */ -template -[[nodiscard]] auto transpose(handle_t const& handle, device_matrix_view in) - -> std::enable_if_t, device_matrix> -{ - matrix_extent exts{in.extent(1), in.extent(0)}; - using policy_type = typename raft::device_matrix::container_policy_type; - policy_type policy(handle.get_stream()); - - RAFT_EXPECTS(in.stride(0) == 1 || in.stride(1) == 1, "Unsupported matrix layout."); - if (in.stride(1) == 1) { - // row-major submatrix - std::array strides{in.extent(0), 1}; - auto layout = layout_stride::mapping{exts, strides}; - raft::device_matrix out{layout, policy}; - transpose(handle, in, out.view()); - return out; - } else { - // col-major submatrix - std::array strides{1, in.extent(1)}; - auto layout = layout_stride::mapping{exts, strides}; - raft::device_matrix out{layout, policy}; - transpose(handle, in, out.view()); - return out; - } -} }; // end namespace linalg }; // end namespace raft diff --git a/cpp/test/linalg/transpose.cu b/cpp/test/linalg/transpose.cu index 642149b8ae..5ef5dc60d2 100644 --- a/cpp/test/linalg/transpose.cu +++ b/cpp/test/linalg/transpose.cu @@ -114,6 +114,73 @@ INSTANTIATE_TEST_SUITE_P(TransposeTests, TransposeTestValF, ::testing::ValuesIn( INSTANTIATE_TEST_SUITE_P(TransposeTests, TransposeTestValD, ::testing::ValuesIn(inputsd2)); namespace { +/** + * We hide these functions in tests for now until we have a heterogeneous mdarray + * implementation. + */ + +/** + * @brief Transpose a matrix. The output has same layout policy as the input. + * + * @tparam T Data type of input matrix elements. + * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can + * be a submatrix of a larger matrix. Arbitrary stride is not supported. + * + * @param[in] handle raft handle for managing expensive cuda resources. + * @param[in] in Input matrix. + * + * @return The transposed matrix. + */ +template +[[nodiscard]] auto transpose(handle_t const& handle, device_matrix_view in) + -> std::enable_if_t && + (std::is_same_v || + std::is_same_v), + device_matrix> +{ + auto out = make_device_matrix(handle, in.extent(1), in.extent(0)); + ::raft::linalg::transpose(handle, in, out.view()); + return out; +} + +/** + * @brief Transpose a matrix. The output has same layout policy as the input. + * + * @tparam T Data type of input matrix elements. + * @tparam LayoutPolicy Layout type of the input matrix. 
When layout is strided, it can
+ *         be a submatrix of a larger matrix. Arbitrary stride is not supported.
+ *
+ * @param[in] handle raft handle for managing expensive cuda resources.
+ * @param[in] in Input matrix.
+ *
+ * @return The transposed matrix.
+ */
+template <typename T>
+[[nodiscard]] auto transpose(handle_t const& handle, device_matrix_view<T, layout_stride> in)
+  -> std::enable_if_t<std::is_floating_point_v<T>, device_matrix<T, layout_stride>>
+{
+  matrix_extent exts{in.extent(1), in.extent(0)};
+  using policy_type = typename raft::device_matrix<T, layout_stride>::container_policy_type;
+  policy_type policy(handle.get_stream());
+
+  RAFT_EXPECTS(in.stride(0) == 1 || in.stride(1) == 1, "Unsupported matrix layout.");
+  if (in.stride(1) == 1) {
+    // row-major submatrix
+    std::array<size_t, 2> strides{in.extent(0), 1};
+    auto layout = layout_stride::mapping<matrix_extent>{exts, strides};
+    raft::device_matrix<T, layout_stride> out{layout, policy};
+    ::raft::linalg::transpose(handle, in, out.view());
+    return out;
+  } else {
+    // col-major submatrix
+    std::array<size_t, 2> strides{1, in.extent(1)};
+    auto layout = layout_stride::mapping<matrix_extent>{exts, strides};
+    raft::device_matrix<T, layout_stride> out{layout, policy};
+    ::raft::linalg::transpose(handle, in, out.view());
+    return out;
+  }
+}
+
 template <typename T, typename LayoutPolicy>
 void test_transpose_with_mdspan()
 {

From 3c300b996dada3b34b388f3d5c9a34b81aa33945 Mon Sep 17 00:00:00 2001
From: fis
Date: Tue, 26 Jul 2022 13:17:52 +0800
Subject: [PATCH 11/15] Use standard mdspan.

---
 cpp/include/raft/linalg/transpose.cuh | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh
index ab93125715..5d17f5c81a 100644
--- a/cpp/include/raft/linalg/transpose.cuh
+++ b/cpp/include/raft/linalg/transpose.cuh
@@ -62,15 +62,17 @@ void transpose(math_t* inout, int n, cudaStream_t stream)
  * @tparam T Data type of input matrix element.
  * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can
  *         be a submatrix of a larger matrix. Arbitrary stride is not supported.
+ * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on
+ *         device.
  *
  * @param[in] handle raft handle for managing expensive cuda resources.
  * @param[in] in Input matrix.
  * @param[out] out Output matrix, storage is pre-allocated by caller.
  */
-template <typename T, typename LayoutPolicy>
+template <typename T, typename LayoutPolicy, typename AccessorPolicy>
 auto transpose(handle_t const& handle,
-               device_matrix_view<T, LayoutPolicy> in,
-               device_matrix_view<T, LayoutPolicy> out)
+               mdspan<T, matrix_extent, LayoutPolicy, AccessorPolicy> in,
+               mdspan<T, matrix_extent, LayoutPolicy, AccessorPolicy> out)
   -> std::enable_if_t<std::is_floating_point_v<T>, void>
 {
   RAFT_EXPECTS(out.extent(0) == in.extent(1), "Invalid shape for transpose.");

From 22dfb48776cc1ac2cd7775a908723859e81b145e Mon Sep 17 00:00:00 2001
From: fis
Date: Tue, 26 Jul 2022 23:01:08 +0800
Subject: [PATCH 12/15] Reviewer's comments.

---
 cpp/include/raft/linalg/transpose.cuh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh
index 5d17f5c81a..34aa18f9a1 100644
--- a/cpp/include/raft/linalg/transpose.cuh
+++ b/cpp/include/raft/linalg/transpose.cuh
@@ -71,8 +71,8 @@ void transpose(math_t* inout, int n, cudaStream_t stream)
  */
 template <typename T, typename LayoutPolicy, typename AccessorPolicy>
 auto transpose(handle_t const& handle,
-               mdspan<T, matrix_extent, LayoutPolicy, AccessorPolicy> in,
-               mdspan<T, matrix_extent, LayoutPolicy, AccessorPolicy> out)
+               raft::mdspan<T, matrix_extent, LayoutPolicy, AccessorPolicy> in,
+               raft::mdspan<T, matrix_extent, LayoutPolicy, AccessorPolicy> out)
   -> std::enable_if_t<std::is_floating_point_v<T>, void>
 {
   RAFT_EXPECTS(out.extent(0) == in.extent(1), "Invalid shape for transpose.");

From c15bb3a01f3506a674f1093d2b375bf533279275 Mon Sep 17 00:00:00 2001
From: fis
Date: Thu, 28 Jul 2022 11:59:35 +0800
Subject: [PATCH 13/15] Reviewer's comment.
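
* Take raft::mdspan directly in the detail implementations (instead of
  device_matrix_view) so that contiguous views and strided submatrix views
  go through the same code path.
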
---
 cpp/include/raft/linalg/detail/transpose.cuh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/cpp/include/raft/linalg/detail/transpose.cuh b/cpp/include/raft/linalg/detail/transpose.cuh
index 16f42f8ccb..b3d73ab4f3 100644
--- a/cpp/include/raft/linalg/detail/transpose.cuh
+++ b/cpp/include/raft/linalg/detail/transpose.cuh
@@ -80,10 +80,10 @@ void transpose(math_t* inout, int n, cudaStream_t stream)
   });
 }
 
-template <typename T, typename LayoutPolicy>
+template <typename T, typename LayoutPolicy, typename AccessorPolicy>
 void transpose_row_major_impl(handle_t const& handle,
-                              device_matrix_view<T, LayoutPolicy> in,
-                              device_matrix_view<T, LayoutPolicy> out)
+                              raft::mdspan<T, matrix_extent, LayoutPolicy, AccessorPolicy> in,
+                              raft::mdspan<T, matrix_extent, LayoutPolicy, AccessorPolicy> out)
 {
   auto out_n_rows = in.extent(1);
   auto out_n_cols = in.extent(0);
@@ -105,10 +105,10 @@ void transpose_row_major_impl(handle_t const& handle,
                                 handle.get_stream()));
 }
 
-template <typename T, typename LayoutPolicy>
+template <typename T, typename LayoutPolicy, typename AccessorPolicy>
 void transpose_col_major_impl(handle_t const& handle,
-                              device_matrix_view<T, LayoutPolicy> in,
-                              device_matrix_view<T, LayoutPolicy> out)
+                              raft::mdspan<T, matrix_extent, LayoutPolicy, AccessorPolicy> in,
+                              raft::mdspan<T, matrix_extent, LayoutPolicy, AccessorPolicy> out)
 {
   auto out_n_rows = in.extent(1);
   auto out_n_cols = in.extent(0);

From 87fb85411c79ca8487448adc8af750463546582e Mon Sep 17 00:00:00 2001
From: fis
Date: Thu, 28 Jul 2022 12:06:46 +0800
Subject: [PATCH 14/15] Add index type.

---
 cpp/include/raft/linalg/detail/transpose.cuh | 18 ++++++++++--------
 cpp/include/raft/linalg/transpose.cuh        |  7 ++++---
 cpp/test/linalg/transpose.cu                 | 33 +++++++++++---------------
 3 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/cpp/include/raft/linalg/detail/transpose.cuh b/cpp/include/raft/linalg/detail/transpose.cuh
index b3d73ab4f3..362ba6a7d9 100644
--- a/cpp/include/raft/linalg/detail/transpose.cuh
+++ b/cpp/include/raft/linalg/detail/transpose.cuh
@@ -80,10 +80,11 @@ void transpose(math_t* inout, int n, cudaStream_t stream)
   });
 }
 
-template <typename T, typename LayoutPolicy, typename AccessorPolicy>
-void transpose_row_major_impl(handle_t const& handle,
-                              raft::mdspan<T, matrix_extent, LayoutPolicy, AccessorPolicy> in,
-                              raft::mdspan<T, matrix_extent, LayoutPolicy, AccessorPolicy> out)
+template <typename T, typename IndexType, typename LayoutPolicy, typename AccessorPolicy>
+void transpose_row_major_impl(
+  handle_t const& handle,
+  raft::mdspan<T, matrix_extent<IndexType>, LayoutPolicy, AccessorPolicy> in,
+  raft::mdspan<T, matrix_extent<IndexType>, LayoutPolicy, AccessorPolicy> out)
 {
   auto out_n_rows = in.extent(1);
   auto out_n_cols = in.extent(0);
@@ -105,10 +106,11 @@ void transpose_row_major_impl(handle_t const& handle,
                                 handle.get_stream()));
 }
 
-template <typename T, typename LayoutPolicy, typename AccessorPolicy>
-void transpose_col_major_impl(handle_t const& handle,
-                              raft::mdspan<T, matrix_extent, LayoutPolicy, AccessorPolicy> in,
-                              raft::mdspan<T, matrix_extent, LayoutPolicy, AccessorPolicy> out)
+template <typename T, typename IndexType, typename LayoutPolicy, typename AccessorPolicy>
+void transpose_col_major_impl(
+  handle_t const& handle,
+  raft::mdspan<T, matrix_extent<IndexType>, LayoutPolicy, AccessorPolicy> in,
+  raft::mdspan<T, matrix_extent<IndexType>, LayoutPolicy, AccessorPolicy> out)
 {
   auto out_n_rows = in.extent(1);
   auto out_n_cols = in.extent(0);
diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh
index 34aa18f9a1..cd78a2f495 100644
--- a/cpp/include/raft/linalg/transpose.cuh
+++ b/cpp/include/raft/linalg/transpose.cuh
@@ -60,16 +60,17 @@ void transpose(math_t* inout, int n, cudaStream_t stream)
  * @brief Transpose a matrix. The output has same layout policy as the input.
  *
  * @tparam T Data type of input matrix element.
+ * @tparam IndexType Index type of matrix extent.
  * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can
  *         be a submatrix of a larger matrix. Arbitrary stride is not supported.
  * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on
  *         device.
  *
  * @param[in] handle raft handle for managing expensive cuda resources.
  * @param[in] in Input matrix.
  * @param[out] out Output matrix, storage is pre-allocated by caller.
*/ -template +template auto transpose(handle_t const& handle, - raft::mdspan in, - raft::mdspan out) + raft::mdspan, LayoutPolicy, AccessorPolicy> in, + raft::mdspan, LayoutPolicy, AccessorPolicy> out) -> std::enable_if_t, void> { RAFT_EXPECTS(out.extent(0) == in.extent(1), "Invalid shape for transpose."); diff --git a/cpp/test/linalg/transpose.cu b/cpp/test/linalg/transpose.cu index 5ef5dc60d2..98f6d5e7e4 100644 --- a/cpp/test/linalg/transpose.cu +++ b/cpp/test/linalg/transpose.cu @@ -131,14 +131,15 @@ namespace { * * @return The transposed matrix. */ -template -[[nodiscard]] auto transpose(handle_t const& handle, device_matrix_view in) +template +[[nodiscard]] auto transpose(handle_t const& handle, + device_matrix_view in) -> std::enable_if_t && (std::is_same_v || std::is_same_v), - device_matrix> + device_matrix> { - auto out = make_device_matrix(handle, in.extent(1), in.extent(0)); + auto out = make_device_matrix(handle, in.extent(1), in.extent(0)); ::raft::linalg::transpose(handle, in, out.view()); return out; } @@ -155,27 +156,29 @@ template * * @return The transposed matrix. */ -template -[[nodiscard]] auto transpose(handle_t const& handle, device_matrix_view in) - -> std::enable_if_t, device_matrix> +template +[[nodiscard]] auto transpose(handle_t const& handle, + device_matrix_view in) + -> std::enable_if_t, device_matrix> { - matrix_extent exts{in.extent(1), in.extent(0)}; - using policy_type = typename raft::device_matrix::container_policy_type; + matrix_extent exts{in.extent(1), in.extent(0)}; + using policy_type = + typename raft::device_matrix::container_policy_type; policy_type policy(handle.get_stream()); RAFT_EXPECTS(in.stride(0) == 1 || in.stride(1) == 1, "Unsupported matrix layout."); if (in.stride(1) == 1) { // row-major submatrix std::array strides{in.extent(0), 1}; - auto layout = layout_stride::mapping{exts, strides}; - raft::device_matrix out{layout, policy}; + auto layout = layout_stride::mapping>{exts, strides}; + raft::device_matrix out{layout, policy}; ::raft::linalg::transpose(handle, in, out.view()); return out; } else { // col-major submatrix std::array strides{1, in.extent(1)}; - auto layout = layout_stride::mapping{exts, strides}; - raft::device_matrix out{layout, policy}; + auto layout = layout_stride::mapping>{exts, strides}; + raft::device_matrix out{layout, policy}; ::raft::linalg::transpose(handle, in, out.view()); return out; } @@ -185,7 +188,7 @@ template void test_transpose_with_mdspan() { handle_t handle; - auto v = make_device_matrix(handle, 32, 3); + auto v = make_device_matrix(handle, 32, 3); T k{0}; for (size_t i = 0; i < v.extent(0); ++i) { for (size_t j = 0; j < v.extent(1); ++j) { @@ -220,7 +223,7 @@ template void test_transpose_submatrix() { handle_t handle; - auto v = make_device_matrix(handle, 32, 33); + auto v = make_device_matrix(handle, 32, 33); T k{0}; size_t row_beg{3}, row_end{13}, col_beg{2}, col_end{11}; for (size_t i = row_beg; i < row_end; ++i) { From 36b2ec80d5dd5bcaaf3ba89e325acddea071bc0d Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 28 Jul 2022 16:54:16 +0800 Subject: [PATCH 15/15] Fix build. 
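
* The mdspan reference implementation renamed the data accessor from
  data() to data_handle(); update the cuBLAS geam calls in the detail
  implementations accordingly.

With this the series is complete. The public API, as exercised by the
tests (a sketch; the extents and element type are illustrative):

    raft::handle_t handle;
    auto in  = raft::make_device_matrix<float, size_t, raft::layout_c_contiguous>(handle, 32, 3);
    auto out = raft::make_device_matrix<float, size_t, raft::layout_c_contiguous>(handle, 3, 32);
    // Caller-allocated output; cuBLAS geam performs the transpose.
    raft::linalg::transpose(handle, in.view(), out.view());
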
--- cpp/include/raft/linalg/detail/transpose.cuh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/raft/linalg/detail/transpose.cuh b/cpp/include/raft/linalg/detail/transpose.cuh index 362ba6a7d9..242d3a3912 100644 --- a/cpp/include/raft/linalg/detail/transpose.cuh +++ b/cpp/include/raft/linalg/detail/transpose.cuh @@ -96,12 +96,12 @@ void transpose_row_major_impl( out_n_cols, out_n_rows, &kOne, - in.data(), + in.data_handle(), in.stride(0), &kZero, static_cast(nullptr), out.stride(0), - out.data(), + out.data_handle(), out.stride(0), handle.get_stream())); } @@ -122,12 +122,12 @@ void transpose_col_major_impl( out_n_rows, out_n_cols, &kOne, - in.data(), + in.data_handle(), in.stride(1), &kZero, static_cast(nullptr), out.stride(1), - out.data(), + out.data_handle(), out.stride(1), handle.get_stream())); }