From 82a8fb984baa3b547ab777a242c50680259b4721 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Thu, 24 Aug 2023 09:01:46 +0100 Subject: [PATCH 1/7] [SPARSE] Add support for sparse gemv with MKLCPU --- CMakeLists.txt | 5 + README.md | 2 + examples/include/example_helper.hpp | 64 +++- examples/sparse_blas/CMakeLists.txt | 25 ++ .../compile_time_dispatching/CMakeLists.txt | 44 +++ .../sparse_blas_gemv_usm_mklcpu.cpp | 249 +++++++++++++++ .../run_time_dispatching/CMakeLists.txt | 65 ++++ .../sparse_blas_gemv_usm.cpp | 257 ++++++++++++++++ include/oneapi/mkl.hpp | 1 + include/oneapi/mkl/detail/backends_table.hpp | 16 +- include/oneapi/mkl/sparse_blas.hpp | 37 +++ .../mkl/sparse_blas/detail/helper_types.hpp | 52 ++++ .../mklcpu/onemkl_sparse_blas_mklcpu.hpp | 34 ++ .../detail/mklcpu/sparse_blas_ct.hpp | 41 +++ .../detail/onemkl_sparse_blas_backends.hxx | 82 +++++ .../mkl/sparse_blas/detail/sparse_blas_ct.hxx | 121 ++++++++ .../mkl/sparse_blas/detail/sparse_blas_rt.hpp | 95 ++++++ include/oneapi/mkl/sparse_blas/types.hpp | 44 +++ src/sparse_blas/CMakeLists.txt | 47 +++ src/sparse_blas/backends/CMakeLists.txt | 22 ++ src/sparse_blas/backends/backend_wrappers.cxx | 83 +++++ .../backends/mkl_common/mkl_basic.cxx | 62 ++++ .../backends/mkl_common/mkl_helper.hpp | 56 ++++ .../backends/mkl_common/mkl_operations.cxx | 125 ++++++++ .../backends/mklcpu/CMakeLists.txt | 83 +++++ .../backends/mklcpu/mklcpu_basic.cpp | 28 ++ .../backends/mklcpu/mklcpu_operations.cpp | 28 ++ .../backends/mklcpu/mklcpu_wrappers.cpp | 32 ++ src/sparse_blas/function_table.hpp | 291 ++++++++++++++++++ src/sparse_blas/sparse_blas_loader.cpp | 167 ++++++++++ tests/unit_tests/CMakeLists.txt | 6 + tests/unit_tests/sparse_blas/CMakeLists.txt | 20 ++ .../sparse_blas/include/sparse_reference.hpp | 149 +++++++++ .../sparse_blas/include/test_common.hpp | 248 +++++++++++++++ .../sparse_blas/source/CMakeLists.txt | 56 ++++ .../sparse_blas/source/sparse_gemv_buffer.cpp | 183 +++++++++++ .../sparse_blas/source/sparse_gemv_usm.cpp | 209 +++++++++++++ 37 files changed, 3125 insertions(+), 4 deletions(-) create mode 100644 examples/sparse_blas/CMakeLists.txt create mode 100644 examples/sparse_blas/compile_time_dispatching/CMakeLists.txt create mode 100644 examples/sparse_blas/compile_time_dispatching/sparse_blas_gemv_usm_mklcpu.cpp create mode 100644 examples/sparse_blas/run_time_dispatching/CMakeLists.txt create mode 100644 examples/sparse_blas/run_time_dispatching/sparse_blas_gemv_usm.cpp create mode 100644 include/oneapi/mkl/sparse_blas.hpp create mode 100644 include/oneapi/mkl/sparse_blas/detail/helper_types.hpp create mode 100644 include/oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp create mode 100644 include/oneapi/mkl/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp create mode 100644 include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx create mode 100644 include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx create mode 100644 include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp create mode 100644 include/oneapi/mkl/sparse_blas/types.hpp create mode 100644 src/sparse_blas/CMakeLists.txt create mode 100644 src/sparse_blas/backends/CMakeLists.txt create mode 100644 src/sparse_blas/backends/backend_wrappers.cxx create mode 100644 src/sparse_blas/backends/mkl_common/mkl_basic.cxx create mode 100644 src/sparse_blas/backends/mkl_common/mkl_helper.hpp create mode 100644 src/sparse_blas/backends/mkl_common/mkl_operations.cxx create mode 100644 src/sparse_blas/backends/mklcpu/CMakeLists.txt create mode 100644 src/sparse_blas/backends/mklcpu/mklcpu_basic.cpp create mode 100644 src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp create mode 100644 src/sparse_blas/backends/mklcpu/mklcpu_wrappers.cpp create mode 100644 src/sparse_blas/function_table.hpp create mode 100644 src/sparse_blas/sparse_blas_loader.cpp create mode 100644 tests/unit_tests/sparse_blas/CMakeLists.txt create mode 100644 tests/unit_tests/sparse_blas/include/sparse_reference.hpp create mode 100644 tests/unit_tests/sparse_blas/include/test_common.hpp create mode 100644 tests/unit_tests/sparse_blas/source/CMakeLists.txt create mode 100644 tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp create mode 100644 tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 0e29fcc30..15540c9df 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,6 +105,9 @@ if(ENABLE_MKLGPU_BACKEND OR ENABLE_ROCFFT_BACKEND) list(APPEND DOMAINS_LIST "dft") endif() +if(ENABLE_MKLCPU_BACKEND) + list(APPEND DOMAINS_LIST "sparse_blas") +endif() if(ENABLE_PORTBLAS_BACKEND AND (ENABLE_MKLCPU_BACKEND OR @@ -204,6 +207,8 @@ endif() if(NOT TARGET_DOMAINS OR TARGET_DOMAINS STREQUAL "None") # Set to all by default set(TARGET_DOMAINS ${DOMAINS_LIST}) + # Remove sparse_blas from the default until it is supported by MKLCPU and MKLGPU backends + list(REMOVE_ITEM TARGET_DOMAINS "sparse_blas") else() # Make sure the input was converted to list string(REPLACE " " ";" TARGET_DOMAINS ${TARGET_DOMAINS}) diff --git a/README.md b/README.md index ee30aece1..69f70d873 100644 --- a/README.md +++ b/README.md @@ -144,6 +144,8 @@ $> clang++ -fsycl app.o –L$ONEMKL/lib –lonemkl_blas_mklcpu –lonemkl_blas_c Supported domains: BLAS, LAPACK, RNG, DFT +Support for SPARSE_BLAS domain is in progress and disabled by default. Use it at your own risks. + #### Linux* diff --git a/examples/include/example_helper.hpp b/examples/include/example_helper.hpp index 4f73f8971..312404ee7 100644 --- a/examples/include/example_helper.hpp +++ b/examples/include/example_helper.hpp @@ -20,11 +20,19 @@ #ifndef __EXAMPLE_HELPER_HPP__ #define __EXAMPLE_HELPER_HPP__ +#if __has_include() +#include +#else +#include +#endif + +#include + // // helpers for initializing templated scalar data type values. // template -fp set_fp_value(fp arg1, fp arg2 = 0.0) { +fp set_fp_value(fp arg1, fp /*arg2*/ = fp(0.0)) { return arg1; } @@ -67,4 +75,58 @@ void rand_matrix(vec &M, oneapi::mkl::transpose trans, int m, int n, int ld) { } } +template +intType generate_sparse_matrix(const intType nx, intType *ia, intType *ja, fp *a, + const intType index = 0) { + intType nz = nx, ny = nx; + intType nnz = 0; + intType current_row; + + ia[0] = index; + + for (intType iz = 0; iz < nz; iz++) { + for (intType iy = 0; iy < ny; iy++) { + for (intType ix = 0; ix < nx; ix++) { + current_row = iz * nx * ny + iy * nx + ix; + + for (intType sz = -1; sz <= 1; sz++) { + if (iz + sz > -1 && iz + sz < nz) { + for (intType sy = -1; sy <= 1; sy++) { + if (iy + sy > -1 && iy + sy < ny) { + for (intType sx = -1; sx <= 1; sx++) { + if (ix + sx > -1 && ix + sx < nx) { + intType current_column = + current_row + sz * nx * ny + sy * nx + sx; + ja[nnz] = current_column + index; + if (current_column == current_row) { + a[nnz++] = set_fp_value(fp(26.0)); + } + else { + a[nnz++] = set_fp_value(fp(-1.0)); + } + } // end + // x + // bounds + // test + } // end sx loop + } // end y bounds test + } // end sy loop + } // end z bounds test + } // end sz loop + ia[current_row + 1] = nnz + index; + + } // end ix loop + } // end iy loop + } // end iz loop + return nnz; +} + +template +void free_vec(std::vector &ptr_vec, sycl::queue queue) { + for (auto ptr : ptr_vec) { + sycl::free(ptr, queue); + } + ptr_vec.clear(); +} + #endif //__EXAMPLE_HELPER_HPP__ diff --git a/examples/sparse_blas/CMakeLists.txt b/examples/sparse_blas/CMakeLists.txt new file mode 100644 index 000000000..721512429 --- /dev/null +++ b/examples/sparse_blas/CMakeLists.txt @@ -0,0 +1,25 @@ +#=============================================================================== +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# +# SPDX-License-Identifier: Apache-2.0 +#=============================================================================== + +add_subdirectory(compile_time_dispatching) + +# runtime compilation is only possible with dynamic libraries +if (BUILD_SHARED_LIBS) + add_subdirectory(run_time_dispatching) +endif() diff --git a/examples/sparse_blas/compile_time_dispatching/CMakeLists.txt b/examples/sparse_blas/compile_time_dispatching/CMakeLists.txt new file mode 100644 index 000000000..cb95333b4 --- /dev/null +++ b/examples/sparse_blas/compile_time_dispatching/CMakeLists.txt @@ -0,0 +1,44 @@ +#=============================================================================== +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# +# SPDX-License-Identifier: Apache-2.0 +#=============================================================================== + +#Build object from all sources +set(SPARSE_BLAS_BACKENDS "") + +if(ENABLE_MKLCPU_BACKEND) + list(APPEND SPARSE_BLAS_BACKENDS "mklcpu") +endif() + +include(WarningsUtils) + +foreach(backend ${SPARSE_BLAS_BACKENDS}) + set(EXAMPLE_NAME example_sparse_blas_gemv_usm_${backend}) + add_executable(${EXAMPLE_NAME} sparse_blas_gemv_usm_${backend}.cpp) + target_include_directories(${EXAMPLE_NAME} + PUBLIC ${PROJECT_SOURCE_DIR}/examples/include + PUBLIC ${PROJECT_SOURCE_DIR}/include + PUBLIC ${CMAKE_BINARY_DIR}/bin + ) + + add_dependencies(${EXAMPLE_NAME} onemkl_sparse_blas_${backend}) + target_link_libraries(${EXAMPLE_NAME} PRIVATE ONEMKL::SYCL::SYCL onemkl_sparse_blas_${backend}) + + # Register example as ctest + add_test(NAME sparse_blas/EXAMPLE/CT/sparse_blas_gemv_usm_${backend} COMMAND ${EXAMPLE_NAME}) +endforeach(backend) + diff --git a/examples/sparse_blas/compile_time_dispatching/sparse_blas_gemv_usm_mklcpu.cpp b/examples/sparse_blas/compile_time_dispatching/sparse_blas_gemv_usm_mklcpu.cpp new file mode 100644 index 000000000..4b86f976d --- /dev/null +++ b/examples/sparse_blas/compile_time_dispatching/sparse_blas_gemv_usm_mklcpu.cpp @@ -0,0 +1,249 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +/* +* +* Content: +* This example demonstrates use of DPCPP API oneapi::mkl::sparse::gemv +* using unified shared memory to perform general sparse matrix-vector +* multiplication on a INTEL CPU SYCL device. +* +* y = alpha * op(A) * x + beta * y +* +* where op() is defined by one of +* +* oneapi::mkl::transpose::{nontrans,trans,conjtrans} +* +* +* This example demonstrates only single precision (float) data type for +* gemv matrix data +* +* +*******************************************************************************/ + +// stl includes +#include +#include + +#if __has_include() +#include +#else +#include +#endif +#include "oneapi/mkl.hpp" + +#include "example_helper.hpp" + +// +// Main example for Sparse Matrix-Vector Multiply consisting of +// initialization of A matrix, x and y vectors as well as +// scalars alpha and beta. Then the product +// +// y = alpha * op(A) * x + beta * y +// +// is performed and finally the results are post processed. +// +int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { + using fp = float; + using intType = std::int32_t; + + // Matrix data size + intType size = 4; + intType nrows = size * size * size; + + // Set scalar fp values + fp alpha = set_fp_value(fp(1.0)); + fp beta = set_fp_value(fp(0.0)); + + // Catch asynchronous exceptions + auto exception_handler = [](sycl::exception_list exceptions) { + for (std::exception_ptr const &e : exceptions) { + try { + std::rethrow_exception(e); + } + catch (sycl::exception const &e) { + std::cout << "Caught asynchronous SYCL " + "exception during sparse::gemv:\n" + << e.what() << std::endl; + } + } + }; + + // create execution queue and buffers of matrix data + sycl::queue cpu_queue(cpu_dev, exception_handler); + oneapi::mkl::backend_selector cpu_selector{ cpu_queue }; + + intType *ia, *ja; + fp *a, *x, *y, *z; + std::size_t sizea = static_cast(27 * nrows); + std::size_t sizeja = static_cast(27 * nrows); + std::size_t sizeia = static_cast(nrows + 1); + std::size_t sizevec = static_cast(nrows); + + ia = (intType *)sycl::malloc_shared(sizeia * sizeof(intType), cpu_queue); + ja = (intType *)sycl::malloc_shared(sizeja * sizeof(intType), cpu_queue); + a = (fp *)sycl::malloc_shared(sizea * sizeof(fp), cpu_queue); + x = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), cpu_queue); + y = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), cpu_queue); + z = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), cpu_queue); + + if (!ia || !ja || !a || !x || !y || !z) { + throw std::runtime_error("Failed to allocate USM memory"); + } + + intType nnz = generate_sparse_matrix(size, ia, ja, a); + + // Init vectors x and y + for (int i = 0; i < nrows; i++) { + x[i] = set_fp_value(fp(1.0)); + y[i] = set_fp_value(fp(0.0)); + z[i] = set_fp_value(fp(0.0)); + } + + std::vector int_ptr_vec; + int_ptr_vec.push_back(ia); + int_ptr_vec.push_back(ja); + std::vector fp_ptr_vec; + fp_ptr_vec.push_back(a); + fp_ptr_vec.push_back(x); + fp_ptr_vec.push_back(y); + fp_ptr_vec.push_back(z); + + // + // Execute Matrix Multiply + // + + oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans; + std::cout << "\n\t\tsparse::gemv parameters:\n"; + std::cout << "\t\t\ttransA = " + << (transA == oneapi::mkl::transpose::nontrans + ? "nontrans" + : (transA == oneapi::mkl::transpose::trans ? "trans" : "conjtrans")) + << std::endl; + std::cout << "\t\t\tnrows = " << nrows << std::endl; + std::cout << "\t\t\talpha = " << alpha << ", beta = " << beta << std::endl; + + // create and initialize handle for a Sparse Matrix in CSR format + oneapi::mkl::sparse::matrix_handle_t handle = nullptr; + + oneapi::mkl::sparse::init_matrix_handle(cpu_selector, &handle); + + auto ev_set = oneapi::mkl::sparse::set_csr_data(cpu_selector, handle, nrows, nrows, nnz, + oneapi::mkl::index_base::zero, ia, ja, a); + + auto ev_opt = oneapi::mkl::sparse::optimize_gemv(cpu_selector, transA, handle, { ev_set }); + + auto ev_gemv = + oneapi::mkl::sparse::gemv(cpu_selector, transA, alpha, handle, x, beta, y, { ev_opt }); + + auto ev_release = + oneapi::mkl::sparse::release_matrix_handle(cpu_selector, &handle, { ev_gemv }); + + ev_release.wait_and_throw(); + + // + // Post Processing + // + + fp *res = y; + for (intType row = 0; row < nrows; row++) { + fp tmp = set_fp_value(fp(0.0)); + for (intType i = ia[row]; i < ia[row + 1]; i++) { + tmp += a[i] * x[ja[i]]; + } + z[row] = alpha * tmp + beta * z[row]; + } + + fp diff = set_fp_value(fp(0.0)); + for (intType i = 0; i < nrows; i++) { + diff += (z[i] - res[i]) * (z[i] - res[i]); + } + std::cout << "\n\t\t sparse::gemv residual:\n" + << "\t\t\t" << diff << "\n\tFinished" << std::endl; + + free_vec(fp_ptr_vec, cpu_queue); + free_vec(int_ptr_vec, cpu_queue); + + if (diff > 0) + return 1; + + return 0; +} + +// +// Description of example setup, apis used and supported floating point type +// precisions +// +void print_example_banner() { + std::cout << "" << std::endl; + std::cout << "########################################################################" + << std::endl; + std::cout << "# Sparse Matrix-Vector Multiply Example: " << std::endl; + std::cout << "# " << std::endl; + std::cout << "# y = alpha * op(A) * x + beta * y" << std::endl; + std::cout << "# " << std::endl; + std::cout << "# where A is a sparse matrix in CSR format, x and y are " + "dense vectors" + << std::endl; + std::cout << "# and alpha, beta are floating point type precision scalars." << std::endl; + std::cout << "# " << std::endl; + std::cout << "# Using apis:" << std::endl; + std::cout << "# sparse::gemv" << std::endl; + std::cout << "# " << std::endl; + std::cout << "# Using single precision (float) data type" << std::endl; + std::cout << "# " << std::endl; + std::cout << "# Running on Intel CPU device" << std::endl; + std::cout << "# " << std::endl; + std::cout << "########################################################################" + << std::endl; + std::cout << std::endl; +} + +// +// Main entry point for example +// +int main(int /*argc*/, char ** /*argv*/) { + print_example_banner(); + + try { + // TODO: Add cuSPARSE compile-time dispatcher in this example once it is supported. + sycl::device cpu_dev(sycl::cpu_selector_v); + + std::cout << "Running Sparse BLAS GEMV USM example on CPU device." << std::endl; + std::cout << "Device name is: " << cpu_dev.get_info() + << std::endl; + std::cout << "Running with single precision real data type:" << std::endl; + + run_sparse_matrix_vector_multiply_example(cpu_dev); + std::cout << "Sparse BLAS GEMV USM example ran OK." << std::endl; + } + catch (sycl::exception const &e) { + std::cerr << "Caught synchronous SYCL exception during Sparse GEMV:" << std::endl; + std::cerr << "\t" << e.what() << std::endl; + std::cerr << "\tSYCL error code: " << e.code().value() << std::endl; + return 1; + } + catch (std::exception const &e) { + std::cerr << "Caught std::exception during Sparse GEMV:" << std::endl; + std::cerr << "\t" << e.what() << std::endl; + return 1; + } + + return 0; +} diff --git a/examples/sparse_blas/run_time_dispatching/CMakeLists.txt b/examples/sparse_blas/run_time_dispatching/CMakeLists.txt new file mode 100644 index 000000000..12d829fe3 --- /dev/null +++ b/examples/sparse_blas/run_time_dispatching/CMakeLists.txt @@ -0,0 +1,65 @@ +#=============================================================================== +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# +# SPDX-License-Identifier: Apache-2.0 +#=============================================================================== + +# NOTE: user needs to set env var SYCL_DEVICE_FILTER to use runtime example (no need to specify backend when building with CMake) + +include(WarningsUtils) + +# Build object from all example sources +set(SPARSE_BLAS_RT_SOURCES "sparse_blas_gemv_usm") +# Set up for the right backend for run-time dispatching examples +# If users build more than one backend (i.e. mklcpu and mklgpu, or mklcpu and CUDA), they may need to +# overwrite SYCL_DEVICE_FILTER in their environment to run on the desired backend +set(DEVICE_FILTERS "") +if(ENABLE_MKLCPU_BACKEND) + list(APPEND DEVICE_FILTERS "cpu") +endif() + +message(STATUS "SYCL_DEVICE_FILTER will be set to the following value(s): [${DEVICE_FILTERS}] for run-time dispatching examples") + +foreach(sparse_blas_rt_sources ${SPARSE_BLAS_RT_SOURCES}) + add_executable(example_${sparse_blas_rt_sources} ${sparse_blas_rt_sources}.cpp) + target_include_directories(example_${sparse_blas_rt_sources} + PUBLIC ${PROJECT_SOURCE_DIR}/examples/include + PUBLIC ${PROJECT_SOURCE_DIR}/include + PUBLIC ${CMAKE_BINARY_DIR}/bin + ) + + add_dependencies(example_${sparse_blas_rt_sources} onemkl) + + if (USE_ADD_SYCL_TO_TARGET_INTEGRATION) + add_sycl_to_target(TARGET example_${sparse_blas_rt_sources} SOURCES ${SPARSE_BLAS_RT_SOURCES}) + endif() + + target_link_libraries(example_${sparse_blas_rt_sources} + PUBLIC onemkl + PUBLIC ONEMKL::SYCL::SYCL + PUBLIC ${CMAKE_DL_LIBS} + PRIVATE onemkl_warnings + ) + + # Register example as ctest + foreach(device_filter ${DEVICE_FILTERS}) + add_test(NAME ${domain}/EXAMPLE/RT/${sparse_blas_rt_sources}/${device_filter} COMMAND example_${sparse_blas_rt_sources}) + set_property(TEST ${domain}/EXAMPLE/RT/${sparse_blas_rt_sources}/${device_filter} PROPERTY + ENVIRONMENT LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/lib:$ENV{LD_LIBRARY_PATH} + ENVIRONMENT SYCL_DEVICE_FILTER=${device_filter}) + endforeach(device_filter) + +endforeach() diff --git a/examples/sparse_blas/run_time_dispatching/sparse_blas_gemv_usm.cpp b/examples/sparse_blas/run_time_dispatching/sparse_blas_gemv_usm.cpp new file mode 100644 index 000000000..fb5dc8a67 --- /dev/null +++ b/examples/sparse_blas/run_time_dispatching/sparse_blas_gemv_usm.cpp @@ -0,0 +1,257 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +/* +* +* Content: +* This example demonstrates use of DPCPP API oneapi::mkl::sparse::gemv +* using unified shared memory to perform general sparse matrix-vector +* multiplication on a SYCL device (HOST, CPU, GPU) that is selected +* during runtime. +* +* y = alpha * op(A) * x + beta * y +* +* where op() is defined by one of +* +* oneapi::mkl::transpose::{nontrans,trans,conjtrans} +* +* +* This example demonstrates only single precision (float) data type for +* gemv matrix data +* +* +*******************************************************************************/ + +// stl includes +#include +#include + +#if __has_include() +#include +#else +#include +#endif +#include "oneapi/mkl.hpp" + +#include "example_helper.hpp" + +// +// Main example for Sparse Matrix-Vector Multiply consisting of +// initialization of A matrix, x and y vectors as well as +// scalars alpha and beta. Then the product +// +// y = alpha * op(A) * x + beta * y +// +// is performed and finally the results are post processed. +// +int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { + using fp = float; + using intType = std::int32_t; + + // Matrix data size + intType size = 4; + intType nrows = size * size * size; + + // Set scalar fp values + fp alpha = set_fp_value(fp(1.0)); + fp beta = set_fp_value(fp(0.0)); + + // Catch asynchronous exceptions + auto exception_handler = [](sycl::exception_list exceptions) { + for (std::exception_ptr const &e : exceptions) { + try { + std::rethrow_exception(e); + } + catch (sycl::exception const &e) { + std::cout << "Caught asynchronous SYCL " + "exception during sparse::gemv:\n" + << e.what() << std::endl; + } + } + }; + + // create execution queue and buffers of matrix data + sycl::queue main_queue(dev, exception_handler); + + intType *ia, *ja; + fp *a, *x, *y, *z; + std::size_t sizea = static_cast(27 * nrows); + std::size_t sizeja = static_cast(27 * nrows); + std::size_t sizeia = static_cast(nrows + 1); + std::size_t sizevec = static_cast(nrows); + + ia = (intType *)sycl::malloc_shared(sizeia * sizeof(intType), main_queue); + ja = (intType *)sycl::malloc_shared(sizeja * sizeof(intType), main_queue); + a = (fp *)sycl::malloc_shared(sizea * sizeof(fp), main_queue); + x = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), main_queue); + y = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), main_queue); + z = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), main_queue); + + if (!ia || !ja || !a || !x || !y || !z) { + throw std::runtime_error("Failed to allocate USM memory"); + } + + intType nnz = generate_sparse_matrix(size, ia, ja, a); + + // Init vectors x and y + for (int i = 0; i < nrows; i++) { + x[i] = set_fp_value(fp(1.0)); + y[i] = set_fp_value(fp(0.0)); + z[i] = set_fp_value(fp(0.0)); + } + + std::vector int_ptr_vec; + int_ptr_vec.push_back(ia); + int_ptr_vec.push_back(ja); + std::vector fp_ptr_vec; + fp_ptr_vec.push_back(a); + fp_ptr_vec.push_back(x); + fp_ptr_vec.push_back(y); + fp_ptr_vec.push_back(z); + + // + // Execute Matrix Multiply + // + + oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans; + std::cout << "\n\t\tsparse::gemv parameters:\n"; + std::cout << "\t\t\ttransA = " + << (transA == oneapi::mkl::transpose::nontrans + ? "nontrans" + : (transA == oneapi::mkl::transpose::trans ? "trans" : "conjtrans")) + << std::endl; + std::cout << "\t\t\tnrows = " << nrows << std::endl; + std::cout << "\t\t\talpha = " << alpha << ", beta = " << beta << std::endl; + + // create and initialize handle for a Sparse Matrix in CSR format + oneapi::mkl::sparse::matrix_handle_t handle = nullptr; + + oneapi::mkl::sparse::init_matrix_handle(main_queue, &handle); + + auto ev_set = oneapi::mkl::sparse::set_csr_data(main_queue, handle, nrows, nrows, nnz, + oneapi::mkl::index_base::zero, ia, ja, a); + + auto ev_opt = oneapi::mkl::sparse::optimize_gemv(main_queue, transA, handle, { ev_set }); + + auto ev_gemv = + oneapi::mkl::sparse::gemv(main_queue, transA, alpha, handle, x, beta, y, { ev_opt }); + + auto ev_release = oneapi::mkl::sparse::release_matrix_handle(main_queue, &handle, { ev_gemv }); + + ev_release.wait_and_throw(); + + // + // Post Processing + // + + fp *res = y; + for (intType row = 0; row < nrows; row++) { + fp tmp = set_fp_value(fp(0.0)); + for (intType i = ia[row]; i < ia[row + 1]; i++) { + tmp += a[i] * x[ja[i]]; + } + z[row] = alpha * tmp + beta * z[row]; + } + + fp diff = set_fp_value(fp(0.0)); + for (intType i = 0; i < nrows; i++) { + diff += (z[i] - res[i]) * (z[i] - res[i]); + } + std::cout << "\n\t\t sparse::gemv residual:\n" + << "\t\t\t" << diff << "\n\tFinished" << std::endl; + + free_vec(fp_ptr_vec, main_queue); + free_vec(int_ptr_vec, main_queue); + + if (diff > 0) + return 1; + + return 0; +} + +// +// Description of example setup, apis used and supported floating point type +// precisions +// +void print_example_banner() { + std::cout << "" << std::endl; + std::cout << "########################################################################" + << std::endl; + std::cout << "# Sparse Matrix-Vector Multiply Example: " << std::endl; + std::cout << "# " << std::endl; + std::cout << "# y = alpha * op(A) * x + beta * y" << std::endl; + std::cout << "# " << std::endl; + std::cout << "# where A is a sparse matrix in CSR format, x and y are " + "dense vectors" + << std::endl; + std::cout << "# and alpha, beta are floating point type precision scalars." << std::endl; + std::cout << "# " << std::endl; + std::cout << "# Using apis:" << std::endl; + std::cout << "# sparse::gemv" << std::endl; + std::cout << "# " << std::endl; + std::cout << "# Using single precision (float) data type" << std::endl; + std::cout << "# " << std::endl; + std::cout << "# Device will be selected during runtime." << std::endl; + std::cout << "# The environment variable SYCL_DEVICE_FILTER can be used to specify" + << std::endl; + std::cout << "# SYCL device" << std::endl; + std::cout << "# " << std::endl; + std::cout << "########################################################################" + << std::endl; + std::cout << std::endl; +} + +// +// Main entry point for example +// +int main(int /*argc*/, char ** /*argv*/) { + print_example_banner(); + + try { + sycl::device dev = sycl::device(); + + if (dev.is_gpu()) { + std::cout << "Running Sparse BLAS GEMV USM example on GPU device." << std::endl; + std::cout << "Device name is: " << dev.get_info() + << std::endl; + } + else { + std::cout << "Running Sparse BLAS GEMV USM example on CPU device." << std::endl; + std::cout << "Device name is: " << dev.get_info() + << std::endl; + } + std::cout << "Running with single precision real data type:" << std::endl; + + run_sparse_matrix_vector_multiply_example(dev); + std::cout << "Sparse BLAS GEMV USM example ran OK." << std::endl; + } + catch (sycl::exception const &e) { + std::cerr << "Caught synchronous SYCL exception during Sparse GEMV:" << std::endl; + std::cerr << "\t" << e.what() << std::endl; + std::cerr << "\tSYCL error code: " << e.code().value() << std::endl; + return 1; + } + catch (std::exception const &e) { + std::cerr << "Caught std::exception during Sparse GEMV:" << std::endl; + std::cerr << "\t" << e.what() << std::endl; + return 1; + } + + return 0; +} diff --git a/include/oneapi/mkl.hpp b/include/oneapi/mkl.hpp index a49c1ceda..f3e9b8618 100644 --- a/include/oneapi/mkl.hpp +++ b/include/oneapi/mkl.hpp @@ -26,5 +26,6 @@ #include "oneapi/mkl/dft.hpp" #include "oneapi/mkl/lapack.hpp" #include "oneapi/mkl/rng.hpp" +#include "oneapi/mkl/sparse_blas.hpp" #endif //_ONEMKL_HPP_ diff --git a/include/oneapi/mkl/detail/backends_table.hpp b/include/oneapi/mkl/detail/backends_table.hpp index be5e3d897..ed1facd7f 100644 --- a/include/oneapi/mkl/detail/backends_table.hpp +++ b/include/oneapi/mkl/detail/backends_table.hpp @@ -41,7 +41,7 @@ namespace oneapi { namespace mkl { enum class device : uint16_t { x86cpu, intelgpu, nvidiagpu, amdgpu }; -enum class domain : uint16_t { blas, dft, lapack, rng }; +enum class domain : uint16_t { blas, dft, lapack, rng, sparse_blas }; static std::map>> libraries = { { domain::blas, @@ -161,13 +161,23 @@ static std::map>> libraries = #ifdef ENABLE_CURAND_BACKEND LIB_NAME("rng_curand") #endif - } } } } + } } } }, + + { domain::sparse_blas, + { { device::x86cpu, + { +#ifdef ENABLE_MKLCPU_BACKEND + LIB_NAME("sparse_blas_mklcpu") +#endif + } } } }, }; static std::map table_names = { { domain::blas, "mkl_blas_table" }, { domain::lapack, "mkl_lapack_table" }, { domain::dft, "mkl_dft_table" }, - { domain::rng, "mkl_rng_table" } }; + { domain::rng, "mkl_rng_table" }, + { domain::sparse_blas, + "mkl_sparse_blas_table" } }; } //namespace mkl } //namespace oneapi diff --git a/include/oneapi/mkl/sparse_blas.hpp b/include/oneapi/mkl/sparse_blas.hpp new file mode 100644 index 000000000..139c30dc5 --- /dev/null +++ b/include/oneapi/mkl/sparse_blas.hpp @@ -0,0 +1,37 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +#ifndef _ONEMKL_SPARSE_BLAS_HPP_ +#define _ONEMKL_SPARSE_BLAS_HPP_ + +#if __has_include() +#include +#else +#include +#endif + +#include "oneapi/mkl/detail/config.hpp" + +#ifdef ENABLE_MKLCPU_BACKEND +#include "sparse_blas/detail/mklcpu/sparse_blas_ct.hpp" +#endif + +#include "sparse_blas/detail/sparse_blas_rt.hpp" + +#endif // _ONEMKL_SPARSE_BLAS_HPP_ diff --git a/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp b/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp new file mode 100644 index 000000000..4964b1eff --- /dev/null +++ b/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp @@ -0,0 +1,52 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_HELPER_TYPES_HPP_ +#define _ONEMKL_SPARSE_BLAS_DETAIL_HELPER_TYPES_HPP_ + +#include +#include +#include + +namespace oneapi { +namespace mkl { +namespace sparse { +namespace detail { + +struct matrix_handle; + +template +inline constexpr bool is_fp_supported_v = + std::is_same_v || std::is_same_v || + std::is_same_v> || std::is_same_v>; + +template +inline constexpr bool is_int_supported_v = + std::is_same_v || std::is_same_v; + +template +inline constexpr bool are_fp_int_supported_v = + is_fp_supported_v&& is_int_supported_v; + +} // namespace detail +} // namespace sparse +} // namespace mkl +} // namespace oneapi + +#endif // _ONEMKL_SPARSE_BLAS_DETAIL_HELPER_TYPES_HPP_ diff --git a/include/oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp b/include/oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp new file mode 100644 index 000000000..2535e61f6 --- /dev/null +++ b/include/oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp @@ -0,0 +1,34 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_MKLCPU_ONEMKL_SPARSE_BLAS_MKLCPU_HPP_ +#define _ONEMKL_SPARSE_BLAS_DETAIL_MKLCPU_ONEMKL_SPARSE_BLAS_MKLCPU_HPP_ + +#include "oneapi/mkl/detail/export.hpp" +#include "oneapi/mkl/sparse_blas/detail/helper_types.hpp" + +namespace oneapi::mkl::sparse::mklcpu { + +namespace detail = oneapi::mkl::sparse::detail; + +#include "oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx" + +} // namespace oneapi::mkl::sparse::mklcpu + +#endif // _ONEMKL_SPARSE_BLAS_DETAIL_MKLCPU_ONEMKL_SPARSE_BLAS_MKLCPU_HPP_ diff --git a/include/oneapi/mkl/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp b/include/oneapi/mkl/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp new file mode 100644 index 000000000..bc0089c57 --- /dev/null +++ b/include/oneapi/mkl/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp @@ -0,0 +1,41 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_MKLCPU_SPARSE_BLAS_CT_HPP_ +#define _ONEMKL_SPARSE_BLAS_DETAIL_MKLCPU_SPARSE_BLAS_CT_HPP_ + +#include "oneapi/mkl/sparse_blas/types.hpp" +#include "oneapi/mkl/detail/backends.hpp" +#include "oneapi/mkl/detail/backend_selector.hpp" + +#include "onemkl_sparse_blas_mklcpu.hpp" + +namespace oneapi { +namespace mkl { +namespace sparse { + +#define BACKEND mklcpu +#include "oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx" +#undef BACKEND + +} //namespace sparse +} //namespace mkl +} //namespace oneapi + +#endif // _ONEMKL_SPARSE_BLAS_DETAIL_MKLCPU_SPARSE_BLAS_CT_HPP_ diff --git a/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx b/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx new file mode 100644 index 000000000..7541f004a --- /dev/null +++ b/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx @@ -0,0 +1,82 @@ +/*************************************************************************** +* Copyright(C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0(the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +// This file is meant to be included in each backend onemkl_sparse_blas_BACKEND.hpp files. +// It is used to exports each symbol to the onemkl_sparse_blas_BACKEND library. + +ONEMKL_EXPORT void init_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle); + +ONEMKL_EXPORT sycl::event release_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle, + const std::vector &dependencies = {}); + +template +ONEMKL_EXPORT std::enable_if_t> set_csr_data( + sycl::queue &queue, matrix_handle_t handle, intType num_rows, intType num_cols, intType nnz, + index_base index, sycl::buffer &row_ptr, sycl::buffer &col_ind, + sycl::buffer &val); + +template +ONEMKL_EXPORT std::enable_if_t, sycl::event> +set_csr_data(sycl::queue &queue, matrix_handle_t handle, intType num_rows, intType num_cols, + intType nnz, index_base index, intType *row_ptr, intType *col_ind, fpType *val, + const std::vector &dependencies = {}); + +ONEMKL_EXPORT sycl::event optimize_gemv(sycl::queue &queue, transpose transpose_val, + matrix_handle_t handle, + const std::vector &dependencies = {}); + +ONEMKL_EXPORT sycl::event optimize_trsv(sycl::queue &queue, uplo uplo_val, transpose transpose_val, + diag diag_val, matrix_handle_t handle, + const std::vector &dependencies = {}); + +template +ONEMKL_EXPORT std::enable_if_t> gemv( + sycl::queue &queue, transpose transpose_val, const fpType alpha, matrix_handle_t A_handle, + sycl::buffer &x, const fpType beta, sycl::buffer &y); + +template +ONEMKL_EXPORT std::enable_if_t, sycl::event> gemv( + sycl::queue &queue, transpose transpose_val, const fpType alpha, matrix_handle_t A_handle, + const fpType *x, const fpType beta, fpType *y, + const std::vector &dependencies = {}); + +template +ONEMKL_EXPORT std::enable_if_t> trsv( + sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, + matrix_handle_t A_handle, sycl::buffer &x, sycl::buffer &y); + +template +ONEMKL_EXPORT std::enable_if_t, sycl::event> trsv( + sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, + matrix_handle_t A_handle, const fpType *x, fpType *y, + const std::vector &dependencies = {}); + +template +ONEMKL_EXPORT std::enable_if_t> gemm( + sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B, + const fpType alpha, matrix_handle_t A_handle, sycl::buffer &B, + const std::int64_t columns, const std::int64_t ldb, const fpType beta, + sycl::buffer &C, const std::int64_t ldc); + +template +ONEMKL_EXPORT std::enable_if_t, sycl::event> gemm( + sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B, + const fpType alpha, matrix_handle_t A_handle, const fpType *B, const std::int64_t columns, + const std::int64_t ldb, const fpType beta, fpType *C, const std::int64_t ldc, + const std::vector &dependencies = {}); diff --git a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx b/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx new file mode 100644 index 000000000..d769e32c2 --- /dev/null +++ b/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx @@ -0,0 +1,121 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +// This file is meant to be included in each backend sparse_blas_ct.hpp files +// Each function calls the implementation from onemkl_sparse_blas_backends.hxx + +#ifndef BACKEND +#error "BACKEND is not defined" +#endif + +inline void init_matrix_handle(backend_selector selector, + matrix_handle_t *p_handle) { + BACKEND::init_matrix_handle(selector.get_queue(), p_handle); +} + +inline sycl::event release_matrix_handle(backend_selector selector, + matrix_handle_t *p_handle, + const std::vector &dependencies = {}) { + return BACKEND::release_matrix_handle(selector.get_queue(), p_handle, dependencies); +} + +template +std::enable_if_t> set_csr_data( + backend_selector selector, matrix_handle_t handle, intType num_rows, + intType num_cols, intType nnz, index_base index, sycl::buffer &row_ptr, + sycl::buffer &col_ind, sycl::buffer &val) { + BACKEND::set_csr_data(selector.get_queue(), handle, num_rows, num_cols, nnz, index, row_ptr, + col_ind, val); +} + +template +std::enable_if_t, sycl::event> set_csr_data( + backend_selector selector, matrix_handle_t handle, intType num_rows, + intType num_cols, intType nnz, index_base index, intType *row_ptr, intType *col_ind, + fpType *val, const std::vector &dependencies = {}) { + return BACKEND::set_csr_data(selector.get_queue(), handle, num_rows, num_cols, nnz, index, + row_ptr, col_ind, val, dependencies); +} + +inline sycl::event optimize_gemv(backend_selector selector, + transpose transpose_val, matrix_handle_t handle, + const std::vector &dependencies = {}) { + return BACKEND::optimize_gemv(selector.get_queue(), transpose_val, handle, dependencies); +} + +inline sycl::event optimize_trsv(backend_selector selector, uplo uplo_val, + transpose transpose_val, diag diag_val, matrix_handle_t handle, + const std::vector &dependencies = {}) { + return BACKEND::optimize_trsv(selector.get_queue(), uplo_val, transpose_val, diag_val, handle, + dependencies); +} + +template +std::enable_if_t> gemv( + backend_selector selector, transpose transpose_val, const fpType alpha, + matrix_handle_t A_handle, sycl::buffer &x, const fpType beta, + sycl::buffer &y) { + BACKEND::gemv(selector.get_queue(), transpose_val, alpha, A_handle, x, beta, y); +} + +template +std::enable_if_t, sycl::event> gemv( + backend_selector selector, transpose transpose_val, const fpType alpha, + matrix_handle_t A_handle, const fpType *x, const fpType beta, fpType *y, + const std::vector &dependencies = {}) { + return BACKEND::gemv(selector.get_queue(), transpose_val, alpha, A_handle, x, beta, y, + dependencies); +} + +template +std::enable_if_t> trsv( + backend_selector selector, uplo uplo_val, transpose transpose_val, + diag diag_val, matrix_handle_t A_handle, sycl::buffer &x, + sycl::buffer &y) { + BACKEND::trsv(selector.get_queue(), uplo_val, transpose_val, diag_val, A_handle, x, y); +} + +template +std::enable_if_t, sycl::event> trsv( + backend_selector selector, uplo uplo_val, transpose transpose_val, + diag diag_val, matrix_handle_t A_handle, const fpType *x, fpType *y, + const std::vector &dependencies = {}) { + return BACKEND::trsv(selector.get_queue(), uplo_val, transpose_val, diag_val, A_handle, x, y, + dependencies); +} + +template +std::enable_if_t> gemm( + backend_selector selector, layout dense_matrix_layout, transpose transpose_A, + transpose transpose_B, const fpType alpha, matrix_handle_t A_handle, sycl::buffer &B, + const std::int64_t columns, const std::int64_t ldb, const fpType beta, + sycl::buffer &C, const std::int64_t ldc) { + BACKEND::gemm(selector.get_queue(), dense_matrix_layout, transpose_A, transpose_B, alpha, + A_handle, B, columns, ldb, beta, C, ldc); +} + +template +std::enable_if_t, sycl::event> gemm( + backend_selector selector, layout dense_matrix_layout, transpose transpose_A, + transpose transpose_B, const fpType alpha, matrix_handle_t A_handle, const fpType *B, + const std::int64_t columns, const std::int64_t ldb, const fpType beta, fpType *C, + const std::int64_t ldc, const std::vector &dependencies = {}) { + return BACKEND::gemm(selector.get_queue(), dense_matrix_layout, transpose_A, transpose_B, alpha, + A_handle, B, columns, ldb, beta, C, ldc, dependencies); +} diff --git a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp b/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp new file mode 100644 index 000000000..af46f95a6 --- /dev/null +++ b/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp @@ -0,0 +1,95 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_SPARSE_BLAS_RT_HPP_ +#define _ONEMKL_SPARSE_BLAS_DETAIL_SPARSE_BLAS_RT_HPP_ + +#include "oneapi/mkl/sparse_blas/types.hpp" + +namespace oneapi { +namespace mkl { +namespace sparse { + +void init_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle); + +sycl::event release_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle, + const std::vector &dependencies = {}); + +template +std::enable_if_t> set_csr_data( + sycl::queue &queue, matrix_handle_t handle, intType num_rows, intType num_cols, intType nnz, + index_base index, sycl::buffer &row_ptr, sycl::buffer &col_ind, + sycl::buffer &val); + +template +std::enable_if_t, sycl::event> set_csr_data( + sycl::queue &queue, matrix_handle_t handle, intType num_rows, intType num_cols, intType nnz, + index_base index, intType *row_ptr, intType *col_ind, fpType *val, + const std::vector &dependencies = {}); + +sycl::event optimize_gemv(sycl::queue &queue, transpose transpose_val, matrix_handle_t handle, + const std::vector &dependencies = {}); + +sycl::event optimize_trsv(sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, + matrix_handle_t handle, + const std::vector &dependencies = {}); + +template +std::enable_if_t> gemv( + sycl::queue &queue, transpose transpose_val, const fpType alpha, matrix_handle_t A_handle, + sycl::buffer &x, const fpType beta, sycl::buffer &y); + +template +std::enable_if_t, sycl::event> gemv( + sycl::queue &queue, transpose transpose_val, const fpType alpha, matrix_handle_t A_handle, + const fpType *x, const fpType beta, fpType *y, + const std::vector &dependencies = {}); + +template +std::enable_if_t> trsv(sycl::queue &queue, uplo uplo_val, + transpose transpose_val, diag diag_val, + matrix_handle_t A_handle, + sycl::buffer &x, + sycl::buffer &y); + +template +std::enable_if_t, sycl::event> trsv( + sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, + matrix_handle_t A_handle, const fpType *x, fpType *y, + const std::vector &dependencies = {}); + +template +std::enable_if_t> gemm( + sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B, + const fpType alpha, matrix_handle_t A_handle, sycl::buffer &B, + const std::int64_t columns, const std::int64_t ldb, const fpType beta, + sycl::buffer &C, const std::int64_t ldc); + +template +std::enable_if_t, sycl::event> gemm( + sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B, + const fpType alpha, matrix_handle_t A_handle, const fpType *B, const std::int64_t columns, + const std::int64_t ldb, const fpType beta, fpType *C, const std::int64_t ldc, + const std::vector &dependencies = {}); + +} // namespace sparse +} // namespace mkl +} // namespace oneapi + +#endif // _ONEMKL_SPARSE_BLAS_DETAIL_SPARSE_BLAS_RT_HPP_ diff --git a/include/oneapi/mkl/sparse_blas/types.hpp b/include/oneapi/mkl/sparse_blas/types.hpp new file mode 100644 index 000000000..406c7dd1f --- /dev/null +++ b/include/oneapi/mkl/sparse_blas/types.hpp @@ -0,0 +1,44 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +#ifndef _ONEMKL_SPARSE_BLAS_TYPES_HPP_ +#define _ONEMKL_SPARSE_BLAS_TYPES_HPP_ + +#if __has_include() +#include +#else +#include +#endif + +#include + +#include "oneapi/mkl/types.hpp" +#include "detail/helper_types.hpp" + +namespace oneapi { +namespace mkl { +namespace sparse { + +using matrix_handle_t = detail::matrix_handle*; + +} // namespace sparse +} // namespace mkl +} // namespace oneapi + +#endif // _ONEMKL_SPARSE_BLAS_TYPES_HPP_ diff --git a/src/sparse_blas/CMakeLists.txt b/src/sparse_blas/CMakeLists.txt new file mode 100644 index 000000000..e66158e44 --- /dev/null +++ b/src/sparse_blas/CMakeLists.txt @@ -0,0 +1,47 @@ +#=============================================================================== +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# +# SPDX-License-Identifier: Apache-2.0 +#=============================================================================== + +add_subdirectory(backends) + +if(BUILD_SHARED_LIBS) + add_library(onemkl_sparse_blas OBJECT) + target_sources(onemkl_sparse_blas PRIVATE sparse_blas_loader.cpp) + target_include_directories(onemkl_sparse_blas + PRIVATE ${PROJECT_SOURCE_DIR}/include + ${PROJECT_SOURCE_DIR}/src + ${PROJECT_SOURCE_DIR}/src/include + ${CMAKE_BINARY_DIR}/bin + $ + ) + + target_compile_options(onemkl_sparse_blas PRIVATE ${ONEMKL_BUILD_COPT}) + + set_target_properties(onemkl_sparse_blas PROPERTIES + POSITION_INDEPENDENT_CODE ON + ) + if (USE_ADD_SYCL_TO_TARGET_INTEGRATION) + add_sycl_to_target(TARGET onemkl_sparse_blas SOURCES sparse_blas_loader.cpp) + else() + target_link_libraries(onemkl_sparse_blas PUBLIC ONEMKL::SYCL::SYCL) + endif() + + include(WarningsUtils) + target_link_libraries(onemkl_sparse_blas PRIVATE onemkl_warnings) + +endif() diff --git a/src/sparse_blas/backends/CMakeLists.txt b/src/sparse_blas/backends/CMakeLists.txt new file mode 100644 index 000000000..72a2dd207 --- /dev/null +++ b/src/sparse_blas/backends/CMakeLists.txt @@ -0,0 +1,22 @@ +#=============================================================================== +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# +# SPDX-License-Identifier: Apache-2.0 +#=============================================================================== + +if(ENABLE_MKLCPU_BACKEND) + add_subdirectory(mklcpu) +endif() diff --git a/src/sparse_blas/backends/backend_wrappers.cxx b/src/sparse_blas/backends/backend_wrappers.cxx new file mode 100644 index 000000000..b97bc567b --- /dev/null +++ b/src/sparse_blas/backends/backend_wrappers.cxx @@ -0,0 +1,83 @@ +/******************************************************************************* +* Copyright 2023 Codeplay Software Ltd. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +/* +This file lists functions matching those required by sparse_blas_function_table_t in +src/sparse_blas/function_table.hpp. + +To use this: + +#define WRAPPER_VERSION +#define BACKEND + +extern "C" sparse_blas_function_table_t mkl_sparse_blas_table = { + WRAPPER_VERSION, +#include "sparse_blas/backends/backend_wrappers.cxx" +}; + +Changes to this file should be matched to changes in sparse_blas/function_table.hpp. The required +function template instantiations must be added to backend_sparse_blas_instantiations.cxx. +*/ + +// clang-format off +oneapi::mkl::sparse::BACKEND::init_matrix_handle, +oneapi::mkl::sparse::BACKEND::release_matrix_handle, +oneapi::mkl::sparse::BACKEND::set_csr_data, +oneapi::mkl::sparse::BACKEND::set_csr_data, +oneapi::mkl::sparse::BACKEND::set_csr_data, +oneapi::mkl::sparse::BACKEND::set_csr_data, +oneapi::mkl::sparse::BACKEND::set_csr_data, +oneapi::mkl::sparse::BACKEND::set_csr_data, +oneapi::mkl::sparse::BACKEND::set_csr_data, +oneapi::mkl::sparse::BACKEND::set_csr_data, +oneapi::mkl::sparse::BACKEND::set_csr_data, +oneapi::mkl::sparse::BACKEND::set_csr_data, +oneapi::mkl::sparse::BACKEND::set_csr_data, +oneapi::mkl::sparse::BACKEND::set_csr_data, +oneapi::mkl::sparse::BACKEND::set_csr_data, +oneapi::mkl::sparse::BACKEND::set_csr_data, +oneapi::mkl::sparse::BACKEND::set_csr_data, +oneapi::mkl::sparse::BACKEND::set_csr_data, +oneapi::mkl::sparse::BACKEND::optimize_gemv, +oneapi::mkl::sparse::BACKEND::optimize_trsv, +oneapi::mkl::sparse::BACKEND::gemv, +oneapi::mkl::sparse::BACKEND::gemv, +oneapi::mkl::sparse::BACKEND::gemv, +oneapi::mkl::sparse::BACKEND::gemv, +oneapi::mkl::sparse::BACKEND::gemv, +oneapi::mkl::sparse::BACKEND::gemv, +oneapi::mkl::sparse::BACKEND::gemv, +oneapi::mkl::sparse::BACKEND::gemv, +oneapi::mkl::sparse::BACKEND::trsv, +oneapi::mkl::sparse::BACKEND::trsv, +oneapi::mkl::sparse::BACKEND::trsv, +oneapi::mkl::sparse::BACKEND::trsv, +oneapi::mkl::sparse::BACKEND::trsv, +oneapi::mkl::sparse::BACKEND::trsv, +oneapi::mkl::sparse::BACKEND::trsv, +oneapi::mkl::sparse::BACKEND::trsv, +oneapi::mkl::sparse::BACKEND::gemm, +oneapi::mkl::sparse::BACKEND::gemm, +oneapi::mkl::sparse::BACKEND::gemm, +oneapi::mkl::sparse::BACKEND::gemm, +oneapi::mkl::sparse::BACKEND::gemm, +oneapi::mkl::sparse::BACKEND::gemm, +oneapi::mkl::sparse::BACKEND::gemm, +oneapi::mkl::sparse::BACKEND::gemm, + // clang-format on diff --git a/src/sparse_blas/backends/mkl_common/mkl_basic.cxx b/src/sparse_blas/backends/mkl_common/mkl_basic.cxx new file mode 100644 index 000000000..fd3b1563a --- /dev/null +++ b/src/sparse_blas/backends/mkl_common/mkl_basic.cxx @@ -0,0 +1,62 @@ +/******************************************************************************* +* Copyright 2023 Codeplay Software Ltd. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +void init_matrix_handle(sycl::queue & /*queue*/, detail::matrix_handle **p_handle) { + oneapi::mkl::sparse::init_matrix_handle(detail::get_handle(p_handle)); +} + +sycl::event release_matrix_handle(sycl::queue &queue, detail::matrix_handle **p_handle, + const std::vector &dependencies) { + return oneapi::mkl::sparse::release_matrix_handle(queue, detail::get_handle(p_handle), + dependencies); +} + +template +std::enable_if_t> set_csr_data( + sycl::queue &queue, detail::matrix_handle *handle, intType num_rows, intType num_cols, + intType /*nnz*/, index_base index, sycl::buffer &row_ptr, + sycl::buffer &col_ind, sycl::buffer &val) { + oneapi::mkl::sparse::set_csr_data(queue, detail::get_handle(handle), num_rows, num_cols, index, + row_ptr, col_ind, val); +} + +template +std::enable_if_t, sycl::event> set_csr_data( + sycl::queue &queue, detail::matrix_handle *handle, intType num_rows, intType num_cols, + intType /*nnz*/, index_base index, intType *row_ptr, intType *col_ind, fpType *val, + const std::vector &dependencies) { + return oneapi::mkl::sparse::set_csr_data(queue, detail::get_handle(handle), num_rows, num_cols, + index, row_ptr, col_ind, val, dependencies); +} + +#define INSTANTIATE_SET_CSR_DATA(FP_TYPE, INT_TYPE) \ + template std::enable_if_t> \ + set_csr_data( \ + sycl::queue & queue, detail::matrix_handle * handle, INT_TYPE num_rows, INT_TYPE num_cols, \ + INT_TYPE nnz, index_base index, sycl::buffer & row_ptr, \ + sycl::buffer & col_ind, sycl::buffer & val); \ + template std::enable_if_t, sycl::event> \ + set_csr_data(sycl::queue & queue, detail::matrix_handle * handle, \ + INT_TYPE num_rows, INT_TYPE num_cols, INT_TYPE nnz, \ + index_base index, INT_TYPE * row_ptr, INT_TYPE * col_ind, \ + FP_TYPE * val, const std::vector &dependencies) + +FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_SET_CSR_DATA); + +#undef INSTANTIATE_SET_CSR_DATA diff --git a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp new file mode 100644 index 000000000..da5235ee0 --- /dev/null +++ b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp @@ -0,0 +1,56 @@ +/******************************************************************************* +* Copyright 2023 Codeplay Software Ltd. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +// MKLCPU and MKLGPU backends include +// This include defines its own oneapi::mkl::sparse namespace with some of the types that are used here: matrix_handle_t, index_base, transpose, uolo, diag. +#include + +// Includes are set up so that oneapi::mkl::sparse namespace refers to the MKLCPU and MKLGPU backends namespace (oneMKL product) +// in this file. +// oneapi::mkl::sparse::detail namespace refers to the oneMKL interface namespace. + +#include "oneapi/mkl/sparse_blas/detail/helper_types.hpp" + +namespace oneapi::mkl::sparse::detail { + +inline auto get_handle(detail::matrix_handle **handle) { + return reinterpret_cast(handle); +} + +inline auto get_handle(detail::matrix_handle *handle) { + return reinterpret_cast(handle); +} + +} // namespace oneapi::mkl::sparse::detail + +#define FOR_EACH_FP_TYPE(INSTANTIATE_MACRO) \ + INSTANTIATE_MACRO(float); \ + INSTANTIATE_MACRO(double); \ + INSTANTIATE_MACRO(std::complex); \ + INSTANTIATE_MACRO(std::complex) + +#define FOR_EACH_FP_AND_INT_TYPE_HELPER(INSTANTIATE_MACRO, INT_TYPE) \ + INSTANTIATE_MACRO(float, INT_TYPE); \ + INSTANTIATE_MACRO(double, INT_TYPE); \ + INSTANTIATE_MACRO(std::complex, INT_TYPE); \ + INSTANTIATE_MACRO(std::complex, INT_TYPE) + +#define FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_MACRO) \ + FOR_EACH_FP_AND_INT_TYPE_HELPER(INSTANTIATE_MACRO, std::int32_t); \ + FOR_EACH_FP_AND_INT_TYPE_HELPER(INSTANTIATE_MACRO, std::int64_t) diff --git a/src/sparse_blas/backends/mkl_common/mkl_operations.cxx b/src/sparse_blas/backends/mkl_common/mkl_operations.cxx new file mode 100644 index 000000000..c854b63be --- /dev/null +++ b/src/sparse_blas/backends/mkl_common/mkl_operations.cxx @@ -0,0 +1,125 @@ +/******************************************************************************* +* Copyright 2023 Codeplay Software Ltd. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +sycl::event optimize_gemv(sycl::queue& queue, transpose transpose_val, + detail::matrix_handle* handle, + const std::vector& dependencies) { + return oneapi::mkl::sparse::optimize_gemv(queue, transpose_val, get_handle(handle), + dependencies); +} + +sycl::event optimize_trsv(sycl::queue& /*queue*/, uplo /*uplo_val*/, transpose /*transpose_val*/, + diag /*diag_val*/, detail::matrix_handle* /*handle*/, + const std::vector& /*dependencies*/) { + throw unimplemented("SPARSE_BLAS", "optimize_trsv"); +} + +template +std::enable_if_t> gemv( + sycl::queue& queue, transpose transpose_val, const fpType alpha, + detail::matrix_handle* A_handle, sycl::buffer& x, const fpType beta, + sycl::buffer& y) { + oneapi::mkl::sparse::gemv(queue, transpose_val, alpha, get_handle(A_handle), x, beta, y); +} + +template +std::enable_if_t, sycl::event> gemv( + sycl::queue& queue, transpose transpose_val, const fpType alpha, + detail::matrix_handle* A_handle, const fpType* x, const fpType beta, fpType* y, + const std::vector& dependencies) { + return oneapi::mkl::sparse::gemv(queue, transpose_val, alpha, get_handle(A_handle), x, beta, y, + dependencies); +} + +template +std::enable_if_t> trsv(sycl::queue& /*queue*/, uplo /*uplo_val*/, + transpose /*transpose_val*/, + diag /*diag_val*/, + detail::matrix_handle* /*A_handle*/, + sycl::buffer& /*x*/, + sycl::buffer& /*y*/) { + throw unimplemented("SPARSE_BLAS", "trsv"); +} + +template +std::enable_if_t, sycl::event> trsv( + sycl::queue& /*queue*/, uplo /*uplo_val*/, transpose /*transpose_val*/, diag /*diag_val*/, + detail::matrix_handle* /*A_handle*/, const fpType* /*x*/, fpType* /*y*/, + const std::vector& /*dependencies*/) { + throw unimplemented("SPARSE_BLAS", "trsv"); +} + +template +std::enable_if_t> gemm( + sycl::queue& /*queue*/, layout /*dense_matrix_layout*/, transpose /*transpose_A*/, + transpose /*transpose_B*/, const fpType /*alpha*/, detail::matrix_handle* /*A_handle*/, + sycl::buffer& /*B*/, const std::int64_t /*columns*/, const std::int64_t /*ldb*/, + const fpType /*beta*/, sycl::buffer& /*C*/, const std::int64_t /*ldc*/) { + throw unimplemented("SPARSE_BLAS", "gemm"); +} + +template +std::enable_if_t, sycl::event> gemm( + sycl::queue& /*queue*/, layout /*dense_matrix_layout*/, transpose /*transpose_A*/, + transpose /*transpose_B*/, const fpType /*alpha*/, detail::matrix_handle* /*A_handle*/, + const fpType* /*B*/, const std::int64_t /*columns*/, const std::int64_t /*ldb*/, + const fpType /*beta*/, fpType* /*C*/, const std::int64_t /*ldc*/, + const std::vector& /*dependencies*/) { + throw unimplemented("SPARSE_BLAS", "gemm"); +} + +#define INSTANTIATE_GEMV(FP_TYPE) \ + template std::enable_if_t> gemv( \ + sycl::queue& queue, transpose transpose_val, const FP_TYPE alpha, \ + detail::matrix_handle* A_handle, sycl::buffer& x, const FP_TYPE beta, \ + sycl::buffer& y); \ + template std::enable_if_t, sycl::event> gemv( \ + sycl::queue& queue, transpose transpose_val, const FP_TYPE alpha, \ + detail::matrix_handle* A_handle, const FP_TYPE* x, const FP_TYPE beta, FP_TYPE* y, \ + const std::vector& dependencies) + +#define INSTANTIATE_TRSV(FP_TYPE) \ + template std::enable_if_t> trsv( \ + sycl::queue& queue, uplo uplo_val, transpose transpose_val, diag diag_val, \ + detail::matrix_handle* A_handle, sycl::buffer& x, \ + sycl::buffer& y); \ + template std::enable_if_t, sycl::event> trsv( \ + sycl::queue& queue, uplo uplo_val, transpose transpose_val, diag diag_val, \ + detail::matrix_handle* A_handle, const FP_TYPE* x, FP_TYPE* y, \ + const std::vector& dependencies) + +#define INSTANTIATE_GEMM(FP_TYPE) \ + template std::enable_if_t> gemm( \ + sycl::queue& queue, layout dense_matrix_layout, transpose transpose_A, \ + transpose transpose_B, const FP_TYPE alpha, detail::matrix_handle* A_handle, \ + sycl::buffer& B, const std::int64_t columns, const std::int64_t ldb, \ + const FP_TYPE beta, sycl::buffer& C, const std::int64_t ldc); \ + template std::enable_if_t, sycl::event> gemm( \ + sycl::queue& queue, layout dense_matrix_layout, transpose transpose_A, \ + transpose transpose_B, const FP_TYPE alpha, detail::matrix_handle* A_handle, \ + const FP_TYPE* B, const std::int64_t columns, const std::int64_t ldb, const FP_TYPE beta, \ + FP_TYPE* C, const std::int64_t ldc, const std::vector& dependencies) + +FOR_EACH_FP_TYPE(INSTANTIATE_GEMV); +FOR_EACH_FP_TYPE(INSTANTIATE_TRSV); +FOR_EACH_FP_TYPE(INSTANTIATE_GEMM); + +#undef INSTANTIATE_GEMV +#undef INSTANTIATE_TRSV +#undef INSTANTIATE_GEMM diff --git a/src/sparse_blas/backends/mklcpu/CMakeLists.txt b/src/sparse_blas/backends/mklcpu/CMakeLists.txt new file mode 100644 index 000000000..a06b77900 --- /dev/null +++ b/src/sparse_blas/backends/mklcpu/CMakeLists.txt @@ -0,0 +1,83 @@ +#=============================================================================== +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# +# SPDX-License-Identifier: Apache-2.0 +#=============================================================================== + +set(LIB_NAME onemkl_sparse_blas_mklcpu) +set(LIB_OBJ ${LIB_NAME}_obj) + +include(WarningsUtils) + +add_library(${LIB_NAME}) +add_library(${LIB_OBJ} OBJECT + mklcpu_basic.cpp + mklcpu_operations.cpp + $<$: mklcpu_wrappers.cpp> +) + +target_include_directories(${LIB_OBJ} + PRIVATE ${PROJECT_SOURCE_DIR}/include + ${PROJECT_SOURCE_DIR}/src + ${CMAKE_BINARY_DIR}/bin +) + +target_compile_options(${LIB_OBJ} PRIVATE ${ONEMKL_BUILD_COPT}) +if (USE_ADD_SYCL_TO_TARGET_INTEGRATION) + add_sycl_to_target(TARGET ${LIB_OBJ} SOURCES ${SOURCES}) +endif() + +if(TARGET MKL::MKL_SYCL::SPARSE) + target_link_libraries(${LIB_OBJ} + PUBLIC ONEMKL::SYCL::SYCL + PUBLIC MKL::MKL_SYCL::SPARSE + PRIVATE onemkl_warnings + ) +else() + target_link_libraries(${LIB_OBJ} + PUBLIC ONEMKL::SYCL::SYCL + PUBLIC MKL::MKL_DPCPP + PRIVATE onemkl_warnings + ) +endif() + +set_target_properties(${LIB_OBJ} PROPERTIES + POSITION_INDEPENDENT_CODE ON +) +target_link_libraries(${LIB_NAME} PUBLIC ${LIB_OBJ}) + +#Set oneMKL libraries as not transitive for dynamic +if(BUILD_SHARED_LIBS) + set_target_properties(${LIB_NAME} PROPERTIES + INTERFACE_LINK_LIBRARIES ONEMKL::SYCL::SYCL + ) +endif() + +# Add major version to the library +set_target_properties(${LIB_NAME} PROPERTIES + SOVERSION ${PROJECT_VERSION_MAJOR} +) + +# Add dependencies rpath to the library +list(APPEND CMAKE_BUILD_RPATH $) + +# Add the library to install package +install(TARGETS ${LIB_OBJ} EXPORT oneMKLTargets) +install(TARGETS ${LIB_NAME} EXPORT oneMKLTargets + RUNTIME DESTINATION bin + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib +) diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_basic.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_basic.cpp new file mode 100644 index 000000000..9ab29ee92 --- /dev/null +++ b/src/sparse_blas/backends/mklcpu/mklcpu_basic.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* +* Copyright 2023 Codeplay Software Ltd. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include "../mkl_common/mkl_helper.hpp" + +#include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp" + +namespace oneapi::mkl::sparse::mklcpu { + +#include "../mkl_common/mkl_basic.cxx" + +} // namespace oneapi::mkl::sparse::mklcpu diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp new file mode 100644 index 000000000..e636b1816 --- /dev/null +++ b/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp @@ -0,0 +1,28 @@ +/******************************************************************************* +* Copyright 2023 Codeplay Software Ltd. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include "../mkl_common/mkl_helper.hpp" + +#include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp" + +namespace oneapi::mkl::sparse::mklcpu { + +#include "../mkl_common/mkl_operations.cxx" + +} // namespace oneapi::mkl::sparse::mklcpu diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_wrappers.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_wrappers.cpp new file mode 100644 index 000000000..40f75c60c --- /dev/null +++ b/src/sparse_blas/backends/mklcpu/mklcpu_wrappers.cpp @@ -0,0 +1,32 @@ +/******************************************************************************* +* Copyright 2023 Codeplay Software Ltd. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include "oneapi/mkl/sparse_blas/types.hpp" + +#include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp" + +#include "sparse_blas/function_table.hpp" + +#define WRAPPER_VERSION 1 +#define BACKEND mklcpu + +extern "C" sparse_blas_function_table_t mkl_sparse_blas_table = { + WRAPPER_VERSION, +#include "sparse_blas/backends/backend_wrappers.cxx" +}; diff --git a/src/sparse_blas/function_table.hpp b/src/sparse_blas/function_table.hpp new file mode 100644 index 000000000..d4656190a --- /dev/null +++ b/src/sparse_blas/function_table.hpp @@ -0,0 +1,291 @@ +/******************************************************************************* +* Copyright 2023 Codeplay Software Ltd. +* +* (*Licensed under the Apache License, Version 2.0 )(the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#ifndef _ONEMKL_SPARSE_BLAS_FUNCTION_TABLE_HPP_ +#define _ONEMKL_SPARSE_BLAS_FUNCTION_TABLE_HPP_ + +#include "oneapi/mkl/sparse_blas/types.hpp" + +typedef struct { + int version; + void (*init_matrix_handle)(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p_handle); + + sycl::event (*release_matrix_handle)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t *p_handle, + const std::vector &dependencies); + + // set_csr_data + void (*set_csr_data_buffer_rf_i32)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + std::int32_t num_rows, std::int32_t num_cols, + std::int32_t nnz, oneapi::mkl::index_base index, + sycl::buffer &row_ptr, + sycl::buffer &col_ind, + sycl::buffer &val); + void (*set_csr_data_buffer_rd_i32)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + std::int32_t num_rows, std::int32_t num_cols, + std::int32_t nnz, oneapi::mkl::index_base index, + sycl::buffer &row_ptr, + sycl::buffer &col_ind, + sycl::buffer &val); + void (*set_csr_data_buffer_cf_i32)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + std::int32_t num_rows, std::int32_t num_cols, + std::int32_t nnz, oneapi::mkl::index_base index, + sycl::buffer &row_ptr, + sycl::buffer &col_ind, + sycl::buffer, 1> &val); + void (*set_csr_data_buffer_cd_i32)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + std::int32_t num_rows, std::int32_t num_cols, + std::int32_t nnz, oneapi::mkl::index_base index, + sycl::buffer &row_ptr, + sycl::buffer &col_ind, + sycl::buffer, 1> &val); + void (*set_csr_data_buffer_rf_i64)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, oneapi::mkl::index_base index, + sycl::buffer &row_ptr, + sycl::buffer &col_ind, + sycl::buffer &val); + void (*set_csr_data_buffer_rd_i64)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, oneapi::mkl::index_base index, + sycl::buffer &row_ptr, + sycl::buffer &col_ind, + sycl::buffer &val); + void (*set_csr_data_buffer_cf_i64)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, oneapi::mkl::index_base index, + sycl::buffer &row_ptr, + sycl::buffer &col_ind, + sycl::buffer, 1> &val); + void (*set_csr_data_buffer_cd_i64)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, oneapi::mkl::index_base index, + sycl::buffer &row_ptr, + sycl::buffer &col_ind, + sycl::buffer, 1> &val); + sycl::event (*set_csr_data_usm_rf_i32)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + std::int32_t num_rows, std::int32_t num_cols, + std::int32_t nnz, oneapi::mkl::index_base index, + std::int32_t *row_ptr, std::int32_t *col_ind, float *val, + const std::vector &dependencies); + sycl::event (*set_csr_data_usm_rd_i32)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + std::int32_t num_rows, std::int32_t num_cols, + std::int32_t nnz, oneapi::mkl::index_base index, + std::int32_t *row_ptr, std::int32_t *col_ind, + double *val, + const std::vector &dependencies); + sycl::event (*set_csr_data_usm_cf_i32)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + std::int32_t num_rows, std::int32_t num_cols, + std::int32_t nnz, oneapi::mkl::index_base index, + std::int32_t *row_ptr, std::int32_t *col_ind, + std::complex *val, + const std::vector &dependencies); + sycl::event (*set_csr_data_usm_cd_i32)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + std::int32_t num_rows, std::int32_t num_cols, + std::int32_t nnz, oneapi::mkl::index_base index, + std::int32_t *row_ptr, std::int32_t *col_ind, + std::complex *val, + const std::vector &dependencies); + sycl::event (*set_csr_data_usm_rf_i64)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, oneapi::mkl::index_base index, + std::int64_t *row_ptr, std::int64_t *col_ind, float *val, + const std::vector &dependencies); + sycl::event (*set_csr_data_usm_rd_i64)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, oneapi::mkl::index_base index, + std::int64_t *row_ptr, std::int64_t *col_ind, + double *val, + const std::vector &dependencies); + sycl::event (*set_csr_data_usm_cf_i64)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, oneapi::mkl::index_base index, + std::int64_t *row_ptr, std::int64_t *col_ind, + std::complex *val, + const std::vector &dependencies); + sycl::event (*set_csr_data_usm_cd_i64)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, oneapi::mkl::index_base index, + std::int64_t *row_ptr, std::int64_t *col_ind, + std::complex *val, + const std::vector &dependencies); + + // optimize_* + sycl::event (*optimize_gemv)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, + oneapi::mkl::sparse::matrix_handle_t handle, + const std::vector &dependencies); + sycl::event (*optimize_trsv)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, + oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, + oneapi::mkl::sparse::matrix_handle_t handle, + const std::vector &dependencies); + + // gemv + void (*gemv_buffer_rf)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, + const float alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, + sycl::buffer &x, const float beta, sycl::buffer &y); + void (*gemv_buffer_rd)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, + const double alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, + sycl::buffer &x, const double beta, + sycl::buffer &y); + void (*gemv_buffer_cf)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, + const std::complex alpha, + oneapi::mkl::sparse::matrix_handle_t A_handle, + sycl::buffer, 1> &x, const std::complex beta, + sycl::buffer, 1> &y); + void (*gemv_buffer_cd)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, + const std::complex alpha, + oneapi::mkl::sparse::matrix_handle_t A_handle, + sycl::buffer, 1> &x, + const std::complex beta, + sycl::buffer, 1> &y); + sycl::event (*gemv_usm_rf)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, + const float alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, + const float *x, const float beta, float *y, + const std::vector &dependencies); + sycl::event (*gemv_usm_rd)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, + const double alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, + const double *x, const double beta, double *y, + const std::vector &dependencies); + sycl::event (*gemv_usm_cf)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, + const std::complex alpha, + oneapi::mkl::sparse::matrix_handle_t A_handle, + const std::complex *x, const std::complex beta, + std::complex *y, + const std::vector &dependencies); + sycl::event (*gemv_usm_cd)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, + const std::complex alpha, + oneapi::mkl::sparse::matrix_handle_t A_handle, + const std::complex *x, const std::complex beta, + std::complex *y, + const std::vector &dependencies); + + // trsv + void (*trsv_buffer_rf)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, + oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, + oneapi::mkl::sparse::matrix_handle_t A_handle, sycl::buffer &x, + sycl::buffer &y); + void (*trsv_buffer_rd)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, + oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, + oneapi::mkl::sparse::matrix_handle_t A_handle, + sycl::buffer &x, sycl::buffer &y); + void (*trsv_buffer_cf)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, + oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, + oneapi::mkl::sparse::matrix_handle_t A_handle, + sycl::buffer, 1> &x, + sycl::buffer, 1> &y); + void (*trsv_buffer_cd)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, + oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, + oneapi::mkl::sparse::matrix_handle_t A_handle, + sycl::buffer, 1> &x, + sycl::buffer, 1> &y); + sycl::event (*trsv_usm_rf)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, + oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, + oneapi::mkl::sparse::matrix_handle_t A_handle, const float *x, + float *y, const std::vector &dependencies); + sycl::event (*trsv_usm_rd)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, + oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, + oneapi::mkl::sparse::matrix_handle_t A_handle, const double *x, + double *y, const std::vector &dependencies); + sycl::event (*trsv_usm_cf)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, + oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, + oneapi::mkl::sparse::matrix_handle_t A_handle, + const std::complex *x, std::complex *y, + const std::vector &dependencies); + sycl::event (*trsv_usm_cd)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, + oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, + oneapi::mkl::sparse::matrix_handle_t A_handle, + const std::complex *x, std::complex *y, + const std::vector &dependencies); + + // gemm + void (*gemm_buffer_rf)(sycl::queue &queue, oneapi::mkl::layout dense_matrix_layout, + oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, + const float alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, + sycl::buffer &B, const std::int64_t columns, + const std::int64_t ldb, const float beta, sycl::buffer &C, + const std::int64_t ldc); + void (*gemm_buffer_rd)(sycl::queue &queue, oneapi::mkl::layout dense_matrix_layout, + oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, + const double alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, + sycl::buffer &B, const std::int64_t columns, + const std::int64_t ldb, const double beta, sycl::buffer &C, + const std::int64_t ldc); + void (*gemm_buffer_cf)(sycl::queue &queue, oneapi::mkl::layout dense_matrix_layout, + oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, + const std::complex alpha, + oneapi::mkl::sparse::matrix_handle_t A_handle, + sycl::buffer, 1> &B, const std::int64_t columns, + const std::int64_t ldb, const std::complex beta, + sycl::buffer, 1> &C, const std::int64_t ldc); + void (*gemm_buffer_cd)(sycl::queue &queue, oneapi::mkl::layout dense_matrix_layout, + oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, + const std::complex alpha, + oneapi::mkl::sparse::matrix_handle_t A_handle, + sycl::buffer, 1> &B, const std::int64_t columns, + const std::int64_t ldb, const std::complex beta, + sycl::buffer, 1> &C, const std::int64_t ldc); + sycl::event (*gemm_usm_rf)(sycl::queue &queue, oneapi::mkl::layout dense_matrix_layout, + oneapi::mkl::transpose transpose_A, + oneapi::mkl::transpose transpose_B, const float alpha, + oneapi::mkl::sparse::matrix_handle_t A_handle, const float *B, + const std::int64_t columns, const std::int64_t ldb, const float beta, + float *C, const std::int64_t ldc, + const std::vector &dependencies); + sycl::event (*gemm_usm_rd)(sycl::queue &queue, oneapi::mkl::layout dense_matrix_layout, + oneapi::mkl::transpose transpose_A, + oneapi::mkl::transpose transpose_B, const double alpha, + oneapi::mkl::sparse::matrix_handle_t A_handle, const double *B, + const std::int64_t columns, const std::int64_t ldb, + const double beta, double *C, const std::int64_t ldc, + const std::vector &dependencies); + sycl::event (*gemm_usm_cf)(sycl::queue &queue, oneapi::mkl::layout dense_matrix_layout, + oneapi::mkl::transpose transpose_A, + oneapi::mkl::transpose transpose_B, const std::complex alpha, + oneapi::mkl::sparse::matrix_handle_t A_handle, + const std::complex *B, const std::int64_t columns, + const std::int64_t ldb, const std::complex beta, + std::complex *C, const std::int64_t ldc, + const std::vector &dependencies); + sycl::event (*gemm_usm_cd)(sycl::queue &queue, oneapi::mkl::layout dense_matrix_layout, + oneapi::mkl::transpose transpose_A, + oneapi::mkl::transpose transpose_B, const std::complex alpha, + oneapi::mkl::sparse::matrix_handle_t A_handle, + const std::complex *B, const std::int64_t columns, + const std::int64_t ldb, const std::complex beta, + std::complex *C, const std::int64_t ldc, + const std::vector &dependencies); +} sparse_blas_function_table_t; + +#endif // _ONEMKL_SPARSE_BLAS_FUNCTION_TABLE_HPP_ diff --git a/src/sparse_blas/sparse_blas_loader.cpp b/src/sparse_blas/sparse_blas_loader.cpp new file mode 100644 index 000000000..07d065561 --- /dev/null +++ b/src/sparse_blas/sparse_blas_loader.cpp @@ -0,0 +1,167 @@ +/******************************************************************************* +* Copyright 2023 Codeplay Software Ltd. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include "oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp" + +#include "function_table_initializer.hpp" +#include "sparse_blas/function_table.hpp" +#include "oneapi/mkl/detail/get_device_id.hpp" + +#define FOR_EACH_FP_TYPE(DEFINE_MACRO) \ + DEFINE_MACRO(float, _rf); \ + DEFINE_MACRO(double, _rd); \ + DEFINE_MACRO(std::complex, _cf); \ + DEFINE_MACRO(std::complex, _cd) + +#define FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, INT_TYPE, INT_SUFFIX) \ + DEFINE_MACRO(float, _rf, INT_TYPE, INT_SUFFIX); \ + DEFINE_MACRO(double, _rd, INT_TYPE, INT_SUFFIX); \ + DEFINE_MACRO(std::complex, _cf, INT_TYPE, INT_SUFFIX); \ + DEFINE_MACRO(std::complex, _cd, INT_TYPE, INT_SUFFIX) + +#define FOR_EACH_FP_AND_INT_TYPE(DEFINE_MACRO) \ + FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, std::int32_t, _i32); \ + FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, std::int64_t, _i64) + +namespace oneapi::mkl::sparse { + +static oneapi::mkl::detail::table_initializer + function_tables; + +void init_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle) { + auto libkey = get_device_id(queue); + function_tables[libkey].init_matrix_handle(queue, p_handle); +} + +sycl::event release_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle, + const std::vector &dependencies) { + auto libkey = get_device_id(queue); + return function_tables[libkey].release_matrix_handle(queue, p_handle, dependencies); +} + +#define DEFINE_SET_CSR_DATA(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ + template <> \ + void set_csr_data(sycl::queue &queue, matrix_handle_t handle, INT_TYPE num_rows, \ + INT_TYPE num_cols, INT_TYPE nnz, index_base index, \ + sycl::buffer &row_ptr, sycl::buffer &col_ind, \ + sycl::buffer &val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].set_csr_data_buffer##FP_SUFFIX##INT_SUFFIX( \ + queue, handle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ + } \ + template <> \ + sycl::event set_csr_data(sycl::queue &queue, matrix_handle_t handle, INT_TYPE num_rows, \ + INT_TYPE num_cols, INT_TYPE nnz, index_base index, INT_TYPE *row_ptr, \ + INT_TYPE *col_ind, FP_TYPE *val, \ + const std::vector &dependencies) { \ + auto libkey = get_device_id(queue); \ + return function_tables[libkey].set_csr_data_usm##FP_SUFFIX##INT_SUFFIX( \ + queue, handle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val, dependencies); \ + } + +FOR_EACH_FP_AND_INT_TYPE(DEFINE_SET_CSR_DATA) +#undef DEFINE_SET_CSR_DATA + +sycl::event optimize_gemv(sycl::queue &queue, transpose transpose_val, matrix_handle_t handle, + const std::vector &dependencies) { + auto libkey = get_device_id(queue); + return function_tables[libkey].optimize_gemv(queue, transpose_val, handle, dependencies); +} + +sycl::event optimize_trsv(sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, + matrix_handle_t handle, const std::vector &dependencies) { + auto libkey = get_device_id(queue); + return function_tables[libkey].optimize_trsv(queue, uplo_val, transpose_val, diag_val, handle, + dependencies); +} + +#define DEFINE_GEMV(FP_TYPE, FP_SUFFIX) \ + template <> \ + void gemv(sycl::queue &queue, transpose transpose_val, const FP_TYPE alpha, \ + matrix_handle_t A_handle, sycl::buffer &x, const FP_TYPE beta, \ + sycl::buffer &y) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].gemv_buffer##FP_SUFFIX(queue, transpose_val, alpha, A_handle, x, \ + beta, y); \ + } \ + template <> \ + sycl::event gemv(sycl::queue &queue, transpose transpose_val, const FP_TYPE alpha, \ + matrix_handle_t A_handle, const FP_TYPE *x, const FP_TYPE beta, FP_TYPE *y, \ + const std::vector &dependencies) { \ + auto libkey = get_device_id(queue); \ + return function_tables[libkey].gemv_usm##FP_SUFFIX(queue, transpose_val, alpha, A_handle, \ + x, beta, y, dependencies); \ + } + +FOR_EACH_FP_TYPE(DEFINE_GEMV) +#undef DEFINE_GEMV + +#define DEFINE_TRSV(FP_TYPE, FP_SUFFIX) \ + template <> \ + void trsv(sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, \ + matrix_handle_t A_handle, sycl::buffer &x, \ + sycl::buffer &y) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].trsv_buffer##FP_SUFFIX(queue, uplo_val, transpose_val, diag_val, \ + A_handle, x, y); \ + } \ + template <> \ + sycl::event trsv(sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, \ + matrix_handle_t A_handle, const FP_TYPE *x, FP_TYPE *y, \ + const std::vector &dependencies) { \ + auto libkey = get_device_id(queue); \ + return function_tables[libkey].trsv_usm##FP_SUFFIX( \ + queue, uplo_val, transpose_val, diag_val, A_handle, x, y, dependencies); \ + } + +FOR_EACH_FP_TYPE(DEFINE_TRSV) +#undef DEFINE_TRSV + +#define DEFINE_GEMM(FP_TYPE, FP_SUFFIX) \ + template <> \ + void gemm(sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, \ + transpose transpose_B, const FP_TYPE alpha, matrix_handle_t A_handle, \ + sycl::buffer &B, const std::int64_t columns, const std::int64_t ldb, \ + const FP_TYPE beta, sycl::buffer &C, const std::int64_t ldc) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].gemm_buffer##FP_SUFFIX(queue, dense_matrix_layout, transpose_A, \ + transpose_B, alpha, A_handle, B, columns, \ + ldb, beta, C, ldc); \ + } \ + template <> \ + sycl::event gemm(sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, \ + transpose transpose_B, const FP_TYPE alpha, matrix_handle_t A_handle, \ + const FP_TYPE *B, const std::int64_t columns, const std::int64_t ldb, \ + const FP_TYPE beta, FP_TYPE *C, const std::int64_t ldc, \ + const std::vector &dependencies) { \ + auto libkey = get_device_id(queue); \ + return function_tables[libkey].gemm_usm##FP_SUFFIX( \ + queue, dense_matrix_layout, transpose_A, transpose_B, alpha, A_handle, B, columns, \ + ldb, beta, C, ldc, dependencies); \ + } + +FOR_EACH_FP_TYPE(DEFINE_GEMM) +#undef DEFINE_GEMM + +} // namespace oneapi::mkl::sparse + +#undef FOR_EACH_FP_TYPE +#undef FOR_EACH_FP_AND_INT_TYPE +#undef FOR_EACH_FP_AND_INT_TYPE_HELPER diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt index 464d8841c..43b2444a7 100644 --- a/tests/unit_tests/CMakeLists.txt +++ b/tests/unit_tests/CMakeLists.txt @@ -65,6 +65,12 @@ set(dft_TEST_LIST set(dft_TEST_LINK "") +# Sparse BLAS config +set(sparse_blas_TEST_LIST + spblas_source) + +set(sparse_blas_TEST_LINK "") + foreach(domain ${TARGET_DOMAINS}) # Generate RT and CT test lists set(${domain}_TEST_LIST_RT ${${domain}_TEST_LIST}) diff --git a/tests/unit_tests/sparse_blas/CMakeLists.txt b/tests/unit_tests/sparse_blas/CMakeLists.txt new file mode 100644 index 000000000..2c46cd38c --- /dev/null +++ b/tests/unit_tests/sparse_blas/CMakeLists.txt @@ -0,0 +1,20 @@ +#=============================================================================== +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# +# SPDX-License-Identifier: Apache-2.0 +#=============================================================================== + +add_subdirectory(source) diff --git a/tests/unit_tests/sparse_blas/include/sparse_reference.hpp b/tests/unit_tests/sparse_blas/include/sparse_reference.hpp new file mode 100644 index 000000000..fd9ffaa3d --- /dev/null +++ b/tests/unit_tests/sparse_blas/include/sparse_reference.hpp @@ -0,0 +1,149 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#ifndef _SPARSE_REFERENCE_HPP__ +#define _SPARSE_REFERENCE_HPP__ + +#include + +#include "oneapi/mkl.hpp" + +template +inline T conjugate(T) { + static_assert(false, "Unsupported type"); +} +template <> +inline float conjugate(float t) { + return t; +} +template <> +inline double conjugate(double t) { + return t; +} +template <> +inline std::complex conjugate(std::complex t) { + return std::conj(t); +} +template <> +inline std::complex conjugate(std::complex t) { + return std::conj(t); +} + +template +inline T opVal(const T t, const bool isConj) { + return (isConj ? conjugate(t) : t); +}; + +template +void do_csr_transpose(const oneapi::mkl::transpose opA, intType *ia_t, intType *ja_t, fpType *a_t, + intType a_nrows, intType a_ncols, intType a_ind, accIntType &ia, + accIntType &ja, accFpType &a, const bool structOnlyFlag = false) { + const bool isConj = (opA == oneapi::mkl::transpose::conjtrans); + + // initialize ia_t to zero + for (intType i = 0; i < a_ncols + 1; ++i) { + ia_t[i] = 0; + } + + // fill ia_t with counts of columns + for (intType i = 0; i < a_nrows; ++i) { + const intType st = ia[i] - a_ind; + const intType en = ia[i + 1] - a_ind; + for (intType j = st; j < en; ++j) { + const intType col = ja[j] - a_ind; + ia_t[col + 1]++; + } + } + // prefix sum to get official ia_t counts + ia_t[0] = a_ind; + for (intType i = 0; i < a_ncols; ++i) { + ia_t[i + 1] += ia_t[i]; + } + + // second pass through data to fill transpose structure + for (intType i = 0; i < a_nrows; ++i) { + const intType st = ia[i] - a_ind; + const intType en = ia[i + 1] - a_ind; + for (intType j = st; j < en; ++j) { + const intType col = ja[j] - a_ind; + const intType j_in_a_t = ia_t[col] - a_ind; + ia_t[col]++; + ja_t[j_in_a_t] = i + a_ind; + if (!structOnlyFlag) { + const fpType val = a[j]; + a_t[j_in_a_t] = opVal(val, isConj); + } + } + } + + // adjust ia_t back to original state after filling structure + for (intType i = a_ncols; i > 0; --i) { + ia_t[i] = ia_t[i - 1]; + } + ia_t[0] = a_ind; +} + +template +void prepare_reference_gemv_data(const intType *ia, const intType *ja, const fpType *a, + intType a_nrows, intType a_ncols, intType a_nnz, intType a_ind, + oneapi::mkl::transpose opA, fpType alpha, fpType beta, + const fpType *x, fpType *y_ref) { + std::size_t opa_nrows = + static_cast((opA == oneapi::mkl::transpose::nontrans) ? a_nrows : a_ncols); + const std::size_t nnz = static_cast(a_nnz); + + // prepare op(A) locally + std::vector iopa; + std::vector jopa; + std::vector opa; + if (opA == oneapi::mkl::transpose::nontrans) { + iopa.assign(ia, ia + a_nrows + 1); + jopa.assign(ja, ja + nnz); + opa.assign(a, a + nnz); + } + else if (opA == oneapi::mkl::transpose::trans || opA == oneapi::mkl::transpose::conjtrans) { + iopa.resize(opa_nrows + 1); + jopa.resize(nnz); + opa.resize(nnz); + do_csr_transpose(opA, iopa.data(), jopa.data(), opa.data(), a_nrows, a_ncols, a_ind, ia, ja, + a); + } + else { + throw std::runtime_error( + "unsupported transpose_val (opA) in prepare_reference_gemv_data()"); + } + + // + // do GEMV operation + // + // y_ref <- alpha * op(A) * x + beta * y_ref + // + for (std::size_t row = 0; row < opa_nrows; row++) { + fpType tmp = 0; + for (intType i = iopa[row] - a_ind; i < iopa[row + 1] - a_ind; i++) { + std::size_t iu = static_cast(i); + std::size_t x_ind = static_cast(jopa[iu] - a_ind); + tmp += opa[iu] * x[x_ind]; + } + + y_ref[row] = alpha * tmp + beta * y_ref[row]; + } +} + +#endif // _SPARSE_REFERENCE_HPP__ diff --git a/tests/unit_tests/sparse_blas/include/test_common.hpp b/tests/unit_tests/sparse_blas/include/test_common.hpp new file mode 100644 index 000000000..0cf63e10f --- /dev/null +++ b/tests/unit_tests/sparse_blas/include/test_common.hpp @@ -0,0 +1,248 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#ifndef _TEST_COMMON_HPP__ +#define _TEST_COMMON_HPP__ + +#include +#include +#include + +#if __has_include() +#include +#else +#include +#endif + +#include "test_helper.hpp" + +// Sparse BLAS domain needs to call more functions per test so we use this macro helper to select between runtime and compile dispatch for each function +#ifdef CALL_RT_API +#define CALL_RT_OR_CT(FUNC, QUEUE, ...) FUNC(QUEUE, __VA_ARGS__) +#else +#define CALL_RT_OR_CT(FUNC, QUEUE, ...) TEST_RUN_CT_SELECT(QUEUE, FUNC, __VA_ARGS__); +#endif + +template +struct complex_info { + using real_type = T; + static const bool is_complex = false; +}; + +template +struct complex_info> { + using real_type = T; + static const bool is_complex = true; +}; + +void print_error_code(sycl::exception const &e); + +// Catch asynchronous exceptions. +struct exception_handler_t { + void operator()(sycl::exception_list exceptions) { + for (std::exception_ptr const &e : exceptions) { + try { + std::rethrow_exception(e); + } + catch (sycl::exception const &e) { + std::cout << "Caught asynchronous SYCL exception:\n" << e.what() << std::endl; + print_error_code(e); + } + } + } +}; + +// Use a unique_ptr to automatically free device memory on unique_ptr destruction. +template +auto malloc_device_uptr(sycl::queue &q, std::size_t num_elts) { + struct Deleter { + sycl::queue &q; + Deleter(sycl::queue &_q) : q(_q) {} + void operator()(T *ptr) { + sycl::free(ptr, q); + } + }; + return std::unique_ptr(sycl::malloc_device(num_elts, q), Deleter(q)); +} + +// SYCL buffer creation helper. +template +sycl::buffer make_buffer(const vec &v) { + sycl::buffer buf(v.data(), sycl::range<1>(v.size())); + return buf; +} + +template +struct rand_scalar { + inline fpType operator()(double min, double max) { + return (fpType(std::rand()) / fpType(RAND_MAX)) * fpType(max - min) - fpType(min); + } +}; + +template +struct rand_scalar> { + inline std::complex operator()(double min, double max) { + rand_scalar rand; + return std::complex(rand(min, max), rand(min, max)); + } +}; + +template +void rand_vector(std::vector &v, std::size_t n) { + using fpRealType = typename complex_info::real_type; + v.resize(n); + rand_scalar rand; + for (std::size_t i = 0; i < n; i++) { + v[i] = rand(fpRealType(-0.5), fpRealType(0.5)); + } +} + +// Creating the 3arrays CSR representation (ia, ja, values) +// of general random sparse matrix +// with density (0 < density <= 1.0) +// -0.5 <= value < 0.5 +// require_diagonal means all diagonal entries guaranteed to be nonzero +template +intType generate_random_matrix(const intType nrows, const intType ncols, const double density_val, + intType indexing, std::vector &ia, std::vector &ja, + std::vector &a, bool require_diagonal = false) { + intType nnz = 0; + rand_scalar rand_density; + rand_scalar rand_data; + + ia.push_back(indexing); // starting index of row0. + for (intType i = 0; i < nrows; i++) { + ia.push_back(nnz + indexing); // ending index of row_i. + for (intType j = 0; j < ncols; j++) { + if ((require_diagonal && i == j) || (rand_density(0.0, 1.0) < density_val)) { + a.push_back(rand_data(-0.5, 0.5)); + ja.push_back(j + indexing); + nnz++; + } + } + ia[static_cast(i) + 1] = nnz + indexing; + } + return nnz; +} + +// Shuffle the 3arrays CSR representation (ia, ja, values) +// of any sparse matrix and set values serially from 0..nnz. +// Intended for use with sorting. +template +void shuffle_data(const intType *ia, intType *ja, fpType *a, const std::size_t nrows) { + // + // shuffle indices according to random seed + // + intType indexing = ia[0]; + for (std::size_t i = 0; i < nrows; ++i) { + intType nnz_row = ia[i + 1] - ia[i]; + for (intType j = ia[i] - indexing; j < ia[i + 1] - indexing; ++j) { + intType q = ia[i] - indexing + std::rand() % (nnz_row); + // swap element i and q + std::swap(ja[q], ja[j]); + std::swap(a[q], a[j]); + } + } +} + +template +struct set_fp_value { + inline fpType operator()(fpType real, fpType /*imag*/) { + return real; + } +}; + +template +struct set_fp_value> { + inline auto operator()(scalarType real, scalarType imag) { + return std::complex(real, imag); + } +}; + +inline void wait_and_free(sycl::queue &main_queue, oneapi::mkl::sparse::matrix_handle_t *p_handle) { + main_queue.wait(); + sycl::event ev_release; + CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, p_handle); + ev_release.wait(); +} + +template +bool check_equal(fpType x, fpType x_ref, double abs_error_margin, double rel_error_margin, + std::ostream &out) { + using fpRealType = typename complex_info::real_type; + static_assert(std::is_floating_point_v, + "Expected floating-point real or complex type."); + + const fpRealType epsilon = std::numeric_limits::epsilon(); + const auto abs_bound = static_cast(abs_error_margin) * epsilon; + const auto rel_bound = static_cast(rel_error_margin) * epsilon; + + const auto aerr = std::abs(x - x_ref); + const auto rerr = aerr / std::abs(x_ref); + const bool valid = (rerr <= rel_bound) || (aerr <= abs_bound); + if (!valid) { + out << "Mismatching results: actual = " << x << " vs. reference = " << x_ref << "\n"; + out << " relative error = " << rerr << " absolute error = " << aerr + << " relative bound = " << rel_bound << " absolute bound = " << abs_bound << "\n"; + } + return valid; +} + +template +bool check_equal_vector(const vecType1 &v, const vecType2 &v_ref, double abs_error_factor = 10.0, + double rel_error_factor = 200.0, std::ostream &out = std::cout) { + using T = typename vecType2::value_type; + std::size_t n = v.size(); + if (n != v_ref.size()) { + out << "Mismatching size got " << n << " expected " << v_ref.size() << "\n"; + return false; + } + auto max_norm_ref = + *std::max_element(std::begin(v_ref), std::end(v_ref), + [](const T &a, const T &b) { return std::abs(a) < std::abs(b); }); + // Heuristic for the average-case error margins + double abs_error_margin = + abs_error_factor * std::abs(max_norm_ref) * std::log2(static_cast(n)); + double rel_error_margin = rel_error_factor * std::log2(static_cast(n)); + + constexpr int max_print = 20; + int count = 0; + bool valid = true; + + for (std::size_t i = 0; i < n; ++i) { + // Allow to convert the unsigned index `i` to a signed one to keep this function generic and allow for `v` and `v_ref` to be a vector, a pointer or a random access iterator. +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wsign-conversion" + auto res = v[i]; + auto ref = v_ref[i]; +#pragma clang diagnostic pop + if (!check_equal(res, ref, abs_error_margin, rel_error_margin, out)) { + out << " at index i =" << i << "\n"; + valid = false; + ++count; + if (count > max_print) { + return valid; + } + } + } + + return valid; +} + +#endif // _TEST_COMMON_HPP__ diff --git a/tests/unit_tests/sparse_blas/source/CMakeLists.txt b/tests/unit_tests/sparse_blas/source/CMakeLists.txt new file mode 100644 index 000000000..51ace5906 --- /dev/null +++ b/tests/unit_tests/sparse_blas/source/CMakeLists.txt @@ -0,0 +1,56 @@ +#=============================================================================== +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# +# SPDX-License-Identifier: Apache-2.0 +#=============================================================================== + +set(SPBLAS_SOURCES "sparse_gemv_usm.cpp" "sparse_gemv_buffer.cpp") + +include(WarningsUtils) + +if (BUILD_SHARED_LIBS) + add_library(spblas_source_rt OBJECT ${SPBLAS_SOURCES}) + target_compile_options(spblas_source_rt PRIVATE -DCALL_RT_API -DNOMINMAX) + target_include_directories(spblas_source_rt + PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../include + PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../include + PUBLIC ${PROJECT_SOURCE_DIR}/include + PUBLIC ${PROJECT_SOURCE_DIR}/deps/googletest/include + PUBLIC ${CMAKE_BINARY_DIR}/bin + ) + if (USE_ADD_SYCL_TO_TARGET_INTEGRATION) + add_sycl_to_target(TARGET spblas_source_rt SOURCES ${SPBLAS_SOURCES}) + else () + target_link_libraries(spblas_source_rt PUBLIC ONEMKL::SYCL::SYCL) + endif () + target_link_libraries(spblas_source_rt PRIVATE onemkl_warnings) +endif () + +add_library(spblas_source_ct OBJECT ${SPBLAS_SOURCES}) +target_compile_options(spblas_source_ct PRIVATE -DNOMINMAX) +target_include_directories(spblas_source_ct + PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../include + PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../include + PUBLIC ${PROJECT_SOURCE_DIR}/include + PUBLIC ${PROJECT_SOURCE_DIR}/deps/googletest/include + PUBLIC ${CMAKE_BINARY_DIR}/bin + ) +if (USE_ADD_SYCL_TO_TARGET_INTEGRATION) + add_sycl_to_target(TARGET spblas_source_ct SOURCES ${SPBLAS_SOURCES}) +else () + target_link_libraries(spblas_source_ct PUBLIC ONEMKL::SYCL::SYCL) +endif () +target_link_libraries(spblas_source_ct PRIVATE onemkl_warnings) diff --git a/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp new file mode 100644 index 000000000..c831514e7 --- /dev/null +++ b/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp @@ -0,0 +1,183 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include +#include +#include +#include +#include +#include + +#if __has_include() +#include +#else +#include +#endif + +#include "oneapi/mkl.hpp" +#include "oneapi/mkl/detail/config.hpp" +#include "sparse_reference.hpp" +#include "test_common.hpp" +#include "test_helper.hpp" + +#include + +extern std::vector devices; + +namespace { + +template +int test(sycl::device *dev, intType nrows, intType ncols, double density_A_matrix, + oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha, + fpType beta) { + sycl::queue main_queue(*dev, exception_handler_t()); + + intType int_index = (index == oneapi::mkl::index_base::zero) ? 0 : 1; + std::size_t opa_nrows = + static_cast(transpose_val == oneapi::mkl::transpose::nontrans ? nrows : ncols); + std::size_t opa_ncols = + static_cast(transpose_val == oneapi::mkl::transpose::nontrans ? ncols : nrows); + + // Input matrix in CSR format + std::vector ia_host, ja_host; + std::vector a_host; + intType nnz = generate_random_matrix(nrows, ncols, density_A_matrix, int_index, + ia_host, ja_host, a_host); + + // Input and output dense vectors + // The host and device output are filled with a dummy value to spot mistakes. + std::vector x_host; + std::vector y_host(opa_nrows, fpType(2)); + rand_vector(x_host, opa_ncols); + + // Shuffle ordering of column indices/values to test sortedness + shuffle_data(ia_host.data(), ja_host.data(), a_host.data(), static_cast(nrows)); + + auto ia_buf = make_buffer(ia_host); + auto ja_buf = make_buffer(ja_host); + auto x_buf = make_buffer(x_host); + auto y_buf = make_buffer(y_host); + auto a_buf = make_buffer(a_host); + + oneapi::mkl::sparse::matrix_handle_t handle = nullptr; + sycl::event ev_release; + try { + CALL_RT_OR_CT(oneapi::mkl::sparse::init_matrix_handle, main_queue, &handle); + + CALL_RT_OR_CT(oneapi::mkl::sparse::set_csr_data, main_queue, handle, nrows, ncols, nnz, + index, ia_buf, ja_buf, a_buf); + + CALL_RT_OR_CT(oneapi::mkl::sparse::optimize_gemv, main_queue, transpose_val, handle); + + CALL_RT_OR_CT(oneapi::mkl::sparse::gemv, main_queue, transpose_val, alpha, handle, x_buf, + beta, y_buf); + + CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, &handle); + } + catch (const sycl::exception &e) { + std::cout << "Caught synchronous SYCL exception during sparse GEMV:\n" + << e.what() << std::endl; + print_error_code(e); + return 0; + } + catch (const oneapi::mkl::unimplemented &e) { + wait_and_free(main_queue, &handle); + return test_skipped; + } + catch (const std::runtime_error &error) { + std::cout << "Error raised during execution of sparse GEMV:\n" << error.what() << std::endl; + return 0; + } + + // Compute reference. + // The reference is filled with a dummy value to spot mistakes. + std::vector y_ref_host(opa_nrows, fpType(2)); + prepare_reference_gemv_data(ia_host.data(), ja_host.data(), a_host.data(), nrows, ncols, nnz, + int_index, transpose_val, alpha, beta, x_host.data(), + y_ref_host.data()); + + // Compare the results of reference implementation and DPC++ implementation. + auto y_acc = y_buf.template get_host_access(sycl::read_only); + bool valid = check_equal_vector(y_acc, y_ref_host); + + ev_release.wait_and_throw(); + return static_cast(valid); +} + +class SparseGemvBufferTests : public ::testing::TestWithParam {}; + +template +void test_helper(sycl::device *dev, oneapi::mkl::transpose transpose_val) { + double density_A_matrix = 0.8; + fpType fp_zero = set_fp_value()(0.f, 0.f); + fpType fp_one = set_fp_value()(1.f, 0.f); + oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; + // Basic test + EXPECT_TRUEORSKIP( + test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero)); + // Test index_base 1 + EXPECT_TRUEORSKIP(test(dev, 4, 6, density_A_matrix, oneapi::mkl::index_base::one, transpose_val, + fp_one, fp_zero)); + // Test non-default alpha + EXPECT_TRUEORSKIP(test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, + set_fp_value()(2.f, 1.5f), fp_zero)); + // Test non-default beta + EXPECT_TRUEORSKIP(test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_one, + set_fp_value()(3.2f, 1.f))); + // Test 0 alpha + EXPECT_TRUEORSKIP( + test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_zero, fp_one)); + // Test 0 alpha and beta + EXPECT_TRUEORSKIP( + test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_zero, fp_zero)); + // Test int64 indices + EXPECT_TRUEORSKIP( + test(dev, 27L, 13L, density_A_matrix, index_zero, transpose_val, fp_one, fp_one)); +} + +TEST_P(SparseGemvBufferTests, RealSinglePrecision) { + using fpType = float; + test_helper(GetParam(), oneapi::mkl::transpose::nontrans); + test_helper(GetParam(), oneapi::mkl::transpose::trans); +} + +TEST_P(SparseGemvBufferTests, RealDoublePrecision) { + using fpType = double; + test_helper(GetParam(), oneapi::mkl::transpose::nontrans); + test_helper(GetParam(), oneapi::mkl::transpose::trans); +} + +TEST_P(SparseGemvBufferTests, ComplexSinglePrecision) { + using fpType = std::complex; + test_helper(GetParam(), oneapi::mkl::transpose::nontrans); + test_helper(GetParam(), oneapi::mkl::transpose::trans); + test_helper(GetParam(), oneapi::mkl::transpose::conjtrans); +} + +TEST_P(SparseGemvBufferTests, ComplexDoublePrecision) { + using fpType = std::complex; + test_helper(GetParam(), oneapi::mkl::transpose::nontrans); + test_helper(GetParam(), oneapi::mkl::transpose::trans); + test_helper(GetParam(), oneapi::mkl::transpose::conjtrans); +} + +INSTANTIATE_TEST_SUITE_P(SparseGemvBufferTestSuite, SparseGemvBufferTests, + testing::ValuesIn(devices), ::DeviceNamePrint()); + +} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp new file mode 100644 index 000000000..e6b0520e1 --- /dev/null +++ b/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp @@ -0,0 +1,209 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include +#include +#include +#include +#include +#include + +#if __has_include() +#include +#else +#include +#endif + +#include "oneapi/mkl.hpp" +#include "oneapi/mkl/detail/config.hpp" +#include "sparse_reference.hpp" +#include "test_common.hpp" +#include "test_helper.hpp" + +#include + +extern std::vector devices; + +namespace { + +template +int test(sycl::device *dev, intType nrows, intType ncols, double density_A_matrix, + oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha, + fpType beta) { + sycl::queue main_queue(*dev, exception_handler_t()); + + intType int_index = (index == oneapi::mkl::index_base::zero) ? 0 : 1; + std::size_t opa_nrows = + static_cast(transpose_val == oneapi::mkl::transpose::nontrans ? nrows : ncols); + std::size_t opa_ncols = + static_cast(transpose_val == oneapi::mkl::transpose::nontrans ? ncols : nrows); + + // Input matrix in CSR format + std::vector ia_host, ja_host; + std::vector a_host; + intType nnz = generate_random_matrix(nrows, ncols, density_A_matrix, int_index, + ia_host, ja_host, a_host); + + // Input and output dense vectors + // The host and device output are filled with a dummy value to spot mistakes. + std::vector x_host; + std::vector y_host(opa_nrows, fpType(2)); + rand_vector(x_host, opa_ncols); + + // Shuffle ordering of column indices/values to test sortedness + shuffle_data(ia_host.data(), ja_host.data(), a_host.data(), static_cast(nrows)); + + auto ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); + auto ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); + auto x_usm_uptr = malloc_device_uptr(main_queue, x_host.size()); + auto y_usm_uptr = malloc_device_uptr(main_queue, y_host.size()); + auto a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); + + intType *ia_usm = ia_usm_uptr.get(); + intType *ja_usm = ja_usm_uptr.get(); + fpType *x_usm = x_usm_uptr.get(); + fpType *y_usm = y_usm_uptr.get(); + fpType *a_usm = a_usm_uptr.get(); + + std::vector mat_dependencies; + std::vector gemv_dependencies; + // Copy host to device + mat_dependencies.push_back( + main_queue.memcpy(ia_usm, ia_host.data(), ia_host.size() * sizeof(intType))); + mat_dependencies.push_back( + main_queue.memcpy(ja_usm, ja_host.data(), ja_host.size() * sizeof(intType))); + mat_dependencies.push_back( + main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType))); + gemv_dependencies.push_back( + main_queue.memcpy(x_usm, x_host.data(), x_host.size() * sizeof(fpType))); + gemv_dependencies.push_back( + main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType))); + + sycl::event ev_copy, ev_release; + oneapi::mkl::sparse::matrix_handle_t handle = nullptr; + try { + sycl::event ev_set, ev_opt, ev_gemv; + CALL_RT_OR_CT(oneapi::mkl::sparse::init_matrix_handle, main_queue, &handle); + + CALL_RT_OR_CT(ev_set = oneapi::mkl::sparse::set_csr_data, main_queue, handle, nrows, ncols, + nnz, index, ia_usm, ja_usm, a_usm, mat_dependencies); + + CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::optimize_gemv, main_queue, transpose_val, + handle, { ev_set }); + + gemv_dependencies.push_back(ev_opt); + CALL_RT_OR_CT(ev_gemv = oneapi::mkl::sparse::gemv, main_queue, transpose_val, alpha, handle, + x_usm, beta, y_usm, gemv_dependencies); + + CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, &handle, + { ev_gemv }); + + ev_copy = main_queue.memcpy(y_host.data(), y_usm, y_host.size() * sizeof(fpType), ev_gemv); + } + catch (const sycl::exception &e) { + std::cout << "Caught synchronous SYCL exception during sparse GEMV:\n" + << e.what() << std::endl; + print_error_code(e); + return 0; + } + catch (const oneapi::mkl::unimplemented &e) { + wait_and_free(main_queue, &handle); + return test_skipped; + } + catch (const std::runtime_error &error) { + std::cout << "Error raised during execution of sparse GEMV:\n" << error.what() << std::endl; + return 0; + } + + // Compute reference. + // The reference is filled with a dummy value to spot mistakes. + std::vector y_ref_host(opa_nrows, fpType(2)); + prepare_reference_gemv_data(ia_host.data(), ja_host.data(), a_host.data(), nrows, ncols, nnz, + int_index, transpose_val, alpha, beta, x_host.data(), + y_ref_host.data()); + + // Compare the results of reference implementation and DPC++ implementation. + ev_copy.wait_and_throw(); + bool valid = check_equal_vector(y_host, y_ref_host); + + ev_release.wait_and_throw(); + return static_cast(valid); +} + +class SparseGemvUsmTests : public ::testing::TestWithParam {}; + +template +void test_helper(sycl::device *dev, oneapi::mkl::transpose transpose_val) { + double density_A_matrix = 0.8; + fpType fp_zero = set_fp_value()(0.f, 0.f); + fpType fp_one = set_fp_value()(1.f, 0.f); + oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; + // Basic test + EXPECT_TRUEORSKIP( + test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero)); + // Test index_base 1 + EXPECT_TRUEORSKIP(test(dev, 4, 6, density_A_matrix, oneapi::mkl::index_base::one, transpose_val, + fp_one, fp_zero)); + // Test non-default alpha + EXPECT_TRUEORSKIP(test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, + set_fp_value()(2.f, 1.5f), fp_zero)); + // Test non-default beta + EXPECT_TRUEORSKIP(test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_one, + set_fp_value()(3.2f, 1.f))); + // Test 0 alpha + EXPECT_TRUEORSKIP( + test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_zero, fp_one)); + // Test 0 alpha and beta + EXPECT_TRUEORSKIP( + test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_zero, fp_zero)); + // Test int64 indices + EXPECT_TRUEORSKIP( + test(dev, 27L, 13L, density_A_matrix, index_zero, transpose_val, fp_one, fp_one)); +} + +TEST_P(SparseGemvUsmTests, RealSinglePrecision) { + using fpType = float; + test_helper(GetParam(), oneapi::mkl::transpose::nontrans); + test_helper(GetParam(), oneapi::mkl::transpose::trans); +} + +TEST_P(SparseGemvUsmTests, RealDoublePrecision) { + using fpType = double; + test_helper(GetParam(), oneapi::mkl::transpose::nontrans); + test_helper(GetParam(), oneapi::mkl::transpose::trans); +} + +TEST_P(SparseGemvUsmTests, ComplexSinglePrecision) { + using fpType = std::complex; + test_helper(GetParam(), oneapi::mkl::transpose::nontrans); + test_helper(GetParam(), oneapi::mkl::transpose::trans); + test_helper(GetParam(), oneapi::mkl::transpose::conjtrans); +} + +TEST_P(SparseGemvUsmTests, ComplexDoublePrecision) { + using fpType = std::complex; + test_helper(GetParam(), oneapi::mkl::transpose::nontrans); + test_helper(GetParam(), oneapi::mkl::transpose::trans); + test_helper(GetParam(), oneapi::mkl::transpose::conjtrans); +} + +INSTANTIATE_TEST_SUITE_P(SparseGemvUsmTestSuite, SparseGemvUsmTests, testing::ValuesIn(devices), + ::DeviceNamePrint()); + +} // anonymous namespace From f6549a58c1d56f5b7c77c11c55931082af4ee375 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Tue, 17 Oct 2023 15:30:43 +0100 Subject: [PATCH 2/7] Adapt examples to be able to switch to complex --- examples/include/example_helper.hpp | 50 +++++++++++++++++++ .../sparse_blas_gemv_usm_mklcpu.cpp | 33 +++++++----- .../sparse_blas_gemv_usm.cpp | 33 +++++++----- 3 files changed, 90 insertions(+), 26 deletions(-) diff --git a/examples/include/example_helper.hpp b/examples/include/example_helper.hpp index 312404ee7..4a89e6fae 100644 --- a/examples/include/example_helper.hpp +++ b/examples/include/example_helper.hpp @@ -26,8 +26,30 @@ #include #endif +#include +#include +#include +#include #include +// Complex helpers. +template +struct complex_info { + using real_type = T; + static const bool is_complex = false; +}; + +template +struct complex_info> { + using real_type = T; + static const bool is_complex = true; +}; + +template +struct is_complex : std::false_type {}; +template +struct is_complex> : std::true_type {}; + // // helpers for initializing templated scalar data type values. // @@ -36,6 +58,12 @@ fp set_fp_value(fp arg1, fp /*arg2*/ = fp(0.0)) { return arg1; } +template +std::complex set_fp_value(std::complex arg1, + std::complex arg2 = std::complex(0.0)) { + return std::complex(arg1.real(), arg2.real()); +} + // // print a 2x2 block of data from matrix M using the sycl accessor // @@ -121,6 +149,28 @@ intType generate_sparse_matrix(const intType nx, intType *ia, intType *ja, fp *a return nnz; } +template +bool check_errors(fp x, fp x_ref, fp_real bound) { + fp_real aerr = std::abs(x - x_ref); + fp_real rerr = aerr / (std::abs(x_ref) + std::numeric_limits::epsilon()); + bool ok = (rerr <= bound) || (aerr <= bound); + if (!ok) + std::cout << "relative error = " << rerr << " absolute error = " << aerr + << " limit = " << bound; + return ok; +} + +template +bool check_result(fp res, fp ref, intType nFlops, intType index) { + bool check; + using fp_real = typename complex_info::real_type; + fp_real bound = std::numeric_limits::epsilon() * static_cast(nFlops); + check = check_errors(res, ref, bound); + if (!check) + std::cout << " in index: " << index << std::endl; + return check; +} + template void free_vec(std::vector &ptr_vec, sycl::queue queue) { for (auto ptr : ptr_vec) { diff --git a/examples/sparse_blas/compile_time_dispatching/sparse_blas_gemv_usm_mklcpu.cpp b/examples/sparse_blas/compile_time_dispatching/sparse_blas_gemv_usm_mklcpu.cpp index 4b86f976d..edb6d7e1f 100644 --- a/examples/sparse_blas/compile_time_dispatching/sparse_blas_gemv_usm_mklcpu.cpp +++ b/examples/sparse_blas/compile_time_dispatching/sparse_blas_gemv_usm_mklcpu.cpp @@ -59,10 +59,8 @@ // // is performed and finally the results are post processed. // +template int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { - using fp = float; - using intType = std::int32_t; - // Matrix data size intType size = 4; intType nrows = size * size * size; @@ -162,25 +160,34 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { // fp *res = y; + const bool isConj = (transA == oneapi::mkl::transpose::conjtrans); + for (intType row = 0; row < nrows; row++) { + z[row] *= beta; + } for (intType row = 0; row < nrows; row++) { - fp tmp = set_fp_value(fp(0.0)); + fp tmp = alpha * x[row]; for (intType i = ia[row]; i < ia[row + 1]; i++) { - tmp += a[i] * x[ja[i]]; + if constexpr (is_complex()) { + z[ja[i]] += tmp * (isConj ? std::conj(a[i]) : a[i]); + } + else { + z[ja[i]] += tmp * a[i]; + } } - z[row] = alpha * tmp + beta * z[row]; } - fp diff = set_fp_value(fp(0.0)); - for (intType i = 0; i < nrows; i++) { - diff += (z[i] - res[i]) * (z[i] - res[i]); + bool good = true; + for (intType row = 0; row < nrows; row++) { + good &= check_result(res[row], z[row], nrows, row); } - std::cout << "\n\t\t sparse::gemv residual:\n" - << "\t\t\t" << diff << "\n\tFinished" << std::endl; + + std::cout << "\n\t\t sparse::gemv example " << (good ? "passed" : "failed") << "\n\tFinished" + << std::endl; free_vec(fp_ptr_vec, cpu_queue); free_vec(int_ptr_vec, cpu_queue); - if (diff > 0) + if (!good) return 1; return 0; @@ -230,7 +237,7 @@ int main(int /*argc*/, char ** /*argv*/) { << std::endl; std::cout << "Running with single precision real data type:" << std::endl; - run_sparse_matrix_vector_multiply_example(cpu_dev); + run_sparse_matrix_vector_multiply_example(cpu_dev); std::cout << "Sparse BLAS GEMV USM example ran OK." << std::endl; } catch (sycl::exception const &e) { diff --git a/examples/sparse_blas/run_time_dispatching/sparse_blas_gemv_usm.cpp b/examples/sparse_blas/run_time_dispatching/sparse_blas_gemv_usm.cpp index fb5dc8a67..35f1081ad 100644 --- a/examples/sparse_blas/run_time_dispatching/sparse_blas_gemv_usm.cpp +++ b/examples/sparse_blas/run_time_dispatching/sparse_blas_gemv_usm.cpp @@ -60,10 +60,8 @@ // // is performed and finally the results are post processed. // +template int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { - using fp = float; - using intType = std::int32_t; - // Matrix data size intType size = 4; intType nrows = size * size * size; @@ -161,25 +159,34 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { // fp *res = y; + const bool isConj = (transA == oneapi::mkl::transpose::conjtrans); + for (intType row = 0; row < nrows; row++) { + z[row] *= beta; + } for (intType row = 0; row < nrows; row++) { - fp tmp = set_fp_value(fp(0.0)); + fp tmp = alpha * x[row]; for (intType i = ia[row]; i < ia[row + 1]; i++) { - tmp += a[i] * x[ja[i]]; + if constexpr (is_complex()) { + z[ja[i]] += tmp * (isConj ? std::conj(a[i]) : a[i]); + } + else { + z[ja[i]] += tmp * a[i]; + } } - z[row] = alpha * tmp + beta * z[row]; } - fp diff = set_fp_value(fp(0.0)); - for (intType i = 0; i < nrows; i++) { - diff += (z[i] - res[i]) * (z[i] - res[i]); + bool good = true; + for (intType row = 0; row < nrows; row++) { + good &= check_result(res[row], z[row], nrows, row); } - std::cout << "\n\t\t sparse::gemv residual:\n" - << "\t\t\t" << diff << "\n\tFinished" << std::endl; + + std::cout << "\n\t\t sparse::gemv example " << (good ? "passed" : "failed") << "\n\tFinished" + << std::endl; free_vec(fp_ptr_vec, main_queue); free_vec(int_ptr_vec, main_queue); - if (diff > 0) + if (!good) return 1; return 0; @@ -238,7 +245,7 @@ int main(int /*argc*/, char ** /*argv*/) { } std::cout << "Running with single precision real data type:" << std::endl; - run_sparse_matrix_vector_multiply_example(dev); + run_sparse_matrix_vector_multiply_example(dev); std::cout << "Sparse BLAS GEMV USM example ran OK." << std::endl; } catch (sycl::exception const &e) { From f7c72b97e675d87fbf141c3ae79a60cd637a60e9 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Tue, 17 Oct 2023 16:56:03 +0100 Subject: [PATCH 3/7] Re-use macros in function_table --- src/sparse_blas/function_table.hpp | 301 +++++-------------------- src/sparse_blas/macros.hpp | 39 ++++ src/sparse_blas/sparse_blas_loader.cpp | 21 +- 3 files changed, 95 insertions(+), 266 deletions(-) create mode 100644 src/sparse_blas/macros.hpp diff --git a/src/sparse_blas/function_table.hpp b/src/sparse_blas/function_table.hpp index d4656190a..4d2296ff5 100644 --- a/src/sparse_blas/function_table.hpp +++ b/src/sparse_blas/function_table.hpp @@ -21,6 +21,52 @@ #define _ONEMKL_SPARSE_BLAS_FUNCTION_TABLE_HPP_ #include "oneapi/mkl/sparse_blas/types.hpp" +#include "sparse_blas/macros.hpp" + +#define DEFINE_SET_CSR_DATA(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ + void (*set_csr_data_buffer##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t handle, INT_TYPE num_rows, \ + INT_TYPE num_cols, INT_TYPE nnz, oneapi::mkl::index_base index, \ + sycl::buffer & row_ptr, sycl::buffer & col_ind, \ + sycl::buffer & val); \ + sycl::event (*set_csr_data_usm##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t handle, INT_TYPE num_rows, \ + INT_TYPE num_cols, INT_TYPE nnz, oneapi::mkl::index_base index, INT_TYPE * row_ptr, \ + INT_TYPE * col_ind, FP_TYPE * val, const std::vector &dependencies) + +#define DEFINE_GEMV(FP_TYPE, FP_SUFFIX) \ + void (*gemv_buffer##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::transpose transpose_val, const FP_TYPE alpha, \ + oneapi::mkl::sparse::matrix_handle_t A_handle, sycl::buffer &x, \ + const FP_TYPE beta, sycl::buffer &y); \ + sycl::event (*gemv_usm##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::transpose transpose_val, const FP_TYPE alpha, \ + oneapi::mkl::sparse::matrix_handle_t A_handle, const FP_TYPE *x, const FP_TYPE beta, \ + FP_TYPE *y, const std::vector &dependencies) + +#define DEFINE_TRSV(FP_TYPE, FP_SUFFIX) \ + void (*trsv_buffer##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::uplo uplo_val, oneapi::mkl::transpose transpose_val, \ + oneapi::mkl::diag diag_val, oneapi::mkl::sparse::matrix_handle_t A_handle, \ + sycl::buffer & x, sycl::buffer & y); \ + sycl::event (*trsv_usm##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::uplo uplo_val, oneapi::mkl::transpose transpose_val, \ + oneapi::mkl::diag diag_val, oneapi::mkl::sparse::matrix_handle_t A_handle, \ + const FP_TYPE *x, FP_TYPE *y, const std::vector &dependencies) + +#define DEFINE_GEMM(FP_TYPE, FP_SUFFIX) \ + void (*gemm_buffer##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::layout dense_matrix_layout, \ + oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, \ + const FP_TYPE alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, \ + sycl::buffer &B, const std::int64_t columns, const std::int64_t ldb, \ + const FP_TYPE beta, sycl::buffer &C, const std::int64_t ldc); \ + sycl::event (*gemm_usm##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::layout dense_matrix_layout, \ + oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, \ + const FP_TYPE alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, const FP_TYPE *B, \ + const std::int64_t columns, const std::int64_t ldb, const FP_TYPE beta, FP_TYPE *C, \ + const std::int64_t ldc, const std::vector &dependencies) typedef struct { int version; @@ -30,117 +76,7 @@ typedef struct { oneapi::mkl::sparse::matrix_handle_t *p_handle, const std::vector &dependencies); - // set_csr_data - void (*set_csr_data_buffer_rf_i32)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t handle, - std::int32_t num_rows, std::int32_t num_cols, - std::int32_t nnz, oneapi::mkl::index_base index, - sycl::buffer &row_ptr, - sycl::buffer &col_ind, - sycl::buffer &val); - void (*set_csr_data_buffer_rd_i32)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t handle, - std::int32_t num_rows, std::int32_t num_cols, - std::int32_t nnz, oneapi::mkl::index_base index, - sycl::buffer &row_ptr, - sycl::buffer &col_ind, - sycl::buffer &val); - void (*set_csr_data_buffer_cf_i32)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t handle, - std::int32_t num_rows, std::int32_t num_cols, - std::int32_t nnz, oneapi::mkl::index_base index, - sycl::buffer &row_ptr, - sycl::buffer &col_ind, - sycl::buffer, 1> &val); - void (*set_csr_data_buffer_cd_i32)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t handle, - std::int32_t num_rows, std::int32_t num_cols, - std::int32_t nnz, oneapi::mkl::index_base index, - sycl::buffer &row_ptr, - sycl::buffer &col_ind, - sycl::buffer, 1> &val); - void (*set_csr_data_buffer_rf_i64)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t handle, - std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, oneapi::mkl::index_base index, - sycl::buffer &row_ptr, - sycl::buffer &col_ind, - sycl::buffer &val); - void (*set_csr_data_buffer_rd_i64)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t handle, - std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, oneapi::mkl::index_base index, - sycl::buffer &row_ptr, - sycl::buffer &col_ind, - sycl::buffer &val); - void (*set_csr_data_buffer_cf_i64)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t handle, - std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, oneapi::mkl::index_base index, - sycl::buffer &row_ptr, - sycl::buffer &col_ind, - sycl::buffer, 1> &val); - void (*set_csr_data_buffer_cd_i64)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t handle, - std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, oneapi::mkl::index_base index, - sycl::buffer &row_ptr, - sycl::buffer &col_ind, - sycl::buffer, 1> &val); - sycl::event (*set_csr_data_usm_rf_i32)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t handle, - std::int32_t num_rows, std::int32_t num_cols, - std::int32_t nnz, oneapi::mkl::index_base index, - std::int32_t *row_ptr, std::int32_t *col_ind, float *val, - const std::vector &dependencies); - sycl::event (*set_csr_data_usm_rd_i32)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t handle, - std::int32_t num_rows, std::int32_t num_cols, - std::int32_t nnz, oneapi::mkl::index_base index, - std::int32_t *row_ptr, std::int32_t *col_ind, - double *val, - const std::vector &dependencies); - sycl::event (*set_csr_data_usm_cf_i32)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t handle, - std::int32_t num_rows, std::int32_t num_cols, - std::int32_t nnz, oneapi::mkl::index_base index, - std::int32_t *row_ptr, std::int32_t *col_ind, - std::complex *val, - const std::vector &dependencies); - sycl::event (*set_csr_data_usm_cd_i32)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t handle, - std::int32_t num_rows, std::int32_t num_cols, - std::int32_t nnz, oneapi::mkl::index_base index, - std::int32_t *row_ptr, std::int32_t *col_ind, - std::complex *val, - const std::vector &dependencies); - sycl::event (*set_csr_data_usm_rf_i64)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t handle, - std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, oneapi::mkl::index_base index, - std::int64_t *row_ptr, std::int64_t *col_ind, float *val, - const std::vector &dependencies); - sycl::event (*set_csr_data_usm_rd_i64)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t handle, - std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, oneapi::mkl::index_base index, - std::int64_t *row_ptr, std::int64_t *col_ind, - double *val, - const std::vector &dependencies); - sycl::event (*set_csr_data_usm_cf_i64)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t handle, - std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, oneapi::mkl::index_base index, - std::int64_t *row_ptr, std::int64_t *col_ind, - std::complex *val, - const std::vector &dependencies); - sycl::event (*set_csr_data_usm_cd_i64)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t handle, - std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, oneapi::mkl::index_base index, - std::int64_t *row_ptr, std::int64_t *col_ind, - std::complex *val, - const std::vector &dependencies); + FOR_EACH_FP_AND_INT_TYPE(DEFINE_SET_CSR_DATA); // optimize_* sycl::event (*optimize_gemv)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, @@ -151,141 +87,14 @@ typedef struct { oneapi::mkl::sparse::matrix_handle_t handle, const std::vector &dependencies); - // gemv - void (*gemv_buffer_rf)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, - const float alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, - sycl::buffer &x, const float beta, sycl::buffer &y); - void (*gemv_buffer_rd)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, - const double alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, - sycl::buffer &x, const double beta, - sycl::buffer &y); - void (*gemv_buffer_cf)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, - const std::complex alpha, - oneapi::mkl::sparse::matrix_handle_t A_handle, - sycl::buffer, 1> &x, const std::complex beta, - sycl::buffer, 1> &y); - void (*gemv_buffer_cd)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, - const std::complex alpha, - oneapi::mkl::sparse::matrix_handle_t A_handle, - sycl::buffer, 1> &x, - const std::complex beta, - sycl::buffer, 1> &y); - sycl::event (*gemv_usm_rf)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, - const float alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, - const float *x, const float beta, float *y, - const std::vector &dependencies); - sycl::event (*gemv_usm_rd)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, - const double alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, - const double *x, const double beta, double *y, - const std::vector &dependencies); - sycl::event (*gemv_usm_cf)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, - const std::complex alpha, - oneapi::mkl::sparse::matrix_handle_t A_handle, - const std::complex *x, const std::complex beta, - std::complex *y, - const std::vector &dependencies); - sycl::event (*gemv_usm_cd)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, - const std::complex alpha, - oneapi::mkl::sparse::matrix_handle_t A_handle, - const std::complex *x, const std::complex beta, - std::complex *y, - const std::vector &dependencies); - - // trsv - void (*trsv_buffer_rf)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, - oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, - oneapi::mkl::sparse::matrix_handle_t A_handle, sycl::buffer &x, - sycl::buffer &y); - void (*trsv_buffer_rd)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, - oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, - oneapi::mkl::sparse::matrix_handle_t A_handle, - sycl::buffer &x, sycl::buffer &y); - void (*trsv_buffer_cf)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, - oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, - oneapi::mkl::sparse::matrix_handle_t A_handle, - sycl::buffer, 1> &x, - sycl::buffer, 1> &y); - void (*trsv_buffer_cd)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, - oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, - oneapi::mkl::sparse::matrix_handle_t A_handle, - sycl::buffer, 1> &x, - sycl::buffer, 1> &y); - sycl::event (*trsv_usm_rf)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, - oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, - oneapi::mkl::sparse::matrix_handle_t A_handle, const float *x, - float *y, const std::vector &dependencies); - sycl::event (*trsv_usm_rd)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, - oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, - oneapi::mkl::sparse::matrix_handle_t A_handle, const double *x, - double *y, const std::vector &dependencies); - sycl::event (*trsv_usm_cf)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, - oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, - oneapi::mkl::sparse::matrix_handle_t A_handle, - const std::complex *x, std::complex *y, - const std::vector &dependencies); - sycl::event (*trsv_usm_cd)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, - oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, - oneapi::mkl::sparse::matrix_handle_t A_handle, - const std::complex *x, std::complex *y, - const std::vector &dependencies); - - // gemm - void (*gemm_buffer_rf)(sycl::queue &queue, oneapi::mkl::layout dense_matrix_layout, - oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, - const float alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, - sycl::buffer &B, const std::int64_t columns, - const std::int64_t ldb, const float beta, sycl::buffer &C, - const std::int64_t ldc); - void (*gemm_buffer_rd)(sycl::queue &queue, oneapi::mkl::layout dense_matrix_layout, - oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, - const double alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, - sycl::buffer &B, const std::int64_t columns, - const std::int64_t ldb, const double beta, sycl::buffer &C, - const std::int64_t ldc); - void (*gemm_buffer_cf)(sycl::queue &queue, oneapi::mkl::layout dense_matrix_layout, - oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, - const std::complex alpha, - oneapi::mkl::sparse::matrix_handle_t A_handle, - sycl::buffer, 1> &B, const std::int64_t columns, - const std::int64_t ldb, const std::complex beta, - sycl::buffer, 1> &C, const std::int64_t ldc); - void (*gemm_buffer_cd)(sycl::queue &queue, oneapi::mkl::layout dense_matrix_layout, - oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, - const std::complex alpha, - oneapi::mkl::sparse::matrix_handle_t A_handle, - sycl::buffer, 1> &B, const std::int64_t columns, - const std::int64_t ldb, const std::complex beta, - sycl::buffer, 1> &C, const std::int64_t ldc); - sycl::event (*gemm_usm_rf)(sycl::queue &queue, oneapi::mkl::layout dense_matrix_layout, - oneapi::mkl::transpose transpose_A, - oneapi::mkl::transpose transpose_B, const float alpha, - oneapi::mkl::sparse::matrix_handle_t A_handle, const float *B, - const std::int64_t columns, const std::int64_t ldb, const float beta, - float *C, const std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*gemm_usm_rd)(sycl::queue &queue, oneapi::mkl::layout dense_matrix_layout, - oneapi::mkl::transpose transpose_A, - oneapi::mkl::transpose transpose_B, const double alpha, - oneapi::mkl::sparse::matrix_handle_t A_handle, const double *B, - const std::int64_t columns, const std::int64_t ldb, - const double beta, double *C, const std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*gemm_usm_cf)(sycl::queue &queue, oneapi::mkl::layout dense_matrix_layout, - oneapi::mkl::transpose transpose_A, - oneapi::mkl::transpose transpose_B, const std::complex alpha, - oneapi::mkl::sparse::matrix_handle_t A_handle, - const std::complex *B, const std::int64_t columns, - const std::int64_t ldb, const std::complex beta, - std::complex *C, const std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*gemm_usm_cd)(sycl::queue &queue, oneapi::mkl::layout dense_matrix_layout, - oneapi::mkl::transpose transpose_A, - oneapi::mkl::transpose transpose_B, const std::complex alpha, - oneapi::mkl::sparse::matrix_handle_t A_handle, - const std::complex *B, const std::int64_t columns, - const std::int64_t ldb, const std::complex beta, - std::complex *C, const std::int64_t ldc, - const std::vector &dependencies); + FOR_EACH_FP_TYPE(DEFINE_GEMV); + FOR_EACH_FP_TYPE(DEFINE_TRSV); + FOR_EACH_FP_TYPE(DEFINE_GEMM); } sparse_blas_function_table_t; +#undef DEFINE_SET_CSR_DATA +#undef DEFINE_GEMV +#undef DEFINE_TRSV +#undef DEFINE_GEMM + #endif // _ONEMKL_SPARSE_BLAS_FUNCTION_TABLE_HPP_ diff --git a/src/sparse_blas/macros.hpp b/src/sparse_blas/macros.hpp new file mode 100644 index 000000000..a4ef88e35 --- /dev/null +++ b/src/sparse_blas/macros.hpp @@ -0,0 +1,39 @@ +/******************************************************************************* +* Copyright 2023 Codeplay Software Ltd. +* +* (*Licensed under the Apache License, Version 2.0 )(the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#ifndef _ONEMKL_SPARSE_BLAS_MACROS_HPP_ +#define _ONEMKL_SPARSE_BLAS_MACROS_HPP_ + +#define FOR_EACH_FP_TYPE(DEFINE_MACRO) \ + DEFINE_MACRO(float, _rf); \ + DEFINE_MACRO(double, _rd); \ + DEFINE_MACRO(std::complex, _cf); \ + DEFINE_MACRO(std::complex, _cd) + +#define FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, INT_TYPE, INT_SUFFIX) \ + DEFINE_MACRO(float, _rf, INT_TYPE, INT_SUFFIX); \ + DEFINE_MACRO(double, _rd, INT_TYPE, INT_SUFFIX); \ + DEFINE_MACRO(std::complex, _cf, INT_TYPE, INT_SUFFIX); \ + DEFINE_MACRO(std::complex, _cd, INT_TYPE, INT_SUFFIX) + +#define FOR_EACH_FP_AND_INT_TYPE(DEFINE_MACRO) \ + FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, std::int32_t, _i32); \ + FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, std::int64_t, _i64) + +#endif // _ONEMKL_SPARSE_BLAS_MACROS_HPP_ diff --git a/src/sparse_blas/sparse_blas_loader.cpp b/src/sparse_blas/sparse_blas_loader.cpp index 07d065561..3488d52ba 100644 --- a/src/sparse_blas/sparse_blas_loader.cpp +++ b/src/sparse_blas/sparse_blas_loader.cpp @@ -21,24 +21,9 @@ #include "function_table_initializer.hpp" #include "sparse_blas/function_table.hpp" +#include "sparse_blas/macros.hpp" #include "oneapi/mkl/detail/get_device_id.hpp" -#define FOR_EACH_FP_TYPE(DEFINE_MACRO) \ - DEFINE_MACRO(float, _rf); \ - DEFINE_MACRO(double, _rd); \ - DEFINE_MACRO(std::complex, _cf); \ - DEFINE_MACRO(std::complex, _cd) - -#define FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, INT_TYPE, INT_SUFFIX) \ - DEFINE_MACRO(float, _rf, INT_TYPE, INT_SUFFIX); \ - DEFINE_MACRO(double, _rd, INT_TYPE, INT_SUFFIX); \ - DEFINE_MACRO(std::complex, _cf, INT_TYPE, INT_SUFFIX); \ - DEFINE_MACRO(std::complex, _cd, INT_TYPE, INT_SUFFIX) - -#define FOR_EACH_FP_AND_INT_TYPE(DEFINE_MACRO) \ - FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, std::int32_t, _i32); \ - FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, std::int64_t, _i64) - namespace oneapi::mkl::sparse { static oneapi::mkl::detail::table_initializer Date: Wed, 18 Oct 2023 13:27:37 +0100 Subject: [PATCH 4/7] Fill y with random values --- .../sparse_blas/source/sparse_gemv_buffer.cpp | 18 +++++++++++------ .../sparse_blas/source/sparse_gemv_usm.cpp | 20 ++++++++++++------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp index c831514e7..4534b67d9 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp @@ -61,19 +61,20 @@ int test(sycl::device *dev, intType nrows, intType ncols, double density_A_matri ia_host, ja_host, a_host); // Input and output dense vectors - // The host and device output are filled with a dummy value to spot mistakes. - std::vector x_host; - std::vector y_host(opa_nrows, fpType(2)); + // The input `x` and the input-output `y` are both initialized to random values on host and device. + std::vector x_host, y_host; rand_vector(x_host, opa_ncols); + rand_vector(y_host, opa_nrows); + std::vector y_ref_host(y_host); // Shuffle ordering of column indices/values to test sortedness shuffle_data(ia_host.data(), ja_host.data(), a_host.data(), static_cast(nrows)); auto ia_buf = make_buffer(ia_host); auto ja_buf = make_buffer(ja_host); + auto a_buf = make_buffer(a_host); auto x_buf = make_buffer(x_host); auto y_buf = make_buffer(y_host); - auto a_buf = make_buffer(a_host); oneapi::mkl::sparse::matrix_handle_t handle = nullptr; sycl::event ev_release; @@ -106,8 +107,6 @@ int test(sycl::device *dev, intType nrows, intType ncols, double density_A_matri } // Compute reference. - // The reference is filled with a dummy value to spot mistakes. - std::vector y_ref_host(opa_nrows, fpType(2)); prepare_reference_gemv_data(ia_host.data(), ja_host.data(), a_host.data(), nrows, ncols, nnz, int_index, transpose_val, alpha, beta, x_host.data(), y_ref_host.data()); @@ -122,6 +121,13 @@ int test(sycl::device *dev, intType nrows, intType ncols, double density_A_matri class SparseGemvBufferTests : public ::testing::TestWithParam {}; +/** + * Helper function to run tests in different configuration. + * + * @tparam fpType Complex or scalar, single or double precision type + * @param dev Device to test + * @param transpose_val Transpose value for the input matrix + */ template void test_helper(sycl::device *dev, oneapi::mkl::transpose transpose_val) { double density_A_matrix = 0.8; diff --git a/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp index e6b0520e1..873855ca9 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp @@ -61,25 +61,26 @@ int test(sycl::device *dev, intType nrows, intType ncols, double density_A_matri ia_host, ja_host, a_host); // Input and output dense vectors - // The host and device output are filled with a dummy value to spot mistakes. - std::vector x_host; - std::vector y_host(opa_nrows, fpType(2)); + // The input `x` and the input-output `y` are both initialized to random values on host and device. + std::vector x_host, y_host; rand_vector(x_host, opa_ncols); + rand_vector(y_host, opa_nrows); + std::vector y_ref_host(y_host); // Shuffle ordering of column indices/values to test sortedness shuffle_data(ia_host.data(), ja_host.data(), a_host.data(), static_cast(nrows)); auto ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); auto ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); + auto a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); auto x_usm_uptr = malloc_device_uptr(main_queue, x_host.size()); auto y_usm_uptr = malloc_device_uptr(main_queue, y_host.size()); - auto a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); intType *ia_usm = ia_usm_uptr.get(); intType *ja_usm = ja_usm_uptr.get(); + fpType *a_usm = a_usm_uptr.get(); fpType *x_usm = x_usm_uptr.get(); fpType *y_usm = y_usm_uptr.get(); - fpType *a_usm = a_usm_uptr.get(); std::vector mat_dependencies; std::vector gemv_dependencies; @@ -132,8 +133,6 @@ int test(sycl::device *dev, intType nrows, intType ncols, double density_A_matri } // Compute reference. - // The reference is filled with a dummy value to spot mistakes. - std::vector y_ref_host(opa_nrows, fpType(2)); prepare_reference_gemv_data(ia_host.data(), ja_host.data(), a_host.data(), nrows, ncols, nnz, int_index, transpose_val, alpha, beta, x_host.data(), y_ref_host.data()); @@ -148,6 +147,13 @@ int test(sycl::device *dev, intType nrows, intType ncols, double density_A_matri class SparseGemvUsmTests : public ::testing::TestWithParam {}; +/** + * Helper function to run tests in different configuration. + * + * @tparam fpType Complex or scalar, single or double precision type + * @param dev Device to test + * @param transpose_val Transpose value for the input matrix + */ template void test_helper(sycl::device *dev, oneapi::mkl::transpose transpose_val) { double density_A_matrix = 0.8; From 0c95a49df8f3a929ef6f7fbc65b9d2becd7813e3 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Thu, 19 Oct 2023 10:22:13 +0100 Subject: [PATCH 5/7] Allow to skip double tests --- tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp | 2 ++ tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp index 4534b67d9..54c6dfdb6 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp @@ -165,6 +165,7 @@ TEST_P(SparseGemvBufferTests, RealSinglePrecision) { TEST_P(SparseGemvBufferTests, RealDoublePrecision) { using fpType = double; + CHECK_DOUBLE_ON_DEVICE(GetParam()); test_helper(GetParam(), oneapi::mkl::transpose::nontrans); test_helper(GetParam(), oneapi::mkl::transpose::trans); } @@ -178,6 +179,7 @@ TEST_P(SparseGemvBufferTests, ComplexSinglePrecision) { TEST_P(SparseGemvBufferTests, ComplexDoublePrecision) { using fpType = std::complex; + CHECK_DOUBLE_ON_DEVICE(GetParam()); test_helper(GetParam(), oneapi::mkl::transpose::nontrans); test_helper(GetParam(), oneapi::mkl::transpose::trans); test_helper(GetParam(), oneapi::mkl::transpose::conjtrans); diff --git a/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp index 873855ca9..e3080c5e7 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp @@ -191,6 +191,7 @@ TEST_P(SparseGemvUsmTests, RealSinglePrecision) { TEST_P(SparseGemvUsmTests, RealDoublePrecision) { using fpType = double; + CHECK_DOUBLE_ON_DEVICE(GetParam()); test_helper(GetParam(), oneapi::mkl::transpose::nontrans); test_helper(GetParam(), oneapi::mkl::transpose::trans); } @@ -204,6 +205,7 @@ TEST_P(SparseGemvUsmTests, ComplexSinglePrecision) { TEST_P(SparseGemvUsmTests, ComplexDoublePrecision) { using fpType = std::complex; + CHECK_DOUBLE_ON_DEVICE(GetParam()); test_helper(GetParam(), oneapi::mkl::transpose::nontrans); test_helper(GetParam(), oneapi::mkl::transpose::trans); test_helper(GetParam(), oneapi::mkl::transpose::conjtrans); From 183864162e13647468c4230e8cf0d8510b4c9ea2 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Fri, 20 Oct 2023 14:29:43 +0100 Subject: [PATCH 6/7] Add detail namespace for consistency --- src/sparse_blas/backends/mkl_common/mkl_operations.cxx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_operations.cxx b/src/sparse_blas/backends/mkl_common/mkl_operations.cxx index c854b63be..72a2b7b2c 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_operations.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_operations.cxx @@ -20,7 +20,7 @@ sycl::event optimize_gemv(sycl::queue& queue, transpose transpose_val, detail::matrix_handle* handle, const std::vector& dependencies) { - return oneapi::mkl::sparse::optimize_gemv(queue, transpose_val, get_handle(handle), + return oneapi::mkl::sparse::optimize_gemv(queue, transpose_val, detail::get_handle(handle), dependencies); } @@ -35,7 +35,7 @@ std::enable_if_t> gemv( sycl::queue& queue, transpose transpose_val, const fpType alpha, detail::matrix_handle* A_handle, sycl::buffer& x, const fpType beta, sycl::buffer& y) { - oneapi::mkl::sparse::gemv(queue, transpose_val, alpha, get_handle(A_handle), x, beta, y); + oneapi::mkl::sparse::gemv(queue, transpose_val, alpha, detail::get_handle(A_handle), x, beta, y); } template @@ -43,7 +43,7 @@ std::enable_if_t, sycl::event> gemv( sycl::queue& queue, transpose transpose_val, const fpType alpha, detail::matrix_handle* A_handle, const fpType* x, const fpType beta, fpType* y, const std::vector& dependencies) { - return oneapi::mkl::sparse::gemv(queue, transpose_val, alpha, get_handle(A_handle), x, beta, y, + return oneapi::mkl::sparse::gemv(queue, transpose_val, alpha, detail::get_handle(A_handle), x, beta, y, dependencies); } From dc1167a2b788eccc10f6720f83c1577f4fca9074 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Mon, 23 Oct 2023 09:28:51 +0100 Subject: [PATCH 7/7] Do not store queue as reference --- tests/unit_tests/sparse_blas/include/test_common.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit_tests/sparse_blas/include/test_common.hpp b/tests/unit_tests/sparse_blas/include/test_common.hpp index 0cf63e10f..c7b8cfc2f 100644 --- a/tests/unit_tests/sparse_blas/include/test_common.hpp +++ b/tests/unit_tests/sparse_blas/include/test_common.hpp @@ -70,10 +70,10 @@ struct exception_handler_t { // Use a unique_ptr to automatically free device memory on unique_ptr destruction. template -auto malloc_device_uptr(sycl::queue &q, std::size_t num_elts) { +auto malloc_device_uptr(sycl::queue q, std::size_t num_elts) { struct Deleter { - sycl::queue &q; - Deleter(sycl::queue &_q) : q(_q) {} + sycl::queue q; + Deleter(sycl::queue _q) : q(_q) {} void operator()(T *ptr) { sycl::free(ptr, q); }