diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 4d713d5ed..0a9de7579 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -34,9 +34,9 @@ jobs: fail-fast: false matrix: include: - - config: portBLAS + - config: generic SYCL BLAS domain: blas - build_options: -DREF_BLAS_ROOT=${PWD}/lapack/install -DENABLE_PORTBLAS_BACKEND=ON -DENABLE_MKLCPU_BACKEND=OFF -DPORTBLAS_TUNING_TARGET=INTEL_CPU + build_options: -DREF_BLAS_ROOT=${PWD}/lapack/install -DENABLE_GENERIC_BLAS_BACKEND=ON -DENABLE_MKLCPU_BACKEND=OFF -DGENERIC_BLAS_TUNING_TARGET=INTEL_CPU - config: portFFT domain: dft build_options: -DENABLE_PORTFFT_BACKEND=ON -DENABLE_MKLCPU_BACKEND=OFF diff --git a/CMakeLists.txt b/CMakeLists.txt index 184314e7b..cef412bdc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,7 +45,7 @@ endif() option(ENABLE_CUBLAS_BACKEND "Enable the cuBLAS backend for the BLAS interface" OFF) option(ENABLE_ROCBLAS_BACKEND "Enable the rocBLAS backend for the BLAS interface" OFF) option(ENABLE_NETLIB_BACKEND "Enable the Netlib backend for the BLAS interface" OFF) -option(ENABLE_PORTBLAS_BACKEND "Enable the portBLAS backend for the BLAS interface. Cannot be used with other BLAS backends." OFF) +option(ENABLE_GENERIC_BLAS_BACKEND "Enable the generic BLAS backend for the BLAS interface. Cannot be used with other BLAS backends." 
OFF) # rand option(ENABLE_CURAND_BACKEND "Enable the cuRAND backend for the RNG interface" OFF) @@ -76,6 +76,11 @@ option(BUILD_EXAMPLES "" ON) ## Documentation option(BUILD_DOC "" OFF) +if(DEFINED ENABLE_PORTBLAS_BACKEND) + message(WARNING "PORTBLAS_BACKEND is deprecated, please use ENABLE_GENERIC_BLAS_BACKEND.") + set(ENABLE_GENERIC_BLAS_BACKEND ${ENABLE_PORTBLAS_BACKEND}) +endif() + ## Supported domains set(DOMAINS_LIST "") if(ENABLE_MKLCPU_BACKEND @@ -83,7 +88,7 @@ if(ENABLE_MKLCPU_BACKEND OR ENABLE_CUBLAS_BACKEND OR ENABLE_ROCBLAS_BACKEND OR ENABLE_NETLIB_BACKEND - OR ENABLE_PORTBLAS_BACKEND) + OR ENABLE_GENERIC_BLAS_BACKEND) list(APPEND DOMAINS_LIST "blas") endif() if(ENABLE_MKLCPU_BACKEND @@ -112,13 +117,13 @@ if(ENABLE_MKLCPU_BACKEND list(APPEND DOMAINS_LIST "sparse_blas") endif() -if(ENABLE_PORTBLAS_BACKEND +if(ENABLE_GENERIC_BLAS_BACKEND AND (ENABLE_MKLCPU_BACKEND OR ENABLE_MKLGPU_BACKEND OR ENABLE_CUBLAS_BACKEND OR ENABLE_ROCBLAS_BACKEND OR ENABLE_NETLIB_BACKEND)) - message(FATAL_ERROR "ENABLE_PORTBLAS_BACKEND cannot be enabled at the same time as other BLAS backends.") + message(FATAL_ERROR "ENABLE_GENERIC_BLAS_BACKEND cannot be enabled at the same time as other BLAS backends.") endif() if (ENABLE_PORTFFT_BACKEND diff --git a/README.md b/README.md index f74d6600a..4345fb7c9 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ oneMath is part of the [UXL Foundation](http://www.uxlfoundation.org). AMD GPU - portBLAS + generic SYCL BLAS x86 CPU, Intel GPU, NVIDIA GPU, AMD GPU, Other SYCL devices (unsupported) @@ -192,7 +192,7 @@ Supported compilers include: Dynamic, Static - portBLAS + generic SYCL BLAS Intel DPC++
Open DPC++ Dynamic, Static @@ -203,7 +203,7 @@ Supported compilers include: Dynamic, Static - portBLAS + generic SYCL BLAS Intel DPC++
Open DPC++ Dynamic, Static @@ -214,7 +214,7 @@ Supported compilers include: Dynamic, Static - portBLAS + generic SYCL BLAS Open DPC++ Dynamic, Static @@ -225,13 +225,13 @@ Supported compilers include: Dynamic, Static - portBLAS + generic SYCL BLAS Open DPC++ Dynamic, Static Other SYCL devices (unsupported) - portBLAS + generic SYCL BLAS Intel DPC++
Open DPC++ Dynamic, Static @@ -549,7 +549,7 @@ Product | Supported Version | License [AMD rocFFT](https://github.com/ROCm/rocFFT) | rocm-5.4.3 | [AMD License](https://github.com/ROCm/rocFFT/blob/rocm-5.4.3/LICENSE.md) [AMD rocSPARSE](https://github.com/ROCm/rocSPARSE) | 3.1.2 | [AMD License](https://github.com/ROCm/rocSPARSE/blob/develop/LICENSE.md) [NETLIB LAPACK](https://www.netlib.org/) | [5d4180c](https://github.com/Reference-LAPACK/lapack/commit/5d4180cf8288ae6ad9a771d18793d15bd0c5643c) | [BSD like license](http://www.netlib.org/lapack/LICENSE.txt) -[portBLAS](https://github.com/codeplaysoftware/portBLAS) | 0.1 | [Apache License v2.0](https://github.com/codeplaysoftware/portBLAS/blob/main/LICENSE) +[Generic SYCL BLAS](https://github.com/uxlfoundation/generic-sycl-components/tree/main/onemath/sycl/blas) | 0.1 | [Apache License v2.0](https://github.com/uxlfoundation/generic-sycl-components/blob/main/LICENSE) [portFFT](https://github.com/codeplaysoftware/portFFT) | 0.1 | [Apache License v2.0](https://github.com/codeplaysoftware/portFFT/blob/main/LICENSE) --- diff --git a/docs/building_the_project_with_dpcpp.rst b/docs/building_the_project_with_dpcpp.rst index 4f1c076ef..26e4ba818 100644 --- a/docs/building_the_project_with_dpcpp.rst +++ b/docs/building_the_project_with_dpcpp.rst @@ -59,8 +59,8 @@ or ``clang++`` and ``clang`` respectively when using the Open DPC++ Compiler. Backends should be enabled by setting ``-DENABLE__BACKEND=True`` for each desired backend. By default, only the ``MKLGPU`` and ``MKLCPU`` backends are enabled. Multiple backends for multiple device vendors can be -enabled at once (albeit with limitations when using portBLAS and portFFT). The -supported backends for the compilers are given in the table at `oneMath +enabled at once (albeit with limitations when using oneMath generic SYCL BLAS and portFFT). 
+The supported backends for the compilers are given in the table at `oneMath supported configurations table `_, and the CMake option names are given in the table below. Some backends may @@ -127,7 +127,7 @@ The most important supported build options are: * - ENABLE_MKLCPU_THREAD_TBB - True, False - True - * - ENABLE_PORTBLAS_BACKEND + * - ENABLE_GENERIC_BLAS_BACKEND - True, False - False * - ENABLE_PORTFFT_BACKEND @@ -239,19 +239,19 @@ SYCL enables portable heterogeneous computing on a wide range of accelerators. Consequently, it is possible to use oneMath with accelerators not anticipated by the project. -For generic SYCL devices, only portBLAS and portFFT backend are enabled. +For generic SYCL devices, only generic BLAS and portFFT backend are enabled. The user must set the appropriate ``-fsycl-targets`` for their device, and also -any other option required for performance. See `Building for portBLAS`_ and +any other option required for performance. See `Building for oneMath generic SYCL BLAS`_ and `Building for portFFT`_. Extensive testing is strongly advised for these unsupported configurations. .. _build_for_portlibs_dpcpp: -Pure SYCL backends: portBLAS and portFFT -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Pure SYCL backends: generic BLAS and portFFT +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -`portBLAS `_ and `portFFT -`_ are experimental pure-SYCL +`Generic SYCL BLAS `_ +and `portFFT `_ are experimental pure-SYCL backends that work on all SYCL targets supported by the DPC++ compiler. Since they support multiple targets, they cannot be enabled with other backends in the same domain, or the ``MKLCPU`` or ``MKLGPU`` backends. Both libraries are @@ -260,35 +260,35 @@ experimental and currently only support a subset of operations and features. For best performance, both libraries must be tuned. See the individual sections for more details. 
-Both portBLAS and portFFT are used as header-only libraries, and will be +Both generic SYCL BLAS and portFFT are used as header-only libraries, and will be downloaded automatically if not found. -.. _build_for_portblas_dpcpp: +.. _build_for_generic_blas_dpcpp: -Building for portBLAS ---------------------- +Building for oneMath generic SYCL BLAS +-------------------------------------- -`portBLAS `_ is -enabled by setting ``-DENABLE_PORTBLAS_BACKEND=True``. +`onemath generic SYCL BLAS `_ +is enabled by setting ``-DENABLE_GENERIC_BLAS_BACKEND=True``. -By default, the portBLAS backend is not tuned for any specific device. +By default, the generic BLAS backend is not tuned for any specific device. This tuning is required to achieve best performance. -portBLAS can be tuned for a specific hardware target by adding compiler +The generic SYCL BLAS backend can be tuned for a specific hardware target by adding compiler definitions in 2 ways: #. - Manually specify a tuning target with ``-DPORTBLAS_TUNING_TARGET=``. - The list of portBLAS targets can be found - `here `_. + Manually specify a tuning target with ``-DGENERIC_BLAS_TUNING_TARGET=``. + The list of oneMath SYCL BLAS targets can be found + `here `_. This will automatically set ``-fsycl-targets`` if needed. #. If one target is set via ``-fsycl-targets`` the configuration step will - try to automatically detect the portBLAS tuning target. One can manually + try to automatically detect the oneMath SYCL BLAS tuning target. One can manually specify ``-fsycl-targets`` via ``CMAKE_CXX_FLAGS``. See `DPC++ User Manual `_ for more information on ``-fsycl-targets``. -portBLAS relies heavily on JIT compilation. This may cause time-outs on some +OneMath SYCL BLAS relies heavily on JIT compilation. This may cause time-outs on some systems. To avoid this issue, use ahead-of-time compilation through tuning targets or ``sycl-targets``. 
@@ -439,11 +439,10 @@ Build oneMath for the BLAS domain on a generic SYCL device: -DCMAKE_C_COMPILER=clang \ -DENABLE_MKLCPU_BACKEND=False \ -DENABLE_MKLGPU_BACKEND=False \ - -DENABLE_PORTBLAS_BACKEND=True + -DENABLE_GENERIC_BLAS_BACKEND=True Note that this is not a tested configuration. This builds oneMath with the -portBLAS backend only, for a generic SYCL device supported by the Open DPC++ -project. +generic SYCL BLAS backend only, for a generic SYCL device. Build oneMath for the DFT domain on a generic SYCL device: diff --git a/examples/blas/run_time_dispatching/level3/CMakeLists.txt b/examples/blas/run_time_dispatching/level3/CMakeLists.txt index 1b3f992c4..b7accf474 100644 --- a/examples/blas/run_time_dispatching/level3/CMakeLists.txt +++ b/examples/blas/run_time_dispatching/level3/CMakeLists.txt @@ -40,15 +40,15 @@ endif() if(ENABLE_ROCBLAS_BACKEND) list(APPEND DEVICE_FILTERS "hip:gpu") endif() -if(ENABLE_PORTBLAS_BACKEND) - if(PORTBLAS_TUNING_TARGET) - if(PORTBLAS_TUNING_TARGET MATCHES "INTEL_CPU") +if(ENABLE_GENERIC_BLAS_BACKEND) + if(GENERIC_BLAS_TUNING_TARGET) + if(GENERIC_BLAS_TUNING_TARGET MATCHES "INTEL_CPU") list(APPEND DEVICE_FILTERS "opencl:cpu") - elseif(PORTBLAS_TUNING_TARGET MATCHES "_GPU") + elseif(GENERIC_BLAS_TUNING_TARGET MATCHES "_GPU") list(APPEND DEVICE_FILTERS "*:gpu") endif() else() - # portBLAS default sycl-target is spir64, testing runtime on both supported + # onemath_sycl_blas default sycl-target is spir64, testing runtime on both supported # devices. 
list(APPEND DEVICE_FILTERS "opencl:cpu;level_zero:gpu") endif() diff --git a/include/oneapi/math/blas.hpp b/include/oneapi/math/blas.hpp index a58f72fb4..86a610eb8 100644 --- a/include/oneapi/math/blas.hpp +++ b/include/oneapi/math/blas.hpp @@ -49,8 +49,8 @@ #ifdef ONEMATH_ENABLE_NETLIB_BACKEND #include "oneapi/math/blas/detail/netlib/blas_ct.hpp" #endif -#ifdef ONEMATH_ENABLE_PORTBLAS_BACKEND -#include "oneapi/math/blas/detail/portblas/blas_ct.hpp" +#ifdef ONEMATH_ENABLE_GENERIC_BLAS_BACKEND +#include "oneapi/math/blas/detail/generic/blas_ct.hpp" #endif namespace oneapi { diff --git a/include/oneapi/math/blas/detail/blas_ct_backends.hpp b/include/oneapi/math/blas/detail/blas_ct_backends.hpp index ee2631fba..4128acb3d 100644 --- a/include/oneapi/math/blas/detail/blas_ct_backends.hpp +++ b/include/oneapi/math/blas/detail/blas_ct_backends.hpp @@ -51,7 +51,7 @@ namespace column_major { #define BACKEND netlib #include "blas_ct_backends.hxx" #undef BACKEND -#define BACKEND portblas +#define BACKEND generic #include "blas_ct_backends.hxx" #undef BACKEND @@ -73,7 +73,7 @@ namespace row_major { #define BACKEND netlib #include "blas_ct_backends.hxx" #undef BACKEND -#define BACKEND portblas +#define BACKEND generic #include "blas_ct_backends.hxx" #undef BACKEND diff --git a/include/oneapi/math/blas/detail/portblas/blas_ct.hpp b/include/oneapi/math/blas/detail/generic/blas_ct.hpp similarity index 88% rename from include/oneapi/math/blas/detail/portblas/blas_ct.hpp rename to include/oneapi/math/blas/detail/generic/blas_ct.hpp index c79dc2dbd..31da6d99a 100644 --- a/include/oneapi/math/blas/detail/portblas/blas_ct.hpp +++ b/include/oneapi/math/blas/detail/generic/blas_ct.hpp @@ -17,8 +17,8 @@ * **************************************************************************/ -#ifndef _DETAIL_PORTBLAS_BLAS_CT_HPP_ -#define _DETAIL_PORTBLAS_BLAS_CT_HPP_ +#ifndef _DETAIL_GENERIC_BLAS_BLAS_CT_HPP_ +#define _DETAIL_GENERIC_BLAS_BLAS_CT_HPP_ #if __has_include() #include @@ -30,7 +30,7 @@ 
#include "oneapi/math/types.hpp" #include "oneapi/math/detail/backend_selector.hpp" -#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp" +#include "oneapi/math/blas/detail/generic/onemath_blas_generic.hpp" #include "oneapi/math/blas/detail/blas_ct_backends.hpp" namespace oneapi { @@ -54,4 +54,4 @@ namespace row_major { } //namespace math } //namespace oneapi -#endif //_DETAIL_PORTBLAS_BLAS_CT_HPP_ +#endif //_DETAIL_GENERIC_BLAS_BLAS_CT_HPP_ diff --git a/include/oneapi/math/blas/detail/portblas/blas_ct.hxx b/include/oneapi/math/blas/detail/generic/blas_ct.hxx similarity index 56% rename from include/oneapi/math/blas/detail/portblas/blas_ct.hxx rename to include/oneapi/math/blas/detail/generic/blas_ct.hxx index 2f3694c6e..d96f43e62 100644 --- a/include/oneapi/math/blas/detail/portblas/blas_ct.hxx +++ b/include/oneapi/math/blas/detail/generic/blas_ct.hxx @@ -19,4277 +19,4275 @@ // Buffer APIs -void herk(backend_selector selector, uplo upper_lower, transpose trans, +void herk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1>& a, std::int64_t lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } -void herk(backend_selector selector, uplo upper_lower, transpose trans, +void herk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1>& a, std::int64_t lda, double beta, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } -void 
scal(backend_selector selector, std::int64_t n, float alpha, +void scal(backend_selector selector, std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); + oneapi::math::blas::generic::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } -void scal(backend_selector selector, std::int64_t n, double alpha, +void scal(backend_selector selector, std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); + oneapi::math::blas::generic::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } -void scal(backend_selector selector, std::int64_t n, std::complex alpha, +void scal(backend_selector selector, std::int64_t n, std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); + oneapi::math::blas::generic::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } -void scal(backend_selector selector, std::int64_t n, std::complex alpha, +void scal(backend_selector selector, std::int64_t n, std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); + oneapi::math::blas::generic::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } -void scal(backend_selector selector, std::int64_t n, float alpha, +void scal(backend_selector selector, std::int64_t n, float alpha, sycl::buffer, 1>& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); + oneapi::math::blas::generic::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } -void scal(backend_selector selector, std::int64_t n, double alpha, +void scal(backend_selector selector, std::int64_t n, double alpha, sycl::buffer, 1>& x, std::int64_t incx) { - 
oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); + oneapi::math::blas::generic::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } -void trmv(backend_selector selector, uplo upper_lower, transpose trans, +void trmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, - n, a, lda, x, incx); + oneapi::math::blas::generic::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, + a, lda, x, incx); } -void trmv(backend_selector selector, uplo upper_lower, transpose trans, +void trmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, - n, a, lda, x, incx); + oneapi::math::blas::generic::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, + a, lda, x, incx); } -void trmv(backend_selector selector, uplo upper_lower, transpose trans, +void trmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, - n, a, lda, x, incx); + oneapi::math::blas::generic::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, + a, lda, x, incx); } -void trmv(backend_selector selector, uplo upper_lower, transpose trans, +void trmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, 
unit_diag, - n, a, lda, x, incx); + oneapi::math::blas::generic::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, + a, lda, x, incx); } -void tpmv(backend_selector selector, uplo upper_lower, transpose trans, +void tpmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, - n, a, x, incx); + oneapi::math::blas::generic::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, + a, x, incx); } -void tpmv(backend_selector selector, uplo upper_lower, transpose trans, +void tpmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, - n, a, x, incx); + oneapi::math::blas::generic::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, + a, x, incx); } -void tpmv(backend_selector selector, uplo upper_lower, transpose trans, +void tpmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, - n, a, x, incx); + oneapi::math::blas::generic::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, + a, x, incx); } -void tpmv(backend_selector selector, uplo upper_lower, transpose trans, +void tpmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, - n, a, x, incx); + oneapi::math::blas::generic::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, 
unit_diag, n, + a, x, incx); } -void spr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { - oneapi::math::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, - a); +void spr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { + oneapi::math::blas::generic::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, + a); } -void spr(backend_selector selector, uplo upper_lower, std::int64_t n, +void spr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { - oneapi::math::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, - a); + oneapi::math::blas::generic::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, + a); } -void gemm_batch(backend_selector selector, transpose transa, transpose transb, +void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + oneapi::math::blas::generic::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } -void gemm_batch(backend_selector selector, transpose transa, transpose transb, +void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, 
std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + oneapi::math::blas::generic::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } -void gemm_batch(backend_selector selector, transpose transa, transpose transb, +void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + oneapi::math::blas::generic::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } -void gemm_batch(backend_selector selector, transpose transa, transpose transb, +void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, 
batch_size); + oneapi::math::blas::generic::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } -void gemm_batch(backend_selector selector, transpose transa, transpose transb, +void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + oneapi::math::blas::generic::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } -void gemm_batch(backend_selector selector, transpose transa, transpose transb, +void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + oneapi::math::blas::generic::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } -void gemm_batch(backend_selector selector, transpose transa, transpose transb, +void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, 
std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + oneapi::math::blas::generic::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } -void gemm_batch(backend_selector selector, transpose transa, transpose transb, +void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + oneapi::math::blas::generic::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } -void syrk(backend_selector selector, uplo upper_lower, transpose trans, +void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, float beta, sycl::buffer& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } -void syrk(backend_selector selector, uplo upper_lower, transpose trans, 
+void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, double beta, sycl::buffer& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } -void syrk(backend_selector selector, uplo upper_lower, transpose trans, +void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } -void syrk(backend_selector selector, uplo upper_lower, transpose trans, +void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } -void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, +void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, 
k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + oneapi::math::blas::generic::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } -void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, +void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + oneapi::math::blas::generic::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } -void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, +void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + oneapi::math::blas::generic::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } -void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, +void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t 
batch_size) { - oneapi::math::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + oneapi::math::blas::generic::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } -void her2(backend_selector selector, uplo upper_lower, std::int64_t n, +void her2(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, - y, incy, a, lda); + oneapi::math::blas::generic::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, + y, incy, a, lda); } -void her2(backend_selector selector, uplo upper_lower, std::int64_t n, +void her2(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, - y, incy, a, lda); + oneapi::math::blas::generic::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, + y, incy, a, lda); } -void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, +void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, + x, incx, beta, y, incy); } -void hbmv(backend_selector selector, 
uplo upper_lower, std::int64_t n, +void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, + x, incx, beta, y, incy); } -void rot(backend_selector selector, std::int64_t n, +void rot(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, float c, float s) { - oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); + oneapi::math::blas::generic::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, +void rot(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { - oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); + oneapi::math::blas::generic::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s) { - oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); + oneapi::math::blas::generic::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s) { - 
oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); + oneapi::math::blas::generic::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void axpy(backend_selector selector, std::int64_t n, float alpha, +void axpy(backend_selector selector, std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); + oneapi::math::blas::generic::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } -void axpy(backend_selector selector, std::int64_t n, double alpha, +void axpy(backend_selector selector, std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); + oneapi::math::blas::generic::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } -void axpy(backend_selector selector, std::int64_t n, std::complex alpha, +void axpy(backend_selector selector, std::int64_t n, std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); + oneapi::math::blas::generic::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } -void axpy(backend_selector selector, std::int64_t n, std::complex alpha, +void axpy(backend_selector selector, std::int64_t n, std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); + oneapi::math::blas::generic::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } -void axpy_batch(backend_selector selector, std::int64_t n, float alpha, +void axpy_batch(backend_selector selector, std::int64_t n, float alpha, sycl::buffer& x, 
std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, - stridex, y, incy, stridey, batch_size); + oneapi::math::blas::generic::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, + y, incy, stridey, batch_size); } -void axpy_batch(backend_selector selector, std::int64_t n, double alpha, +void axpy_batch(backend_selector selector, std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, - stridex, y, incy, stridey, batch_size); + oneapi::math::blas::generic::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, + y, incy, stridey, batch_size); } -void axpy_batch(backend_selector selector, std::int64_t n, +void axpy_batch(backend_selector selector, std::int64_t n, std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, - stridex, y, incy, stridey, batch_size); + oneapi::math::blas::generic::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, + y, incy, stridey, batch_size); } -void axpy_batch(backend_selector selector, std::int64_t n, +void axpy_batch(backend_selector selector, std::int64_t n, std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, - stridex, y, incy, stridey, batch_size); + 
oneapi::math::blas::generic::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, + y, incy, stridey, batch_size); } -void axpby(backend_selector selector, std::int64_t n, float alpha, +void axpby(backend_selector selector, std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, - incy); + oneapi::math::blas::generic::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } -void axpby(backend_selector selector, std::int64_t n, double alpha, +void axpby(backend_selector selector, std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, - incy); + oneapi::math::blas::generic::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } -void axpby(backend_selector selector, std::int64_t n, std::complex alpha, +void axpby(backend_selector selector, std::int64_t n, std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, - incy); + oneapi::math::blas::generic::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } -void axpby(backend_selector selector, std::int64_t n, std::complex alpha, +void axpby(backend_selector selector, std::int64_t n, std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, - incy); + oneapi::math::blas::generic::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } -void sdsdot(backend_selector selector, std::int64_t n, float sb, +void 
sdsdot(backend_selector selector, std::int64_t n, float sb, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, - result); + oneapi::math::blas::generic::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, + result); } -void gerc(backend_selector selector, std::int64_t m, std::int64_t n, +void gerc(backend_selector selector, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, - a, lda); + oneapi::math::blas::generic::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, + lda); } -void gerc(backend_selector selector, std::int64_t m, std::int64_t n, +void gerc(backend_selector selector, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, - a, lda); + oneapi::math::blas::generic::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, + lda); } -void syr2k(backend_selector selector, uplo upper_lower, transpose trans, +void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } -void syr2k(backend_selector selector, uplo upper_lower, 
transpose trans, +void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } -void syr2k(backend_selector selector, uplo upper_lower, transpose trans, +void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } -void syr2k(backend_selector selector, uplo upper_lower, transpose trans, +void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } -void gemv(backend_selector selector, transpose trans, std::int64_t m, +void gemv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, 
sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } -void gemv(backend_selector selector, transpose trans, std::int64_t m, +void gemv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } -void gemv(backend_selector selector, transpose trans, std::int64_t m, +void gemv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } -void gemv(backend_selector selector, transpose trans, std::int64_t m, +void gemv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, + incx, beta, y, incy); } -void gemv_batch(backend_selector selector, transpose 
trans, std::int64_t m, +void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stridea, x, incx, stridex, beta, y, incy, - stridey, batch_size); + oneapi::math::blas::generic::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, + stridea, x, incx, stridex, beta, y, incy, + stridey, batch_size); } -void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, +void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, double beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stridea, x, incx, stridex, beta, y, incy, - stridey, batch_size); + oneapi::math::blas::generic::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, + stridea, x, incx, stridex, beta, y, incy, + stridey, batch_size); } -void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, +void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stridea, x, incx, stridex, beta, y, incy, - stridey, 
batch_size); + oneapi::math::blas::generic::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, + stridea, x, incx, stridex, beta, y, incy, + stridey, batch_size); } -void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, +void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stridea, x, incx, stridex, beta, y, incy, - stridey, batch_size); + oneapi::math::blas::generic::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, + stridea, x, incx, stridex, beta, y, incy, + stridey, batch_size); } -void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, +void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + oneapi::math::blas::generic::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } -void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, +void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - 
oneapi::math::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + oneapi::math::blas::generic::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } -void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, +void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + oneapi::math::blas::generic::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } -void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, +void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + oneapi::math::blas::generic::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } -void her(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1>& x, std::int64_t incx, +void her(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& a, 
std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, - a, lda); + oneapi::math::blas::generic::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, + lda); } -void her(backend_selector selector, uplo upper_lower, std::int64_t n, +void her(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, - a, lda); + oneapi::math::blas::generic::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, + lda); } -void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1>& x, std::int64_t incx, +void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& a) { - oneapi::math::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, - a); + oneapi::math::blas::generic::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, + a); } -void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, +void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& a) { - oneapi::math::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, - a); + oneapi::math::blas::generic::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, + a); } -void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); + oneapi::math::blas::generic::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } -void iamin(backend_selector 
selector, std::int64_t n, sycl::buffer& x, +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); + oneapi::math::blas::generic::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } -void iamin(backend_selector selector, std::int64_t n, +void iamin(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); + oneapi::math::blas::generic::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } -void iamin(backend_selector selector, std::int64_t n, +void iamin(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); + oneapi::math::blas::generic::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } -void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, +void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, - incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, + incx, beta, y, incy); } -void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, +void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, - incx, beta, y, incy); + 
oneapi::math::blas::generic::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, + incx, beta, y, incy); } -void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, +void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, - incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, + incx, beta, y, incy); } -void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, +void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, - incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, + incx, beta, y, incy); } -void gemm_bias(backend_selector selector, transpose transa, transpose transb, +void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { - oneapi::math::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, - n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, - ldc, co); + oneapi::math::blas::generic::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } -void gemm_bias(backend_selector selector, transpose transa, transpose transb, +void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset 
offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { - oneapi::math::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, - n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, - ldc, co); + oneapi::math::blas::generic::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } -void gemm_bias(backend_selector selector, transpose transa, transpose transb, +void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, uint8_t ao, sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { - oneapi::math::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, - n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, - ldc, co); + oneapi::math::blas::generic::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } -void gemm_bias(backend_selector selector, transpose transa, transpose transb, +void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, uint8_t ao, sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { - oneapi::math::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, - n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, - ldc, co); + oneapi::math::blas::generic::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } -void 
swap(backend_selector selector, std::int64_t n, sycl::buffer& x, +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); + oneapi::math::blas::generic::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } -void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); + oneapi::math::blas::generic::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } -void swap(backend_selector selector, std::int64_t n, +void swap(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); + oneapi::math::blas::generic::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } -void swap(backend_selector selector, std::int64_t n, +void swap(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); + oneapi::math::blas::generic::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } -void geru(backend_selector selector, std::int64_t m, std::int64_t n, +void geru(backend_selector selector, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, - a, lda); + oneapi::math::blas::generic::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, + lda); } -void 
geru(backend_selector selector, std::int64_t m, std::int64_t n, +void geru(backend_selector selector, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, - a, lda); + oneapi::math::blas::generic::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, + lda); } -void nrm2(backend_selector selector, std::int64_t n, +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); + oneapi::math::blas::generic::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); + oneapi::math::blas::generic::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); + oneapi::math::blas::generic::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); + oneapi::math::blas::generic::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void gemm(backend_selector selector, transpose transa, transpose transb, +void 
gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } -void gemm(backend_selector selector, transpose transa, transpose transb, +void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } -void gemm(backend_selector selector, transpose transa, transpose transb, +void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } -void gemm(backend_selector selector, transpose transa, transpose transb, +void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, 
std::int64_t ldb, std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } -void gemm(backend_selector selector, transpose transa, transpose transb, +void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } -void gemm(backend_selector selector, transpose transa, transpose transb, +void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } -void gemm(backend_selector selector, transpose transa, transpose transb, +void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + 
oneapi::math::blas::generic::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } -void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, +void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, - y, incy, a, lda); + oneapi::math::blas::generic::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, + y, incy, a, lda); } -void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, +void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, - y, incy, a, lda); + oneapi::math::blas::generic::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, + y, incy, a, lda); } -void ger(backend_selector selector, std::int64_t m, std::int64_t n, float alpha, +void ger(backend_selector selector, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + oneapi::math::blas::generic::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, + lda); } -void ger(backend_selector selector, std::int64_t m, std::int64_t n, double alpha, +void ger(backend_selector selector, std::int64_t m, std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, 
- lda); + oneapi::math::blas::generic::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, + lda); } -void trsm(backend_selector selector, side left_right, uplo upper_lower, +void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { - oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + oneapi::math::blas::generic::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(backend_selector selector, side left_right, uplo upper_lower, +void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { - oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + oneapi::math::blas::generic::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(backend_selector selector, side left_right, uplo upper_lower, +void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { - oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + oneapi::math::blas::generic::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(backend_selector selector, side left_right, uplo upper_lower, +void trsm(backend_selector selector, side left_right, 
uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { - oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + oneapi::math::blas::generic::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } -void dotu(backend_selector selector, std::int64_t n, +void dotu(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, sycl::buffer, 1>& result) { - oneapi::math::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); + oneapi::math::blas::generic::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } -void dotu(backend_selector selector, std::int64_t n, +void dotu(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, sycl::buffer, 1>& result) { - oneapi::math::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); + oneapi::math::blas::generic::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } -void hemm(backend_selector selector, side left_right, uplo upper_lower, +void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } -void hemm(backend_selector selector, side left_right, uplo upper_lower, +void hemm(backend_selector 
selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } -void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, +void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, sycl::buffer, 1>& a) { - oneapi::math::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, - y, incy, a); + oneapi::math::blas::generic::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, + y, incy, a); } -void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, +void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, sycl::buffer, 1>& a) { - oneapi::math::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, - y, incy, a); + oneapi::math::blas::generic::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, + y, incy, a); } -void gbmv(backend_selector selector, transpose trans, std::int64_t m, +void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy); + 
oneapi::math::blas::generic::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } -void gbmv(backend_selector selector, transpose trans, std::int64_t m, +void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } -void gbmv(backend_selector selector, transpose trans, std::int64_t m, +void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } -void gbmv(backend_selector selector, transpose trans, std::int64_t m, +void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } -void tbmv(backend_selector selector, uplo upper_lower, 
transpose trans, +void tbmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, - n, k, a, lda, x, incx); + oneapi::math::blas::generic::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } -void tbmv(backend_selector selector, uplo upper_lower, transpose trans, +void tbmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, - n, k, a, lda, x, incx); + oneapi::math::blas::generic::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } -void tbmv(backend_selector selector, uplo upper_lower, transpose trans, +void tbmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, - n, k, a, lda, x, incx); + oneapi::math::blas::generic::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } -void tbmv(backend_selector selector, uplo upper_lower, transpose trans, +void tbmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, - n, k, a, lda, x, incx); + oneapi::math::blas::generic::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, + k, a, 
lda, x, incx); } -void symm(backend_selector selector, side left_right, uplo upper_lower, +void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(backend_selector selector, side left_right, uplo upper_lower, +void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(backend_selector selector, side left_right, uplo upper_lower, +void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(backend_selector selector, side left_right, uplo upper_lower, +void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, 
sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, + alpha, a, lda, b, ldb, beta, c, ldc); } -void dotc(backend_selector selector, std::int64_t n, +void dotc(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, sycl::buffer, 1>& result) { - oneapi::math::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); + oneapi::math::blas::generic::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } -void dotc(backend_selector selector, std::int64_t n, +void dotc(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, sycl::buffer, 1>& result) { - oneapi::math::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); + oneapi::math::blas::generic::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } -void syr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a, +void syr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, - a, lda); + oneapi::math::blas::generic::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, + lda); } -void syr(backend_selector selector, uplo upper_lower, std::int64_t n, +void syr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { - 
oneapi::math::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, - a, lda); + oneapi::math::blas::generic::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, + lda); } -void trmm(backend_selector selector, side left_right, uplo upper_lower, +void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { - oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + oneapi::math::blas::generic::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(backend_selector selector, side left_right, uplo upper_lower, +void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { - oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + oneapi::math::blas::generic::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(backend_selector selector, side left_right, uplo upper_lower, +void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { - oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + oneapi::math::blas::generic::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(backend_selector 
selector, side left_right, uplo upper_lower, +void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { - oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + oneapi::math::blas::generic::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, + unit_diag, m, n, alpha, a, lda, b, ldb); } -void rotmg(backend_selector selector, sycl::buffer& d1, +void rotmg(backend_selector selector, sycl::buffer& d1, sycl::buffer& d2, sycl::buffer& x1, float y1, sycl::buffer& param) { - oneapi::math::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); + oneapi::math::blas::generic::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } -void rotmg(backend_selector selector, sycl::buffer& d1, +void rotmg(backend_selector selector, sycl::buffer& d1, sycl::buffer& d2, sycl::buffer& x1, double y1, sycl::buffer& param) { - oneapi::math::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); + oneapi::math::blas::generic::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } -void tpsv(backend_selector selector, uplo upper_lower, transpose trans, +void tpsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, - n, a, x, incx); + oneapi::math::blas::generic::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, + a, x, incx); } -void tpsv(backend_selector selector, uplo upper_lower, transpose trans, +void tpsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { - 
oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, - n, a, x, incx); + oneapi::math::blas::generic::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, + a, x, incx); } -void tpsv(backend_selector selector, uplo upper_lower, transpose trans, +void tpsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, - n, a, x, incx); + oneapi::math::blas::generic::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, + a, x, incx); } -void tpsv(backend_selector selector, uplo upper_lower, transpose trans, +void tpsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, - n, a, x, incx); + oneapi::math::blas::generic::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, + a, x, incx); } -void trsv(backend_selector selector, uplo upper_lower, transpose trans, +void trsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, - n, a, lda, x, incx); + oneapi::math::blas::generic::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, + a, lda, x, incx); } -void trsv(backend_selector selector, uplo upper_lower, transpose trans, +void trsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, 
trans, unit_diag, - n, a, lda, x, incx); + oneapi::math::blas::generic::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, + a, lda, x, incx); } -void trsv(backend_selector selector, uplo upper_lower, transpose trans, +void trsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, - n, a, lda, x, incx); + oneapi::math::blas::generic::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, + a, lda, x, incx); } -void trsv(backend_selector selector, uplo upper_lower, transpose trans, +void trsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, - n, a, lda, x, incx); + oneapi::math::blas::generic::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, + a, lda, x, incx); } -void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); + oneapi::math::blas::generic::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); + oneapi::math::blas::generic::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy(backend_selector selector, std::int64_t n, +void copy(backend_selector 
selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); + oneapi::math::blas::generic::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy(backend_selector selector, std::int64_t n, +void copy(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); + oneapi::math::blas::generic::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy_batch(backend_selector selector, std::int64_t n, +void copy_batch(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, - incy, stridey, batch_size); + oneapi::math::blas::generic::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } -void copy_batch(backend_selector selector, std::int64_t n, +void copy_batch(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, - incy, stridey, batch_size); + oneapi::math::blas::generic::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } -void copy_batch(backend_selector selector, std::int64_t n, +void copy_batch(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - 
oneapi::math::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, - incy, stridey, batch_size); + oneapi::math::blas::generic::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } -void copy_batch(backend_selector selector, std::int64_t n, +void copy_batch(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, - incy, stridey, batch_size); + oneapi::math::blas::generic::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } -void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, +void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, - x, incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, + incx, beta, y, incy); } -void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, +void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, - x, incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, + incx, beta, y, incy); } -void gemmt(backend_selector selector, uplo upper_lower, transpose transa, +void gemmt(backend_selector selector, uplo 
upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(backend_selector selector, uplo upper_lower, transpose transa, +void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(backend_selector selector, uplo upper_lower, transpose transa, +void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(backend_selector selector, uplo upper_lower, transpose transa, +void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1>& 
a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } -void asum(backend_selector selector, std::int64_t n, +void asum(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); + oneapi::math::blas::generic::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, +void asum(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); + oneapi::math::blas::generic::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); + oneapi::math::blas::generic::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); + oneapi::math::blas::generic::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, +void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, 
sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, + x, incx, beta, y, incy); } -void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, +void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, + x, incx, beta, y, incy); } -void tbsv(backend_selector selector, uplo upper_lower, transpose trans, +void tbsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, - n, k, a, lda, x, incx); + oneapi::math::blas::generic::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } -void tbsv(backend_selector selector, uplo upper_lower, transpose trans, +void tbsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, - n, k, a, lda, x, incx); + oneapi::math::blas::generic::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } -void tbsv(backend_selector selector, uplo 
upper_lower, transpose trans, +void tbsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, - n, k, a, lda, x, incx); + oneapi::math::blas::generic::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } -void tbsv(backend_selector selector, uplo upper_lower, transpose trans, +void tbsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, - n, k, a, lda, x, incx); + oneapi::math::blas::generic::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, + k, a, lda, x, incx); } -void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, +void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& a) { - oneapi::math::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, - y, incy, a); + oneapi::math::blas::generic::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, + y, incy, a); } -void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, +void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& a) { - oneapi::math::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, - y, incy, a); + oneapi::math::blas::generic::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, + y, incy, a); } -void iamax(backend_selector selector, 
std::int64_t n, sycl::buffer& x, +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); + oneapi::math::blas::generic::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); + oneapi::math::blas::generic::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void iamax(backend_selector selector, std::int64_t n, +void iamax(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); + oneapi::math::blas::generic::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void iamax(backend_selector selector, std::int64_t n, +void iamax(backend_selector selector, std::int64_t n, sycl::buffer, 1>& x, std::int64_t incx, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); + oneapi::math::blas::generic::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& param) { - oneapi::math::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); + oneapi::math::blas::generic::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, 
sycl::buffer& param) { - oneapi::math::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); + oneapi::math::blas::generic::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); + oneapi::math::blas::generic::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); + oneapi::math::blas::generic::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& result) { - oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); + oneapi::math::blas::generic::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, +void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, - b, ldb, stride_b, 
batch_size); + oneapi::math::blas::generic::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } -void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, +void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, - b, ldb, stride_b, batch_size); + oneapi::math::blas::generic::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } -void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, +void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, - b, ldb, stride_b, batch_size); + oneapi::math::blas::generic::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } -void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, +void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, 
sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, - b, ldb, stride_b, batch_size); + oneapi::math::blas::generic::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } -void her2k(backend_selector selector, uplo upper_lower, transpose trans, +void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } -void her2k(backend_selector selector, uplo upper_lower, transpose trans, +void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, double beta, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::generic::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } -void rotg(backend_selector selector, sycl::buffer& a, +void rotg(backend_selector selector, sycl::buffer& a, sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { - oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); + oneapi::math::blas::generic::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector 
selector, sycl::buffer& a, +void rotg(backend_selector selector, sycl::buffer& a, sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { - oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); + oneapi::math::blas::generic::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1>& a, +void rotg(backend_selector selector, sycl::buffer, 1>& a, sycl::buffer, 1>& b, sycl::buffer& c, sycl::buffer, 1>& s) { - oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); + oneapi::math::blas::generic::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1>& a, +void rotg(backend_selector selector, sycl::buffer, 1>& a, sycl::buffer, 1>& b, sycl::buffer& c, sycl::buffer, 1>& s) { - oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); + oneapi::math::blas::generic::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void symv(backend_selector selector, uplo upper_lower, std::int64_t n, +void symv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, - x, incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, + incx, beta, y, incy); } -void symv(backend_selector selector, uplo upper_lower, std::int64_t n, +void symv(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, - x, incx, beta, y, incy); + oneapi::math::blas::generic::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, 
+ incx, beta, y, incy); } -void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, +void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::omatcopy_batch( - selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); + oneapi::math::blas::generic::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, + lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, +void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::omatcopy_batch( - selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); + oneapi::math::blas::generic::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, + lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, +void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::omatcopy_batch( - selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); + oneapi::math::blas::generic::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, + lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(backend_selector selector, transpose trans, 
std::int64_t m, +void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::omatcopy_batch( - selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); + oneapi::math::blas::generic::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, + lda, stride_a, b, ldb, stride_b, batch_size); } -void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, +void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, stride, batch_size); + oneapi::math::blas::generic::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, + lda, ldb, stride, batch_size); } -void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, +void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, stride, batch_size); + oneapi::math::blas::generic::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, + lda, ldb, stride, batch_size); } -void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, +void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t 
batch_size) { - oneapi::math::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, stride, batch_size); + oneapi::math::blas::generic::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, + lda, ldb, stride, batch_size); } -void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, +void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, stride, batch_size); + oneapi::math::blas::generic::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, + lda, ldb, stride, batch_size); } -void omatadd_batch(backend_selector selector, transpose transa, transpose transb, +void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, - stride_b, c, ldc, stride_c, batch_size); + oneapi::math::blas::generic::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(backend_selector selector, transpose transa, transpose transb, +void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, 
std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, - stride_b, c, ldc, stride_c, batch_size); + oneapi::math::blas::generic::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(backend_selector selector, transpose transa, transpose transb, +void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, - stride_b, c, ldc, stride_c, batch_size); + oneapi::math::blas::generic::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(backend_selector selector, transpose transa, transpose transb, +void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, - stride_b, c, ldc, stride_c, batch_size); + oneapi::math::blas::generic::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, + alpha, a, lda, stride_a, 
beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } -void omatcopy(backend_selector selector, transpose trans, std::int64_t m, +void omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { - oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, - b, ldb); + oneapi::math::blas::generic::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, + b, ldb); } -void omatcopy(backend_selector selector, transpose trans, std::int64_t m, +void omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { - oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, - b, ldb); + oneapi::math::blas::generic::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, + b, ldb); } -void omatcopy(backend_selector selector, transpose trans, std::int64_t m, +void omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { - oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, - b, ldb); + oneapi::math::blas::generic::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, + b, ldb); } -void omatcopy(backend_selector selector, transpose trans, std::int64_t m, +void omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { - oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, - b, ldb); + oneapi::math::blas::generic::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, + b, ldb); } -void 
omatcopy2(backend_selector selector, transpose trans, std::int64_t m, +void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { - oneapi::math::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + oneapi::math::blas::generic::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, + stridea, b, ldb, strideb); } -void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, +void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { - oneapi::math::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + oneapi::math::blas::generic::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, + stridea, b, ldb, strideb); } -void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, +void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { - oneapi::math::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + oneapi::math::blas::generic::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, + stridea, b, ldb, strideb); } -void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, +void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { - 
oneapi::math::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + oneapi::math::blas::generic::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, + stridea, b, ldb, strideb); } -void imatcopy(backend_selector selector, transpose trans, std::int64_t m, +void imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { - oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + oneapi::math::blas::generic::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, + ldb); } -void imatcopy(backend_selector selector, transpose trans, std::int64_t m, +void imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { - oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + oneapi::math::blas::generic::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, + ldb); } -void imatcopy(backend_selector selector, transpose trans, std::int64_t m, +void imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { - oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + oneapi::math::blas::generic::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, + ldb); } -void imatcopy(backend_selector selector, transpose trans, std::int64_t m, +void imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { - oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + 
oneapi::math::blas::generic::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, + ldb); } -void omatadd(backend_selector selector, transpose transa, transpose transb, +void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc); + oneapi::math::blas::generic::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, + a, lda, beta, b, ldb, c, ldc); } -void omatadd(backend_selector selector, transpose transa, transpose transb, +void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc); + oneapi::math::blas::generic::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, + a, lda, beta, b, ldb, c, ldc); } -void omatadd(backend_selector selector, transpose transa, transpose transb, +void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc); + oneapi::math::blas::generic::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, + a, lda, beta, b, ldb, c, ldc); } -void omatadd(backend_selector selector, transpose transa, transpose transb, +void omatadd(backend_selector selector, 
transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, sycl::buffer, 1>& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc); + oneapi::math::blas::generic::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, + a, lda, beta, b, ldb, c, ldc); } // USM APIs -sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, float* a, std::int64_t lda, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syr2( + auto done = oneapi::math::blas::generic::MAJOR::syr2( selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, const double* x, std::int64_t incx, const double* y, std::int64_t incy, double* a, std::int64_t lda, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syr2( + auto done = oneapi::math::blas::generic::MAJOR::syr2( selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - float* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); +sycl::event scal(backend_selector selector, std::int64_t n, float alpha, float* x, + std::int64_t incx, const std::vector& dependencies) { + auto done = 
oneapi::math::blas::generic::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, + dependencies); return done; } -sycl::event scal(backend_selector selector, std::int64_t n, double alpha, +sycl::event scal(backend_selector selector, std::int64_t n, double alpha, double* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, + dependencies); return done; } -sycl::event scal(backend_selector selector, std::int64_t n, +sycl::event scal(backend_selector selector, std::int64_t n, std::complex alpha, std::complex* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, + dependencies); return done; } -sycl::event scal(backend_selector selector, std::int64_t n, +sycl::event scal(backend_selector selector, std::int64_t n, std::complex alpha, std::complex* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, + dependencies); return done; } -sycl::event scal(backend_selector selector, std::int64_t n, float alpha, +sycl::event scal(backend_selector selector, std::int64_t n, float alpha, std::complex* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, + dependencies); return done; } -sycl::event scal(backend_selector selector, std::int64_t 
n, double alpha, +sycl::event scal(backend_selector selector, std::int64_t n, double alpha, std::complex* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, + dependencies); return done; } -sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trmv( + auto done = oneapi::math::blas::generic::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trmv( + auto done = oneapi::math::blas::generic::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, std::complex* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trmv( + auto done = oneapi::math::blas::generic::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event trmv(backend_selector selector, uplo 
upper_lower, transpose trans, +sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, std::complex* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trmv( + auto done = oneapi::math::blas::generic::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, + unit_diag, n, a, x, incx, dependencies); return done; } -sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, + unit_diag, n, a, x, incx, dependencies); return done; } -sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex* a, std::complex* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - 
unit_diag, n, a, x, incx, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, + unit_diag, n, a, x, incx, dependencies); return done; } -sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex* a, std::complex* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, + unit_diag, n, a, x, incx, dependencies); return done; } -sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, const float* x, std::int64_t incx, float* a, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, - alpha, x, incx, a, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, + x, incx, a, dependencies); return done; } -sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, const double* x, std::int64_t incx, double* a, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, - alpha, x, incx, a, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, + x, incx, a, dependencies); return done; } -sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex 
alpha, const std::complex* a, const std::complex* x, std::int64_t incx, std::complex beta, std::complex* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::hpmv( + auto done = oneapi::math::blas::generic::MAJOR::hpmv( selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex* a, const std::complex* x, std::int64_t incx, std::complex beta, std::complex* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::hpmv( + auto done = oneapi::math::blas::generic::MAJOR::hpmv( selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, float beta, float* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syrk( + auto done = oneapi::math::blas::generic::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, double beta, double* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syrk( + auto done = oneapi::math::blas::generic::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } 
-sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, std::complex beta, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syrk( + auto done = oneapi::math::blas::generic::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, std::complex beta, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syrk( + auto done = oneapi::math::blas::generic::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha, const float** a, std::int64_t* lda, float* beta, float** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch( + auto done = oneapi::math::blas::generic::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha, const double** a, std::int64_t* lda, 
double* beta, double** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch( + auto done = oneapi::math::blas::generic::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, transpose* trans, std::int64_t* n, std::int64_t* k, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex* beta, std::complex** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch( + auto done = oneapi::math::blas::generic::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, transpose* trans, std::int64_t* n, std::int64_t* k, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex* beta, std::complex** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch( + auto done = oneapi::math::blas::generic::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo upper_lower, +sycl::event syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float beta, float* c, std::int64_t ldc, 
std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch( + auto done = oneapi::math::blas::generic::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo upper_lower, +sycl::event syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double beta, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch( + auto done = oneapi::math::blas::generic::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo upper_lower, +sycl::event syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch( + auto done = oneapi::math::blas::generic::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo upper_lower, +sycl::event syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex* c, std::int64_t ldc, 
std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch( + auto done = oneapi::math::blas::generic::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex* x, std::int64_t incx, const std::complex* y, std::int64_t incy, std::complex* a, std::int64_t lda, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::her2( + auto done = oneapi::math::blas::generic::MAJOR::her2( selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex* x, std::int64_t incx, const std::complex* y, std::int64_t incy, std::complex* a, std::int64_t lda, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::her2( + auto done = oneapi::math::blas::generic::MAJOR::her2( selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* x, std::int64_t incx, std::complex beta, std::complex* y, std::int64_t incy, const std::vector& dependencies) { auto done = - oneapi::math::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + 
oneapi::math::blas::generic::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, + lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* x, std::int64_t incx, std::complex beta, std::complex* y, std::int64_t incy, const std::vector& dependencies) { auto done = - oneapi::math::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + oneapi::math::blas::generic::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, + lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex* x, std::int64_t incx, std::complex* y, - std::int64_t incy, float c, float s, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, - c, s, dependencies); +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { + auto done = oneapi::math::blas::generic::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, + c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, std::int64_t incx, std::complex* y, std::int64_t incy, double c, double s, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, - c, s, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, + c, s, dependencies); return done; } -sycl::event rot(backend_selector 
selector, std::int64_t n, float* x, +sycl::event rot(backend_selector selector, std::int64_t n, float* x, std::int64_t incx, float* y, std::int64_t incy, float c, float s, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, - c, s, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, + c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, double* x, +sycl::event rot(backend_selector selector, std::int64_t n, double* x, std::int64_t incx, double* y, std::int64_t incy, double c, double s, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, - c, s, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, + c, s, dependencies); return done; } -sycl::event axpy(backend_selector selector, std::int64_t n, float alpha, +sycl::event axpy(backend_selector selector, std::int64_t n, float alpha, const float* x, std::int64_t incx, float* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, - y, incy, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, + incy, dependencies); return done; } -sycl::event axpy(backend_selector selector, std::int64_t n, double alpha, +sycl::event axpy(backend_selector selector, std::int64_t n, double alpha, const double* x, std::int64_t incx, double* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, - y, incy, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, + incy, dependencies); return done; } -sycl::event 
axpy(backend_selector selector, std::int64_t n, +sycl::event axpy(backend_selector selector, std::int64_t n, std::complex alpha, const std::complex* x, std::int64_t incx, std::complex* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, - y, incy, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, + incy, dependencies); return done; } -sycl::event axpy(backend_selector selector, std::int64_t n, +sycl::event axpy(backend_selector selector, std::int64_t n, std::complex alpha, const std::complex* x, std::int64_t incx, std::complex* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, - y, incy, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, + incy, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t* n, float* alpha, +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, float* alpha, const float** x, std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch( + auto done = oneapi::math::blas::generic::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t* n, double* alpha, +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, double* alpha, const double** x, std::int64_t* incx, double** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch( + auto done = 
oneapi::math::blas::generic::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t* n, +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, std::complex* alpha, const std::complex** x, std::int64_t* incx, std::complex** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch( + auto done = oneapi::math::blas::generic::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t* n, +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, std::complex* alpha, const std::complex** x, std::int64_t* incx, std::complex** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch( + auto done = oneapi::math::blas::generic::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t n, float alpha, +sycl::event axpy_batch(backend_selector selector, std::int64_t n, float alpha, const float* x, std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t n, 
double alpha, +sycl::event axpy_batch(backend_selector selector, std::int64_t n, double alpha, const double* x, std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t n, +sycl::event axpy_batch(backend_selector selector, std::int64_t n, std::complex alpha, const std::complex* x, std::int64_t incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t n, +sycl::event axpy_batch(backend_selector selector, std::int64_t n, std::complex alpha, const std::complex* x, std::int64_t incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } -sycl::event axpby(backend_selector selector, 
std::int64_t n, float alpha, +sycl::event axpby(backend_selector selector, std::int64_t n, float alpha, const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, + beta, y, incy, dependencies); return done; } -sycl::event axpby(backend_selector selector, std::int64_t n, double alpha, +sycl::event axpby(backend_selector selector, std::int64_t n, double alpha, const double* x, std::int64_t incx, const double beta, double* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, + beta, y, incy, dependencies); return done; } -sycl::event axpby(backend_selector selector, std::int64_t n, +sycl::event axpby(backend_selector selector, std::int64_t n, std::complex alpha, const std::complex* x, std::int64_t incx, const std::complex beta, std::complex* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, + beta, y, incy, dependencies); return done; } -sycl::event axpby(backend_selector selector, std::int64_t n, +sycl::event axpby(backend_selector selector, std::int64_t n, std::complex alpha, const std::complex* x, std::int64_t incx, const std::complex beta, std::complex* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, 
dependencies); + auto done = oneapi::math::blas::generic::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, + beta, y, incy, dependencies); return done; } -sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, +sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* x, std::int64_t incx, const std::complex* y, std::int64_t incy, std::complex* a, std::int64_t lda, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, - incx, y, incy, a, lda, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, + y, incy, a, lda, dependencies); return done; } -sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, +sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* x, std::int64_t incx, const std::complex* y, std::int64_t incy, std::complex* a, std::int64_t lda, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, - incx, y, incy, a, lda, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, + y, incy, a, lda, dependencies); return done; } -sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, - n, k, alpha, a, lda, b, ldb, beta, c, - ldc, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, + n, k, 
alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } -sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, - n, k, alpha, a, lda, b, ldb, beta, c, - ldc, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } -sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, - n, k, alpha, a, lda, b, ldb, beta, c, - ldc, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } -sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, - n, k, alpha, a, lda, b, ldb, 
beta, c, - ldc, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } -sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, +sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemv( + auto done = oneapi::math::blas::generic::MAJOR::gemv( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, +sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemv( + auto done = oneapi::math::blas::generic::MAJOR::gemv( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, +sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* x, std::int64_t incx, std::complex beta, std::complex* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemv( + auto done = oneapi::math::blas::generic::MAJOR::gemv( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, +sycl::event gemv(backend_selector 
selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* x, std::int64_t incx, std::complex beta, std::complex* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemv( + auto done = oneapi::math::blas::generic::MAJOR::gemv( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float* a, - std::int64_t lda, std::int64_t stridea, const float* x, std::int64_t incx, +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, std::int64_t stridex, float beta, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double* a, - std::int64_t lda, std::int64_t stridea, const double* x, std::int64_t incx, +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, std::int64_t stridex, double beta, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemv_batch( 
selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex* a, std::int64_t lda, std::int64_t stridea, - const std::complex* x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex* y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch( +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::generic::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex* a, std::int64_t lda, std::int64_t stridea, - const std::complex* x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex* y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch( +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + 
std::int64_t batch_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::generic::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose* trans, +sycl::event gemv_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, const float** x, std::int64_t* incx, float* beta, float** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose* trans, +sycl::event gemv_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, const double** x, std::int64_t* incx, double* beta, double** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose* trans, +sycl::event gemv_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, const std::complex** x, std::int64_t* incx, std::complex* beta, std::complex** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = 
oneapi::math::blas::portblas::MAJOR::gemv_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose* trans, +sycl::event gemv_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, const std::complex** x, std::int64_t* incx, std::complex* beta, std::complex** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const float* a, std::int64_t lda, - std::int64_t stridea, const float* x, std::int64_t incx, - std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch( +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const float* a, std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, float* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { + auto done = oneapi::math::blas::generic::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const double* a, std::int64_t lda, - std::int64_t 
stridea, const double* x, std::int64_t incx, - std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch( +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const double* a, std::int64_t lda, std::int64_t stridea, + const double* x, std::int64_t incx, std::int64_t stridex, double* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { + auto done = oneapi::math::blas::generic::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex* a, - std::int64_t lda, std::int64_t stridea, const std::complex* x, - std::int64_t incx, std::int64_t stridex, std::complex* c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch( + auto done = oneapi::math::blas::generic::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex* a, - std::int64_t lda, std::int64_t stridea, const std::complex* x, - std::int64_t incx, std::int64_t stridex, std::complex* c, - std::int64_t ldc, std::int64_t stridec, 
std::int64_t batch_size, +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch( + auto done = oneapi::math::blas::generic::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side* left_right, +sycl::event dgmm_batch(backend_selector selector, side* left_right, std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda, const float** x, std::int64_t* incx, float** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch( + auto done = oneapi::math::blas::generic::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side* left_right, +sycl::event dgmm_batch(backend_selector selector, side* left_right, std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda, const double** x, std::int64_t* incx, double** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch( + auto done = oneapi::math::blas::generic::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side* left_right, +sycl::event dgmm_batch(backend_selector selector, side* left_right, 
std::int64_t* m, std::int64_t* n, const std::complex** a, std::int64_t* lda, const std::complex** x, std::int64_t* incx, std::complex** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch( + auto done = oneapi::math::blas::generic::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side* left_right, +sycl::event dgmm_batch(backend_selector selector, side* left_right, std::int64_t* m, std::int64_t* n, const std::complex** a, std::int64_t* lda, const std::complex** x, std::int64_t* incx, std::complex** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch( + auto done = oneapi::math::blas::generic::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, const std::complex* x, std::int64_t incx, std::complex* a, std::int64_t lda, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, - alpha, x, incx, a, lda, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, + x, incx, a, lda, dependencies); return done; } -sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, const std::complex* x, std::int64_t incx, std::complex* a, std::int64_t lda, const std::vector& dependencies) { - auto done = 
oneapi::math::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, - alpha, x, incx, a, lda, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, + x, incx, a, lda, dependencies); return done; } -sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, const std::complex* x, std::int64_t incx, std::complex* a, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, - alpha, x, incx, a, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, + x, incx, a, dependencies); return done; } -sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, const std::complex* x, std::int64_t incx, std::complex* a, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, - alpha, x, incx, a, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, + x, incx, a, dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const float* x, +sycl::event iamin(backend_selector selector, std::int64_t n, const float* x, std::int64_t incx, std::int64_t* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::iamin(selector.get_queue(), n, x, incx, result, + dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const double* x, +sycl::event iamin(backend_selector selector, std::int64_t n, const double* x, std::int64_t incx, 
std::int64_t* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::iamin(selector.get_queue(), n, x, incx, result, + dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, +sycl::event iamin(backend_selector selector, std::int64_t n, const std::complex* x, std::int64_t incx, std::int64_t* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::iamin(selector.get_queue(), n, x, incx, result, + dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, +sycl::event iamin(backend_selector selector, std::int64_t n, const std::complex* x, std::int64_t incx, std::int64_t* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::iamin(selector.get_queue(), n, x, incx, result, + dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose* transa, +sycl::event gemm_batch(backend_selector selector, transpose* transa, transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, sycl::half* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector 
selector, transpose* transa, +sycl::event gemm_batch(backend_selector selector, transpose* transa, transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose* transa, +sycl::event gemm_batch(backend_selector selector, transpose* transa, transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a, std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose* transa, +sycl::event gemm_batch(backend_selector selector, transpose* transa, transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a, std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, 
ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose* transa, +sycl::event gemm_batch(backend_selector selector, transpose* transa, transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const float** a, std::int64_t* lda, const float** b, std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose* transa, +sycl::event gemm_batch(backend_selector selector, transpose* transa, transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, double* alpha, const double** a, std::int64_t* lda, const double** b, std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose* transa, +sycl::event gemm_batch(backend_selector selector, transpose* transa, transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, std::complex* alpha, const std::complex** a, std::int64_t* lda, const std::complex** b, std::int64_t* ldb, std::complex* beta, std::complex** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( + auto done = 
oneapi::math::blas::generic::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose* transa, +sycl::event gemm_batch(backend_selector selector, transpose* transa, transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, std::complex* alpha, const std::complex** a, std::int64_t* lda, const std::complex** b, std::int64_t* ldb, std::complex* beta, std::complex** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose transa, +sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose transa, +sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, 
std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose transa, +sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose transa, +sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose transa, 
+sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, const float* b, std::int64_t ldb, std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose transa, +sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, const double* b, std::int64_t ldb, std::int64_t stride_b, double beta, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose transa, +sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, const std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( + auto done = 
oneapi::math::blas::generic::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose transa, +sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, const std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( + auto done = oneapi::math::blas::generic::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, const float* a, const float* x, std::int64_t incx, float beta, float* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::spmv( + auto done = oneapi::math::blas::generic::MAJOR::spmv( selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, const double* a, const double* x, std::int64_t incx, double beta, double* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::spmv( + auto done = oneapi::math::blas::generic::MAJOR::spmv( selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return 
done; } -sycl::event swap(backend_selector selector, std::int64_t n, float* x, +sycl::event swap(backend_selector selector, std::int64_t n, float* x, std::int64_t incx, float* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, + dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, double* x, +sycl::event swap(backend_selector selector, std::int64_t n, double* x, std::int64_t incx, double* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, + dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, +sycl::event swap(backend_selector selector, std::int64_t n, std::complex* x, std::int64_t incx, std::complex* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, + dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, +sycl::event swap(backend_selector selector, std::int64_t n, std::complex* x, std::int64_t incx, std::complex* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, + dependencies); return done; } -sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, +sycl::event 
geru(backend_selector selector, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* x, std::int64_t incx, const std::complex* y, std::int64_t incy, std::complex* a, std::int64_t lda, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, - incx, y, incy, a, lda, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, + y, incy, a, lda, dependencies); return done; } -sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, +sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* x, std::int64_t incx, const std::complex* y, std::int64_t incy, std::complex* a, std::int64_t lda, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, - incx, y, incy, a, lda, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, + y, incy, a, lda, dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, +sycl::event nrm2(backend_selector selector, std::int64_t n, const std::complex* x, std::int64_t incx, float* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, + dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, +sycl::event nrm2(backend_selector selector, std::int64_t n, const std::complex* x, std::int64_t incx, double* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::nrm2(selector.get_queue(), n, 
x, incx, result, + dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const float* x, +sycl::event nrm2(backend_selector selector, std::int64_t n, const float* x, std::int64_t incx, float* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, + dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const double* x, +sycl::event nrm2(backend_selector selector, std::int64_t n, const double* x, std::int64_t incx, double* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, + dependencies); return done; } -sycl::event gemm(backend_selector selector, transpose transa, transpose transb, +sycl::event gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, - n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + auto done = + oneapi::math::blas::generic::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemm(backend_selector selector, transpose transa, transpose transb, +sycl::event gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* 
c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, - n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + auto done = + oneapi::math::blas::generic::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemm(backend_selector selector, transpose transa, transpose transb, +sycl::event gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, - n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + auto done = + oneapi::math::blas::generic::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemm(backend_selector selector, transpose transa, transpose transb, +sycl::event gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, - n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + auto done = + oneapi::math::blas::generic::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemm(backend_selector selector, transpose transa, transpose transb, +sycl::event gemm(backend_selector selector, transpose transa, 
transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, sycl::half beta, sycl::half* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, - n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + auto done = + oneapi::math::blas::generic::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemm(backend_selector selector, transpose transa, transpose transb, +sycl::event gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, - n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + auto done = + oneapi::math::blas::generic::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemm(backend_selector selector, transpose transa, transpose transb, +sycl::event gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16* a, std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, - n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + auto done = + oneapi::math::blas::generic::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } 
-sycl::event gemm_bias(backend_selector selector, transpose transa, +sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_bias( + auto done = oneapi::math::blas::generic::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); return done; } -sycl::event gemm_bias(backend_selector selector, transpose transa, +sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_bias( + auto done = oneapi::math::blas::generic::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); return done; } -sycl::event gemm_bias(backend_selector selector, transpose transa, +sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_bias( + auto done = oneapi::math::blas::generic::MAJOR::gemm_bias( 
selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); return done; } -sycl::event gemm_bias(backend_selector selector, transpose transa, +sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemm_bias( + auto done = oneapi::math::blas::generic::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); return done; } -sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, const std::complex* a, std::int64_t lda, float beta, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::herk( + auto done = oneapi::math::blas::generic::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, const std::complex* a, std::int64_t lda, double beta, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::herk( + auto done = oneapi::math::blas::generic::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -sycl::event ger(backend_selector selector, 
std::int64_t m, std::int64_t n, +sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, float* a, std::int64_t lda, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, + y, incy, a, lda, dependencies); return done; } -sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, +sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, double alpha, const double* x, std::int64_t incx, const double* y, std::int64_t incy, double* a, std::int64_t lda, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, + y, incy, a, lda, dependencies); return done; } -sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, +sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b, std::int64_t ldb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, - alpha, a, lda, b, ldb, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::trsm(selector.get_queue(), left_right, + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } -sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, +sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, 
std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b, std::int64_t ldb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, - alpha, a, lda, b, ldb, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::trsm(selector.get_queue(), left_right, + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } -sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, +sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::complex* b, std::int64_t ldb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, - alpha, a, lda, b, ldb, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::trsm(selector.get_queue(), left_right, + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } -sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, +sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::complex* b, std::int64_t ldb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, - alpha, a, lda, b, ldb, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::trsm(selector.get_queue(), left_right, + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side left_right, +sycl::event 
trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch( + auto done = oneapi::math::blas::generic::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side left_right, +sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch( + auto done = oneapi::math::blas::generic::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side left_right, +sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch( + auto done = oneapi::math::blas::generic::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event 
trsm_batch(backend_selector selector, side left_right, +sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch( + auto done = oneapi::math::blas::generic::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side* left_right, +sycl::event trsm_batch(backend_selector selector, side* left_right, uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch( + auto done = oneapi::math::blas::generic::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side* left_right, +sycl::event trsm_batch(backend_selector selector, side* left_right, uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, double** b, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch( + auto done = oneapi::math::blas::generic::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, 
dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side* left_right, +sycl::event trsm_batch(backend_selector selector, side* left_right, uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch( + auto done = oneapi::math::blas::generic::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side* left_right, +sycl::event trsm_batch(backend_selector selector, side* left_right, uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch( + auto done = oneapi::math::blas::generic::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event dotu(backend_selector selector, std::int64_t n, +sycl::event dotu(backend_selector selector, std::int64_t n, const std::complex* x, std::int64_t incx, const std::complex* y, std::int64_t incy, std::complex* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, + result, dependencies); return done; } -sycl::event dotu(backend_selector selector, std::int64_t n, 
+sycl::event dotu(backend_selector selector, std::int64_t n, const std::complex* x, std::int64_t incx, const std::complex* y, std::int64_t incy, std::complex* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, + result, dependencies); return done; } -sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, +sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right, - upper_lower, m, n, alpha, a, lda, b, ldb, - beta, c, ldc, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::hemm(selector.get_queue(), left_right, + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } -sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, +sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right, - upper_lower, m, n, alpha, a, lda, b, ldb, - beta, c, ldc, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::hemm(selector.get_queue(), left_right, + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } -sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, 
+sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex* x, std::int64_t incx, const std::complex* y, std::int64_t incy, std::complex* a, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, - alpha, x, incx, y, incy, a, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::hpr2(selector.get_queue(), upper_lower, n, + alpha, x, incx, y, incy, a, dependencies); return done; } -sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex* x, std::int64_t incx, const std::complex* y, std::int64_t incy, std::complex* a, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, - alpha, x, incx, y, incy, a, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::hpr2(selector.get_queue(), upper_lower, n, + alpha, x, incx, y, incy, a, dependencies); return done; } -sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, +sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, std::int64_t incy, const std::vector& dependencies) { auto done = - oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, - a, lda, x, incx, beta, y, incy, dependencies); + oneapi::math::blas::generic::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, + a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, +sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, 
std::int64_t ku, double alpha, const double* a, std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, std::int64_t incy, const std::vector& dependencies) { auto done = - oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, - a, lda, x, incx, beta, y, incy, dependencies); + oneapi::math::blas::generic::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, + a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, +sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* x, std::int64_t incx, std::complex beta, std::complex* y, std::int64_t incy, const std::vector& dependencies) { auto done = - oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, - a, lda, x, incx, beta, y, incy, dependencies); + oneapi::math::blas::generic::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, + a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, +sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* x, std::int64_t incx, std::complex beta, std::complex* y, std::int64_t incy, const std::vector& dependencies) { auto done = - oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, - a, lda, x, incx, beta, y, incy, dependencies); + oneapi::math::blas::generic::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, + a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, 
+sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, float* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::tbmv( + auto done = oneapi::math::blas::generic::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, double* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::tbmv( + auto done = oneapi::math::blas::generic::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, std::complex* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::tbmv( + auto done = oneapi::math::blas::generic::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, std::complex* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::tbmv( + auto done = oneapi::math::blas::generic::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, 
k, a, lda, x, incx, dependencies); return done; } -sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, +sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, - upper_lower, m, n, alpha, a, lda, b, ldb, - beta, c, ldc, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::symm(selector.get_queue(), left_right, + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } -sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, +sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, - upper_lower, m, n, alpha, a, lda, b, ldb, - beta, c, ldc, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::symm(selector.get_queue(), left_right, + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } -sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, +sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, - upper_lower, m, n, alpha, a, lda, b, ldb, - beta, c, ldc, dependencies); + auto done 
= oneapi::math::blas::generic::MAJOR::symm(selector.get_queue(), left_right, + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } -sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, +sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, - upper_lower, m, n, alpha, a, lda, b, ldb, - beta, c, ldc, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::symm(selector.get_queue(), left_right, + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } -sycl::event dotc(backend_selector selector, std::int64_t n, +sycl::event dotc(backend_selector selector, std::int64_t n, const std::complex* x, std::int64_t incx, const std::complex* y, std::int64_t incy, std::complex* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, + result, dependencies); return done; } -sycl::event dotc(backend_selector selector, std::int64_t n, +sycl::event dotc(backend_selector selector, std::int64_t n, const std::complex* x, std::int64_t incx, const std::complex* y, std::int64_t incy, std::complex* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, + result, dependencies); return done; } -sycl::event syr(backend_selector selector, uplo 
upper_lower, std::int64_t n, +sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, - alpha, x, incx, a, lda, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, + x, incx, a, lda, dependencies); return done; } -sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, - alpha, x, incx, a, lda, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, + x, incx, a, lda, dependencies); return done; } -sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, +sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b, std::int64_t ldb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, - alpha, a, lda, b, ldb, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::trmm(selector.get_queue(), left_right, + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } -sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, +sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double* a, 
std::int64_t lda, double* b, std::int64_t ldb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, - alpha, a, lda, b, ldb, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::trmm(selector.get_queue(), left_right, + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } -sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, +sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::complex* b, std::int64_t ldb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, - alpha, a, lda, b, ldb, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::trmm(selector.get_queue(), left_right, + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } -sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, +sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::complex* b, std::int64_t ldb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, - alpha, a, lda, b, ldb, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::trmm(selector.get_queue(), left_right, + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } -sycl::event rotmg(backend_selector selector, float* d1, float* d2, float* x1, +sycl::event rotmg(backend_selector selector, float* d1, float* d2, 
float* x1, float y1, float* param, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, - param, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, + param, dependencies); return done; } -sycl::event rotmg(backend_selector selector, double* d1, double* d2, double* x1, +sycl::event rotmg(backend_selector selector, double* d1, double* d2, double* x1, double y1, double* param, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, - param, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, + param, dependencies); return done; } -sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, + unit_diag, n, a, x, incx, dependencies); return done; } -sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, + unit_diag, n, a, x, incx, dependencies); return done; } -sycl::event tpsv(backend_selector selector, uplo upper_lower, 
transpose trans, +sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex* a, std::complex* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, + unit_diag, n, a, x, incx, dependencies); return done; } -sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex* a, std::complex* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, + unit_diag, n, a, x, incx, dependencies); return done; } -sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trsv( + auto done = oneapi::math::blas::generic::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trsv( + auto done = 
oneapi::math::blas::generic::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, std::complex* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trsv( + auto done = oneapi::math::blas::generic::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, std::complex* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::trsv( + auto done = oneapi::math::blas::generic::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const float* x, +sycl::event copy(backend_selector selector, std::int64_t n, const float* x, std::int64_t incx, float* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, + dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const double* x, +sycl::event copy(backend_selector selector, std::int64_t n, const double* x, std::int64_t incx, double* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, 
y, incy, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, + dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, +sycl::event copy(backend_selector selector, std::int64_t n, const std::complex* x, std::int64_t incx, std::complex* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, + dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, +sycl::event copy(backend_selector selector, std::int64_t n, const std::complex* x, std::int64_t incx, std::complex* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, + dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t* n, +sycl::event copy_batch(backend_selector selector, std::int64_t* n, const float** x, std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::copy_batch( + auto done = oneapi::math::blas::generic::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t* n, +sycl::event copy_batch(backend_selector selector, std::int64_t* n, const double** x, std::int64_t* incx, double** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::copy_batch( + auto done 
= oneapi::math::blas::generic::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t* n, +sycl::event copy_batch(backend_selector selector, std::int64_t* n, const std::complex** x, std::int64_t* incx, std::complex** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::copy_batch( + auto done = oneapi::math::blas::generic::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t* n, +sycl::event copy_batch(backend_selector selector, std::int64_t* n, const std::complex** x, std::int64_t* incx, std::complex** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::copy_batch( + auto done = oneapi::math::blas::generic::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, const float* x, +sycl::event copy_batch(backend_selector selector, std::int64_t n, const float* x, std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::copy_batch( + auto done = oneapi::math::blas::generic::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, - const double* x, std::int64_t incx, std::int64_t stridex, double* y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, +sycl::event copy_batch(backend_selector 
selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::copy_batch( + auto done = oneapi::math::blas::generic::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, +sycl::event copy_batch(backend_selector selector, std::int64_t n, const std::complex* x, std::int64_t incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::copy_batch( + auto done = oneapi::math::blas::generic::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, +sycl::event copy_batch(backend_selector selector, std::int64_t n, const std::complex* x, std::int64_t incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::copy_batch( + auto done = oneapi::math::blas::generic::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* x, std::int64_t incx, std::complex beta, std::complex* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::hemv( + auto done = oneapi::math::blas::generic::MAJOR::hemv( 
selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* x, std::int64_t incx, std::complex beta, std::complex* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::hemv( + auto done = oneapi::math::blas::generic::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, - transa, transb, n, k, alpha, a, lda, b, - ldb, beta, c, ldc, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, - transa, transb, n, k, alpha, a, lda, b, - ldb, beta, c, ldc, dependencies); + auto done = 
oneapi::math::blas::generic::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, - transa, transb, n, k, alpha, a, lda, b, - ldb, beta, c, ldc, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, - transa, transb, n, k, alpha, a, lda, b, - ldb, beta, c, ldc, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } -sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, std::int64_t incy, const 
std::vector& dependencies) { auto done = - oneapi::math::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + oneapi::math::blas::generic::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, + lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, std::int64_t incy, const std::vector& dependencies) { auto done = - oneapi::math::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + oneapi::math::blas::generic::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, + lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, +sycl::event asum(backend_selector selector, std::int64_t n, const std::complex* x, std::int64_t incx, float* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::asum(selector.get_queue(), n, x, incx, result, + dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, +sycl::event asum(backend_selector selector, std::int64_t n, const std::complex* x, std::int64_t incx, double* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::asum(selector.get_queue(), n, x, incx, result, + dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const float* x, +sycl::event 
asum(backend_selector selector, std::int64_t n, const float* x, std::int64_t incx, float* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::asum(selector.get_queue(), n, x, incx, result, + dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const double* x, +sycl::event asum(backend_selector selector, std::int64_t n, const double* x, std::int64_t incx, double* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::asum(selector.get_queue(), n, x, incx, result, + dependencies); return done; } -sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, float* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::tbsv( + auto done = oneapi::math::blas::generic::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, double* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::tbsv( + auto done = oneapi::math::blas::generic::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event 
tbsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, std::complex* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::tbsv( + auto done = oneapi::math::blas::generic::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, std::complex* x, std::int64_t incx, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::tbsv( + auto done = oneapi::math::blas::generic::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, float* a, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, - alpha, x, incx, y, incy, a, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::spr2(selector.get_queue(), upper_lower, n, + alpha, x, incx, y, incy, a, dependencies); return done; } -sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, const double* x, std::int64_t incx, const double* y, std::int64_t incy, double* a, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, - alpha, x, incx, y, incy, a, dependencies); + auto 
done = oneapi::math::blas::generic::MAJOR::spr2(selector.get_queue(), upper_lower, n, + alpha, x, incx, y, incy, a, dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const float* x, +sycl::event iamax(backend_selector selector, std::int64_t n, const float* x, std::int64_t incx, std::int64_t* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::iamax(selector.get_queue(), n, x, incx, result, + dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const double* x, +sycl::event iamax(backend_selector selector, std::int64_t n, const double* x, std::int64_t incx, std::int64_t* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::iamax(selector.get_queue(), n, x, incx, result, + dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, +sycl::event iamax(backend_selector selector, std::int64_t n, const std::complex* x, std::int64_t incx, std::int64_t* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::iamax(selector.get_queue(), n, x, incx, result, + dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, +sycl::event iamax(backend_selector selector, std::int64_t n, const std::complex* x, std::int64_t incx, std::int64_t* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + auto done = oneapi::math::blas::generic::MAJOR::iamax(selector.get_queue(), n, x, 
incx, result, + dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, float* x, +sycl::event rotm(backend_selector selector, std::int64_t n, float* x, std::int64_t incx, float* y, std::int64_t incy, float* param, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, - param, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, + param, dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, double* x, +sycl::event rotm(backend_selector selector, std::int64_t n, double* x, std::int64_t incx, double* y, std::int64_t incy, double* param, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, - param, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, + param, dependencies); return done; } -sycl::event rotg(backend_selector selector, float* a, float* b, float* c, +sycl::event rotg(backend_selector selector, float* a, float* b, float* c, float* s, const std::vector& dependencies) { auto done = - oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); + oneapi::math::blas::generic::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, double* a, double* b, double* c, +sycl::event rotg(backend_selector selector, double* a, double* b, double* c, double* s, const std::vector& dependencies) { auto done = - oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); + oneapi::math::blas::generic::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex* a, +sycl::event rotg(backend_selector selector, std::complex* a, 
std::complex* b, float* c, std::complex* s, const std::vector& dependencies) { auto done = - oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); + oneapi::math::blas::generic::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex* a, +sycl::event rotg(backend_selector selector, std::complex* a, std::complex* b, double* c, std::complex* s, const std::vector& dependencies) { auto done = - oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); + oneapi::math::blas::generic::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event sdsdot(backend_selector selector, std::int64_t n, float sb, +sycl::event sdsdot(backend_selector selector, std::int64_t n, float sb, const float* x, std::int64_t incx, const float* y, std::int64_t incy, float* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, - incy, result, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, + incy, result, dependencies); return done; } -sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, +sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, float beta, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, - n, k, alpha, a, lda, b, ldb, beta, c, - ldc, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::her2k(selector.get_queue(), upper_lower, trans, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } -sycl::event her2k(backend_selector 
selector, uplo upper_lower, transpose trans, +sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, double beta, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, - n, k, alpha, a, lda, b, ldb, beta, c, - ldc, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::her2k(selector.get_queue(), upper_lower, trans, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float* x, +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, std::int64_t incx, const float* y, std::int64_t incy, float* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, + result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const double* x, +sycl::event dot(backend_selector selector, std::int64_t n, const double* x, std::int64_t incx, const double* y, std::int64_t incy, double* result, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, + result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float* x, +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, std::int64_t incx, const float* y, std::int64_t incy, double* result, const std::vector& dependencies) { - auto done = 
oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, + result, dependencies); return done; } -sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::symv( + auto done = oneapi::math::blas::generic::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, +sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, std::int64_t incy, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::symv( + auto done = oneapi::math::blas::generic::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event omatcopy_batch(backend_selector selector, transpose trans, +sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch( + auto done = oneapi::math::blas::generic::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event omatcopy_batch(backend_selector selector, transpose 
trans, +sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch( + auto done = oneapi::math::blas::generic::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event omatcopy_batch(backend_selector selector, transpose trans, +sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch( + auto done = oneapi::math::blas::generic::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event omatcopy_batch(backend_selector selector, transpose trans, +sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch( + auto done = oneapi::math::blas::generic::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event imatcopy_batch(backend_selector selector, transpose trans, +sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, 
float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch( + auto done = oneapi::math::blas::generic::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } -sycl::event imatcopy_batch(backend_selector selector, transpose trans, +sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch( + auto done = oneapi::math::blas::generic::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } -sycl::event imatcopy_batch(backend_selector selector, transpose trans, +sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch( + auto done = oneapi::math::blas::generic::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } -sycl::event imatcopy_batch(backend_selector selector, transpose trans, +sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch( + auto done = oneapi::math::blas::generic::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, 
alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } -sycl::event omatadd_batch(backend_selector selector, transpose transa, +sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float beta, const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatadd_batch( + auto done = oneapi::math::blas::generic::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); return done; } -sycl::event omatadd_batch(backend_selector selector, transpose transa, +sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double beta, const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatadd_batch( + auto done = oneapi::math::blas::generic::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); return done; } -sycl::event omatadd_batch(backend_selector selector, transpose transa, +sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, const std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& 
dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatadd_batch( + auto done = oneapi::math::blas::generic::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); return done; } -sycl::event omatadd_batch(backend_selector selector, transpose transa, +sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, const std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatadd_batch( + auto done = oneapi::math::blas::generic::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); return done; } -sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, +sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b, std::int64_t ldb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } -sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, +sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b, std::int64_t ldb, const std::vector& dependencies) { - auto done = 
oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } -sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, +sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::complex* b, std::int64_t ldb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } -sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, +sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::complex* b, std::int64_t ldb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } -sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, +sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatcopy2( + auto done = oneapi::math::blas::generic::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, 
dependencies); return done; } -sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, +sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatcopy2( + auto done = oneapi::math::blas::generic::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } -sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, +sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatcopy2( + auto done = oneapi::math::blas::generic::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } -sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, +sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatcopy2( + auto done = oneapi::math::blas::generic::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } -sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, +sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, 
std::int64_t ldb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } -sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, +sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } -sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, +sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } -sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, +sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, 
dependencies); return done; } -sycl::event omatadd(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, float alpha, const float* a, - std::int64_t lda, float beta, const float* b, std::int64_t ldb, float* c, - std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, - m, n, alpha, a, lda, beta, b, ldb, c, - ldc, dependencies); +sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::generic::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } -sycl::event omatadd(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, double alpha, const double* a, - std::int64_t lda, double beta, const double* b, std::int64_t ldb, double* c, - std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, - m, n, alpha, a, lda, beta, b, ldb, c, - ldc, dependencies); +sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::generic::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } -sycl::event omatadd(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, +sycl::event omatadd(backend_selector 
selector, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::complex beta, const std::complex* b, std::int64_t ldb, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, - m, n, alpha, a, lda, beta, b, ldb, c, - ldc, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } -sycl::event omatadd(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, +sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::complex beta, const std::complex* b, std::int64_t ldb, std::complex* c, std::int64_t ldc, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, - m, n, alpha, a, lda, beta, b, ldb, c, - ldc, dependencies); + auto done = oneapi::math::blas::generic::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } -sycl::event omatcopy_batch(backend_selector selector, transpose* trans, +sycl::event omatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch( + auto done = oneapi::math::blas::generic::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); return done; } -sycl::event 
omatcopy_batch(backend_selector selector, transpose* trans, +sycl::event omatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, double** b, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch( + auto done = oneapi::math::blas::generic::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); return done; } -sycl::event omatcopy_batch(backend_selector selector, transpose* trans, +sycl::event omatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch( + auto done = oneapi::math::blas::generic::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); return done; } -sycl::event omatcopy_batch(backend_selector selector, transpose* trans, +sycl::event omatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch( + auto done = oneapi::math::blas::generic::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); return done; } -sycl::event imatcopy_batch(backend_selector selector, transpose* trans, +sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, float* alpha, 
float** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch( + auto done = oneapi::math::blas::generic::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); return done; } -sycl::event imatcopy_batch(backend_selector selector, transpose* trans, +sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch( + auto done = oneapi::math::blas::generic::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); return done; } -sycl::event imatcopy_batch(backend_selector selector, transpose* trans, +sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, std::complex* alpha, std::complex** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch( + auto done = oneapi::math::blas::generic::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); return done; } -sycl::event imatcopy_batch(backend_selector selector, transpose* trans, +sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, std::complex* alpha, std::complex** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch( + auto done = 
oneapi::math::blas::generic::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); return done; diff --git a/include/oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp b/include/oneapi/math/blas/detail/generic/onemath_blas_generic.hpp similarity index 89% rename from include/oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp rename to include/oneapi/math/blas/detail/generic/onemath_blas_generic.hpp index 52420dc85..95b297678 100644 --- a/include/oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp +++ b/include/oneapi/math/blas/detail/generic/onemath_blas_generic.hpp @@ -17,8 +17,8 @@ * SPDX-License-Identifier: Apache-2.0 *******************************************************************************/ -#ifndef _ONEMATH_BLAS_PORTBLAS_HPP_ -#define _ONEMATH_BLAS_PORTBLAS_HPP_ +#ifndef _ONEMATH_BLAS_GENERIC_BLAS_HPP_ +#define _ONEMATH_BLAS_GENERIC_BLAS_HPP_ #if __has_include() #include @@ -42,7 +42,7 @@ using oneapi::math::diag; using oneapi::math::offset; namespace blas { -namespace portblas { +namespace generic { namespace column_major { #include "oneapi/math/blas/detail/onemath_blas_backends.hxx" @@ -53,9 +53,9 @@ namespace row_major { #include "oneapi/math/blas/detail/onemath_blas_backends.hxx" } //namespace row_major -} // namespace portblas +} // namespace generic } // namespace blas } // namespace math } // namespace oneapi -#endif // _ONEMATH_BLAS_PORTBLAS_HPP_ +#endif // _ONEMATH_BLAS_GENERIC_BLAS_HPP_ diff --git a/include/oneapi/math/detail/backends.hpp b/include/oneapi/math/detail/backends.hpp index 58f61e4c1..6cd98d12c 100644 --- a/include/oneapi/math/detail/backends.hpp +++ b/include/oneapi/math/detail/backends.hpp @@ -36,7 +36,7 @@ enum class backend { netlib, rocblas, rocrand, - portblas, + generic, cufft, rocfft, portfft, @@ -59,7 +59,7 @@ static backendmap backend_map = { { backend::mklcpu, "mklcpu" }, { backend::rocblas, "rocblas" }, { backend::rocrand, "rocrand" }, 
{ backend::rocsolver, "rocsolver" }, - { backend::portblas, "portblas" }, + { backend::generic, "generic" }, { backend::cufft, "cufft" }, { backend::rocfft, "rocfft" }, { backend::portfft, "portfft" }, diff --git a/include/oneapi/math/detail/backends_table.hpp b/include/oneapi/math/detail/backends_table.hpp index 3e83b070f..f42f44354 100644 --- a/include/oneapi/math/detail/backends_table.hpp +++ b/include/oneapi/math/detail/backends_table.hpp @@ -53,8 +53,8 @@ static std::map>> libraries = #ifdef ONEMATH_ENABLE_NETLIB_BACKEND LIB_NAME("blas_netlib"), #endif -#ifdef ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_CPU - LIB_NAME("blas_portblas"), +#ifdef ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_INTEL_CPU + LIB_NAME("blas_generic"), #endif } }, { device::intelgpu, @@ -62,8 +62,8 @@ static std::map>> libraries = #ifdef ONEMATH_ENABLE_MKLGPU_BACKEND LIB_NAME("blas_mklgpu"), #endif -#ifdef ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_GPU - LIB_NAME("blas_portblas"), +#ifdef ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_INTEL_GPU + LIB_NAME("blas_generic"), #endif } }, { device::amdgpu, @@ -71,8 +71,8 @@ static std::map>> libraries = #ifdef ONEMATH_ENABLE_ROCBLAS_BACKEND LIB_NAME("blas_rocblas"), #endif -#ifdef ONEMATH_ENABLE_PORTBLAS_BACKEND_AMD_GPU - LIB_NAME("blas_portblas"), +#ifdef ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_AMD_GPU + LIB_NAME("blas_generic"), #endif } }, { device::nvidiagpu, @@ -80,14 +80,14 @@ static std::map>> libraries = #ifdef ONEMATH_ENABLE_CUBLAS_BACKEND LIB_NAME("blas_cublas"), #endif -#ifdef ONEMATH_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU - LIB_NAME("blas_portblas"), +#ifdef ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_NVIDIA_GPU + LIB_NAME("blas_generic"), #endif } }, { device::generic_device, { -#ifdef ENABLE_PORTBLAS_BACKEND - LIB_NAME("blas_portblas"), +#ifdef ENABLE_GENERIC_BLAS_BACKEND + LIB_NAME("blas_generic"), #endif } } } }, diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0f6d960f4..f9fa22c02 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -86,7 +86,7 @@ 
function(generate_header_file) set(ONEMATH_ENABLE_CUBLAS_BACKEND ${ENABLE_CUBLAS_BACKEND}) set(ONEMATH_ENABLE_ROCBLAS_BACKEND ${ENABLE_ROCBLAS_BACKEND}) set(ONEMATH_ENABLE_NETLIB_BACKEND ${ENABLE_NETLIB_BACKEND}) - set(ONEMATH_ENABLE_PORTBLAS_BACKEND ${ENABLE_PORTBLAS_BACKEND}) + set(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND ${ENABLE_GENERIC_BLAS_BACKEND}) set(ONEMATH_ENABLE_CURAND_BACKEND ${ENABLE_CURAND_BACKEND}) set(ONEMATH_ENABLE_ROCRAND_BACKEND ${ENABLE_ROCRAND_BACKEND}) set(ONEMATH_ENABLE_CUSOLVER_BACKEND ${ENABLE_CUSOLVER_BACKEND}) diff --git a/src/blas/backends/CMakeLists.txt b/src/blas/backends/CMakeLists.txt index ac7595fc6..710dc9347 100644 --- a/src/blas/backends/CMakeLists.txt +++ b/src/blas/backends/CMakeLists.txt @@ -40,6 +40,6 @@ if(ENABLE_ROCBLAS_BACKEND AND UNIX) add_subdirectory(rocblas) endif() -if(ENABLE_PORTBLAS_BACKEND AND UNIX) - add_subdirectory(portblas) +if(ENABLE_GENERIC_BLAS_BACKEND AND UNIX) + add_subdirectory(generic) endif() diff --git a/src/blas/backends/portblas/CMakeLists.txt b/src/blas/backends/generic/CMakeLists.txt similarity index 54% rename from src/blas/backends/portblas/CMakeLists.txt rename to src/blas/backends/generic/CMakeLists.txt index f2c38f9b3..4c998d79c 100644 --- a/src/blas/backends/portblas/CMakeLists.txt +++ b/src/blas/backends/generic/CMakeLists.txt @@ -17,13 +17,17 @@ # #========================================================================= -set(LIB_NAME onemath_blas_portblas) +set(LIB_NAME onemath_blas_generic) set(LIB_OBJ ${LIB_NAME}_obj) -if(NOT DEFINED PORTBLAS_TUNING_TARGET) - option(PORTBLAS_TUNING_TARGET "Set a TUNING_TARGET for portBLAS" "") +if(NOT DEFINED GENERIC_BLAS_TUNING_TARGET) + set(GENERIC_BLAS_TUNING_TARGET "" CACHE STRING "TUNING_TARGET for the generic BLAS backend: INTEL_CPU, INTEL_GPU, AMD_GPU or NVIDIA_GPU (empty: auto-detect)") endif() +if(DEFINED PORTBLAS_TUNING_TARGET) + message(WARNING "PORTBLAS_TUNING_TARGET is deprecated, please use GENERIC_BLAS_TUNING_TARGET.") + set(GENERIC_BLAS_TUNING_TARGET "${PORTBLAS_TUNING_TARGET}") +endif() # Parse compiler flags and return
a list of SYCL targets # The list is empty if no targets are set function(get_sycl_targets FLAGS) @@ -33,9 +37,9 @@ function(get_sycl_targets FLAGS) set(SYCL_TARGETS ${SYCL_TARGETS} PARENT_SCOPE) endfunction(get_sycl_targets) -# portBLAS supports tuning for some device types, but can only be compiled -# for one at a time currently. Work out which device to tune for based on the -# DPC++ target triple specified via -fsycl-targets +# Generic blas backend supports tuning for some device types, but can only be +# compiled for one at a time currently. Work out which device to tune for based +# on the DPC++ target triple specified via -fsycl-targets if(TARGET ONEMATH::SYCL::SYCL) get_target_property(ONEMATH_COMPILE_OPTIONS ONEMATH::SYCL::SYCL INTERFACE_COMPILE_OPTIONS) endif() @@ -46,20 +50,20 @@ if(NUM_TARGETS EQUAL 0) list(LENGTH SYCL_TARGETS NUM_TARGETS) endif() -if(PORTBLAS_TUNING_TARGET) +if(GENERIC_BLAS_TUNING_TARGET) # Allow the user to manually enable a specific device type - # for tuned portBLAS configurations and sets sycl-target. - if(PORTBLAS_TUNING_TARGET STREQUAL "INTEL_CPU") - set(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_CPU "ON" CACHE INTERNAL "") - set(PORTBLAS_TUNING_TARGET "") + # for tuned generic backend configurations and sets sycl-target. 
+ if(GENERIC_BLAS_TUNING_TARGET STREQUAL "INTEL_CPU") + set(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_INTEL_CPU "ON" CACHE INTERNAL "") + set(GENERIC_BLAS_TUNING_TARGET "") target_compile_options(ONEMATH::SYCL::SYCL INTERFACE -fsycl-targets=spir64_x86_64 -fsycl-unnamed-lambda) target_link_options(ONEMATH::SYCL::SYCL INTERFACE -fsycl-targets=spir64_x86_64) - elseif(PORTBLAS_TUNING_TARGET STREQUAL "INTEL_GPU") - set(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_GPU "ON" CACHE INTERNAL "") - elseif(PORTBLAS_TUNING_TARGET STREQUAL "AMD_GPU") - set(ONEMATH_ENABLE_PORTBLAS_BACKEND_AMD_GPU "ON" CACHE INTERNAL "") + elseif(GENERIC_BLAS_TUNING_TARGET STREQUAL "INTEL_GPU") + set(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_INTEL_GPU "ON" CACHE INTERNAL "") + elseif(GENERIC_BLAS_TUNING_TARGET STREQUAL "AMD_GPU") + set(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_AMD_GPU "ON" CACHE INTERNAL "") if (is_dpcpp) target_compile_options(ONEMATH::SYCL::SYCL INTERFACE -fsycl-targets=amdgcn-amd-amdhsa -fsycl-unnamed-lambda @@ -68,11 +72,11 @@ if(PORTBLAS_TUNING_TARGET) -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${HIP_TARGETS}) else() message(WARNING "Compiler is not supported." - " Unable to automatically set the required flags for the target '${PORTBLAS_TUNING_TARGET}'." + " Unable to automatically set the required flags for the target '${GENERIC_BLAS_TUNING_TARGET}'." " Compilation may fail.") endif() - elseif(PORTBLAS_TUNING_TARGET STREQUAL "NVIDIA_GPU") - set(ONEMATH_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU "ON" CACHE INTERNAL "") + elseif(GENERIC_BLAS_TUNING_TARGET STREQUAL "NVIDIA_GPU") + set(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_NVIDIA_GPU "ON" CACHE INTERNAL "") if (is_dpcpp) target_compile_options(ONEMATH::SYCL::SYCL INTERFACE -fsycl-targets=nvptx64-nvidia-cuda -fsycl-unnamed-lambda) @@ -86,95 +90,91 @@ if(PORTBLAS_TUNING_TARGET) endif() else() message(WARNING "Compiler is not supported." - " Unable to automatically set the required flags for the target '${PORTBLAS_TUNING_TARGET}'." 
+ " Unable to automatically set the required flags for the target '${GENERIC_BLAS_TUNING_TARGET}'." " Compilation may fail.") endif() else() - message(FATAL_ERROR "Unsupported PORTBLAS_TUNING_TARGET: '${PORTBLAS_TUNING_TARGET}'") + message(FATAL_ERROR "Unsupported GENERIC_BLAS_TUNING_TARGET: '${GENERIC_BLAS_TUNING_TARGET}'") endif() elseif(NUM_TARGETS EQUAL 0) - # Enable portBLAS backend for all devices types - set(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_CPU "ON" CACHE INTERNAL "") - set(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_GPU "ON" CACHE INTERNAL "") - set(ONEMATH_ENABLE_PORTBLAS_BACKEND_AMD_GPU "ON" CACHE INTERNAL "") - set(ONEMATH_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU "ON" CACHE INTERNAL "") + # Enable generic backend for all devices types + set(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_INTEL_CPU "ON" CACHE INTERNAL "") + set(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_INTEL_GPU "ON" CACHE INTERNAL "") + set(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_AMD_GPU "ON" CACHE INTERNAL "") + set(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_NVIDIA_GPU "ON" CACHE INTERNAL "") else() - # Try to automatically detect the PORTBLAS_TUNING_TARGET + # Try to automatically detect the GENERIC_BLAS_TUNING_TARGET foreach(SYCL_TARGET IN LISTS SYCL_TARGETS) if(SYCL_TARGETS MATCHES "^intel_gpu" OR SYCL_TARGETS MATCHES "^spir64_gen") - set(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_GPU "ON" CACHE INTERNAL "") - set(PORTBLAS_TUNING_TARGET "INTEL_GPU") + set(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_INTEL_GPU "ON" CACHE INTERNAL "") + set(GENERIC_BLAS_TUNING_TARGET "INTEL_GPU") elseif(SYCL_TARGETS MATCHES "^spir64_x86_64") - set(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_CPU "ON" CACHE INTERNAL "") + set(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_INTEL_CPU "ON" CACHE INTERNAL "") elseif(SYCL_TARGETS MATCHES "^spir64") - set(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_CPU "ON" CACHE INTERNAL "") - set(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_GPU "ON" CACHE INTERNAL "") - set(PORTBLAS_TUNING_TARGET "INTEL_GPU") + 
set(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_INTEL_CPU "ON" CACHE INTERNAL "") + set(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_INTEL_GPU "ON" CACHE INTERNAL "") + set(GENERIC_BLAS_TUNING_TARGET "INTEL_GPU") elseif(SYCL_TARGETS MATCHES "^amd_gpu" OR SYCL_TARGETS MATCHES "-amd-") - set(ONEMATH_ENABLE_PORTBLAS_BACKEND_AMD_GPU "ON" CACHE INTERNAL "") - set(PORTBLAS_TUNING_TARGET "AMD_GPU") + set(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_AMD_GPU "ON" CACHE INTERNAL "") + set(GENERIC_BLAS_TUNING_TARGET "AMD_GPU") elseif(SYCL_TARGETS MATCHES "^nvidia_gpu" OR SYCL_TARGETS MATCHES "-nvidia-") - set(ONEMATH_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU "ON" CACHE INTERNAL "") - set(PORTBLAS_TUNING_TARGET "NVIDIA_GPU") + set(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_NVIDIA_GPU "ON" CACHE INTERNAL "") + set(GENERIC_BLAS_TUNING_TARGET "NVIDIA_GPU") endif() endforeach() - # Currently portBLAS can only be tuned for one type of device. + # Currently generic can only be tuned for one type of device. if(NUM_TARGETS GREATER 1) - set(PORTBLAS_TUNING_TARGET "") + set(GENERIC_BLAS_TUNING_TARGET "") endif() endif() -if(PORTBLAS_TUNING_TARGET STREQUAL "INTEL_GPU") - message(STATUS "Tuning portBLAS for Intel GPU devices") -elseif(PORTBLAS_TUNING_TARGET STREQUAL "AMD_GPU") - message(STATUS "Tuning portBLAS for AMD GPU devices") -elseif(PORTBLAS_TUNING_TARGET STREQUAL "NVIDIA_GPU") - message(STATUS "Tuning portBLAS for Nvidia GPU devices") +if(GENERIC_BLAS_TUNING_TARGET STREQUAL "INTEL_GPU") + message(STATUS "Tuning generic BLAS for Intel GPU devices") +elseif(GENERIC_BLAS_TUNING_TARGET STREQUAL "AMD_GPU") + message(STATUS "Tuning generic BLAS for AMD GPU devices") +elseif(GENERIC_BLAS_TUNING_TARGET STREQUAL "NVIDIA_GPU") + message(STATUS "Tuning generic BLAS for Nvidia GPU devices") else() - message(STATUS "portBLAS is not tuned for any device which can impact performance") + message(STATUS "generic BLAS is not tuned for any device which can impact performance") endif() -# If find_package doesn't work, download portBLAS 
from Github. This is +# If find_package doesn't work, download onemath_sycl_blas from Github. This is # intended to make oneMath easier to use. -message(STATUS "Looking for portBLAS") -find_package(PORTBLAS QUIET) -if (NOT PORTBLAS_FOUND) - message(STATUS "Looking for portBLAS - could not find portBLAS with PORTBLAS_DIR") +message(STATUS "Looking for oneMATH blas kernels") +find_package(ONEMATH_SYCL_BLAS QUIET) +if (NOT ONEMATH_SYCL_BLAS_FOUND) + message(STATUS "Looking for onemath_sycl_blas for generic backend - could not find onemath_sycl_blas with ONEMATH_SYCL_BLAS_DIR") include(FetchContent) - set(INSTALL_HEADER_ONLY ON) - set(BLAS_BUILD_SAMPLES OFF) - set(BLAS_ENABLE_BENCHMARK OFF) - set(BLAS_ENABLE_TESTING OFF) - set(ENABLE_EXPRESSION_TESTS OFF) - if(NOT PORTBLAS_TUNING_TARGET) - set(PORTBLAS_TUNING_TARGET "DEFAULT") + if(NOT GENERIC_BLAS_TUNING_TARGET) + set(GENERIC_BLAS_TUNING_TARGET "DEFAULT") endif() - # Following variable TUNING_TARGET will be used in portBLAS internal configuration - set(TUNING_TARGET ${PORTBLAS_TUNING_TARGET}) + # Following variable TUNING_TARGET will be used in generic blas internal configuration + set(TUNING_TARGET ${GENERIC_BLAS_TUNING_TARGET}) set(BLAS_ENABLE_COMPLEX ON) - # Set the policy to forward variables to portBLAS configure step + # Set the policy to forward variables to generic blas configure step set(CMAKE_POLICY_DEFAULT_CMP0077 NEW) set(FETCHCONTENT_BASE_DIR "${CMAKE_BINARY_DIR}/deps") FetchContent_Declare( - portBLAS - GIT_REPOSITORY https://github.com/codeplaysoftware/portBLAS + onemath_sycl_blas + GIT_REPOSITORY https://github.com/uxlfoundation/generic-sycl-components GIT_TAG main + SOURCE_SUBDIR onemath/sycl/blas ) - FetchContent_MakeAvailable(portblas) - message(STATUS "Looking for portBLAS - downloaded") + FetchContent_MakeAvailable(onemath_sycl_blas) + message(STATUS "Looking for onemath_sycl_blas - downloaded") else() - message(STATUS "Looking for portBLAS - found") - add_library(portblas ALIAS 
PORTBLAS::portblas) + message(STATUS "Looking for oneMath blas kernels - found") + add_library(onemath_sycl_blas ALIAS ONEMATH_SYCL_BLAS::onemath_sycl_blas) endif() set(SOURCES - portblas_level1_double.cpp portblas_level1_float.cpp - portblas_level2_double.cpp portblas_level2_float.cpp - portblas_level3_double.cpp portblas_level3_float.cpp - portblas_level3_half.cpp portblas_level3_bfloat16.cpp - portblas_batch.cpp - $<$: portblas_wrappers.cpp>) + generic_level1_double.cpp generic_level1_float.cpp + generic_level2_double.cpp generic_level2_float.cpp + generic_level3_double.cpp generic_level3_float.cpp + generic_level3_half.cpp generic_level3_bfloat16.cpp + generic_batch.cpp + $<$: generic_wrappers.cpp>) add_library(${LIB_NAME}) add_deprecated_library(${LIB_NAME}) add_library(${LIB_OBJ} OBJECT ${SOURCES}) @@ -193,7 +193,7 @@ target_include_directories(${LIB_OBJ} ) target_compile_options(${LIB_OBJ} PRIVATE ${ONEMATH_BUILD_COPT}) -target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL portblas) +target_link_libraries(${LIB_OBJ} PUBLIC ONEMATH::SYCL::SYCL onemath_sycl_blas) set_target_properties(${LIB_OBJ} PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/src/blas/backends/portblas/portblas_batch.cpp b/src/blas/backends/generic/generic_batch.cpp similarity index 86% rename from src/blas/backends/portblas/portblas_batch.cpp rename to src/blas/backends/generic/generic_batch.cpp index d80207c8c..c0ad8f5e1 100644 --- a/src/blas/backends/portblas/portblas_batch.cpp +++ b/src/blas/backends/generic/generic_batch.cpp @@ -23,21 +23,21 @@ #include #endif -#include "portblas_common.hpp" +#include "generic_common.hpp" #include "oneapi/math/exceptions.hpp" -#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp" +#include "oneapi/math/blas/detail/generic/onemath_blas_generic.hpp" namespace oneapi { namespace math { namespace blas { -namespace portblas { +namespace generic { namespace column_major { #define COLUMN_MAJOR constexpr bool is_column_major() { 
return true; } -#include "portblas_batch.cxx" +#include "generic_batch.cxx" #undef COLUMN_MAJOR } // namespace column_major @@ -47,11 +47,11 @@ namespace row_major { constexpr bool is_column_major() { return false; } -#include "portblas_batch.cxx" +#include "generic_batch.cxx" #undef ROW_MAJOR } // namespace row_major -} // namespace portblas +} // namespace generic } // namespace blas } // namespace math } // namespace oneapi diff --git a/src/blas/backends/portblas/portblas_batch.cxx b/src/blas/backends/generic/generic_batch.cxx similarity index 96% rename from src/blas/backends/portblas/portblas_batch.cxx rename to src/blas/backends/generic/generic_batch.cxx index 75b1a115b..7a6af6070 100644 --- a/src/blas/backends/portblas/portblas_batch.cxx +++ b/src/blas/backends/generic/generic_batch.cxx @@ -118,15 +118,15 @@ void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t void axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - CALL_PORTBLAS_FN(::blas::_axpy_batch, queue, n, alpha, x, incx, stridex, y, incy, stridey, - batch_size); + CALL_GENERIC_BLAS_FN(::blas::_axpy_batch, queue, n, alpha, x, incx, stridex, y, incy, stridey, + batch_size); } void axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - CALL_PORTBLAS_FN(::blas::_axpy_batch, queue, n, alpha, x, incx, stridex, y, incy, stridey, - batch_size); + CALL_GENERIC_BLAS_FN(::blas::_axpy_batch, queue, n, alpha, x, incx, stridex, y, incy, stridey, + batch_size); } void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, @@ -172,8 +172,8 @@ void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, 
float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - CALL_PORTBLAS_FN(::blas::_gemm_strided_batched, queue, transa, transb, m, n, k, alpha, a, lda, - stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); + CALL_GENERIC_BLAS_FN(::blas::_gemm_strided_batched, queue, transa, transb, m, n, k, alpha, a, + lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, @@ -182,8 +182,8 @@ void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - CALL_PORTBLAS_FN(::blas::_gemm_strided_batched, queue, transa, transb, m, n, k, alpha, a, lda, - stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); + CALL_GENERIC_BLAS_FN(::blas::_gemm_strided_batched, queue, transa, transb, m, n, k, alpha, a, + lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, @@ -277,16 +277,16 @@ void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int6 std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - CALL_PORTBLAS_FN(::blas::_omatcopy_batch, queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, - stride_b, batch_size); + CALL_GENERIC_BLAS_FN(::blas::_omatcopy_batch, queue, trans, m, n, alpha, a, lda, stride_a, b, + ldb, stride_b, batch_size); } void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t 
batch_size) { - CALL_PORTBLAS_FN(::blas::_omatcopy_batch, queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, - stride_b, batch_size); + CALL_GENERIC_BLAS_FN(::blas::_omatcopy_batch, queue, trans, m, n, alpha, a, lda, stride_a, b, + ldb, stride_b, batch_size); } void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, @@ -337,8 +337,8 @@ void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - CALL_PORTBLAS_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, stride_a, - beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); + CALL_GENERIC_BLAS_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, + stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, @@ -347,8 +347,8 @@ void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { - CALL_PORTBLAS_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, stride_a, - beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); + CALL_GENERIC_BLAS_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, + stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, @@ -605,16 +605,16 @@ sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, const fl std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_axpy_batch, queue, n, alpha, x, incx, stridex, y, incy, stridey, - batch_size, dependencies); + 
CALL_GENERIC_BLAS_USM_FN(::blas::_axpy_batch, queue, n, alpha, x, incx, stridex, y, incy, + stridey, batch_size, dependencies); } sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, const double* x, std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_axpy_batch, queue, n, alpha, x, incx, stridex, y, incy, stridey, - batch_size, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_axpy_batch, queue, n, alpha, x, incx, stridex, y, incy, + stridey, batch_size, dependencies); } sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, @@ -764,9 +764,9 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_gemm_strided_batched, queue, transa, transb, m, n, k, alpha, a, - lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, - dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_gemm_strided_batched, queue, transa, transb, m, n, k, alpha, + a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, + dependencies); } sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, @@ -776,9 +776,9 @@ sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, std::int64_t stride_b, double beta, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_gemm_strided_batched, queue, transa, transb, m, n, k, alpha, a, - lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, - dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_gemm_strided_batched, queue, transa, transb, m, n, k, alpha, + a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, + 
dependencies); } sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, @@ -920,8 +920,8 @@ sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, st std::int64_t n, float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_omatcopy_batch, queue, trans, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_omatcopy_batch, queue, trans, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size, dependencies); } sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, @@ -929,8 +929,8 @@ sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, st std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_omatcopy_batch, queue, trans, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_omatcopy_batch, queue, trans, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size, dependencies); } sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, @@ -983,9 +983,9 @@ sycl::event omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, float beta, const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, - stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, - dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, + stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, 
+ dependencies); } sycl::event omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, @@ -994,9 +994,9 @@ sycl::event omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, double beta, const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, - stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, - dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, + stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, + dependencies); } sycl::event omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, diff --git a/src/blas/backends/portblas/portblas_common.hpp b/src/blas/backends/generic/generic_common.hpp similarity index 64% rename from src/blas/backends/portblas/portblas_common.hpp rename to src/blas/backends/generic/generic_common.hpp index c2713c2f5..3b9726f58 100644 --- a/src/blas/backends/portblas/portblas_common.hpp +++ b/src/blas/backends/generic/generic_common.hpp @@ -17,10 +17,10 @@ * SPDX-License-Identifier: Apache-2.0 *******************************************************************************/ -#ifndef _PORTBLAS_COMMON_HPP_ -#define _PORTBLAS_COMMON_HPP_ +#ifndef _GENERIC_BLAS_COMMON_HPP_ +#define _GENERIC_BLAS_COMMON_HPP_ -#include "portblas.hpp" +#include "onemath_sycl_blas.hpp" #include "oneapi/math/types.hpp" #include "oneapi/math/exceptions.hpp" @@ -30,13 +30,13 @@ namespace oneapi { namespace math { namespace blas { -namespace portblas { +namespace generic { namespace detail { -// portBLAS handle type. Constructed with sycl::queue. +// onemath_sycl_blas handle type. Constructed with sycl::queue. using handle_t = ::blas::SB_Handle; -// portBLAS buffer iterator. Constructed with sycl::buffer +// onemath_sycl_blas buffer iterator. 
Constructed with sycl::buffer template using buffer_iterator_t = ::blas::BufferIterator; @@ -44,76 +44,76 @@ using buffer_iterator_t = ::blas::BufferIterator; template using sycl_complex_t = sycl::ext::oneapi::experimental::complex; -/** A trait for obtaining equivalent portBLAS API types from oneMath API +/** A trait for obtaining equivalent onemath_sycl_blas API types from oneMath API * types. * * @tparam InputT is the oneMath type. - * portblas_type::type should be the equivalent portBLAS type. + * generic_type::type should be the equivalent onemath_sycl_blas type. **/ template -struct portblas_type; +struct generic_type; -#define DEF_PORTBLAS_TYPE(onemath_t, portblas_t) \ - template <> \ - struct portblas_type { \ - using type = portblas_t; \ +#define DEF_GENERIC_BLAS_TYPE(onemath_t, generic_t) \ + template <> \ + struct generic_type { \ + using type = generic_t; \ }; -DEF_PORTBLAS_TYPE(sycl::queue, handle_t) -DEF_PORTBLAS_TYPE(int64_t, int64_t) -DEF_PORTBLAS_TYPE(sycl::half, sycl::half) -DEF_PORTBLAS_TYPE(float, float) -DEF_PORTBLAS_TYPE(double, double) -DEF_PORTBLAS_TYPE(oneapi::math::transpose, char) -DEF_PORTBLAS_TYPE(oneapi::math::uplo, char) -DEF_PORTBLAS_TYPE(oneapi::math::side, char) -DEF_PORTBLAS_TYPE(oneapi::math::diag, char) -DEF_PORTBLAS_TYPE(std::complex, sycl_complex_t) -DEF_PORTBLAS_TYPE(std::complex, sycl_complex_t) -// Passthrough of portBLAS arg types for more complex wrapping. 
-DEF_PORTBLAS_TYPE(::blas::gemm_batch_type_t, ::blas::gemm_batch_type_t) - -#undef DEF_PORTBLAS_TYPE +DEF_GENERIC_BLAS_TYPE(sycl::queue, handle_t) +DEF_GENERIC_BLAS_TYPE(int64_t, int64_t) +DEF_GENERIC_BLAS_TYPE(sycl::half, sycl::half) +DEF_GENERIC_BLAS_TYPE(float, float) +DEF_GENERIC_BLAS_TYPE(double, double) +DEF_GENERIC_BLAS_TYPE(oneapi::math::transpose, char) +DEF_GENERIC_BLAS_TYPE(oneapi::math::uplo, char) +DEF_GENERIC_BLAS_TYPE(oneapi::math::side, char) +DEF_GENERIC_BLAS_TYPE(oneapi::math::diag, char) +DEF_GENERIC_BLAS_TYPE(std::complex, sycl_complex_t) +DEF_GENERIC_BLAS_TYPE(std::complex, sycl_complex_t) +// Passthrough of onemath_sycl_blas arg types for more complex wrapping. +DEF_GENERIC_BLAS_TYPE(::blas::gemm_batch_type_t, ::blas::gemm_batch_type_t) + +#undef DEF_GENERIC_BLAS_TYPE template -struct portblas_type> { +struct generic_type> { using type = buffer_iterator_t; }; template -struct portblas_type { +struct generic_type { using type = ElemT*; }; // USM Complex template -struct portblas_type*> { +struct generic_type*> { using type = sycl_complex_t*; }; template -struct portblas_type*> { +struct generic_type*> { using type = const sycl_complex_t*; }; template <> -struct portblas_type> { +struct generic_type> { using type = std::vector; }; -/** Convert a oneMath argument to the type required for portBLAS. +/** Convert a oneMath argument to the type required for onemath_sycl_blas. * * @tparam InputT The oneMath type. * @param input The value of the oneMath type. - * @return The portBLAS value with appropriate type. + * @return The onemath_sycl_blas value with appropriate type. 
**/ template -inline typename portblas_type::type convert_to_portblas_type(InputT& input) { - return typename portblas_type::type(input); +inline typename generic_type::type convert_to_generic_type(InputT& input) { + return typename generic_type::type(input); } template <> -inline char convert_to_portblas_type(oneapi::math::transpose& trans) { +inline char convert_to_generic_type(oneapi::math::transpose& trans) { if (trans == oneapi::math::transpose::nontrans) { return 'n'; } @@ -126,7 +126,7 @@ inline char convert_to_portblas_type(oneapi::math::tran } template <> -inline char convert_to_portblas_type(oneapi::math::uplo& upper_lower) { +inline char convert_to_generic_type(oneapi::math::uplo& upper_lower) { if (upper_lower == oneapi::math::uplo::upper) { return 'u'; } @@ -136,7 +136,7 @@ inline char convert_to_portblas_type(oneapi::math::uplo& upp } template <> -inline char convert_to_portblas_type(oneapi::math::side& left_right) { +inline char convert_to_generic_type(oneapi::math::side& left_right) { if (left_right == oneapi::math::side::left) { return 'l'; } @@ -146,7 +146,7 @@ inline char convert_to_portblas_type(oneapi::math::side& lef } template <> -inline char convert_to_portblas_type(oneapi::math::diag& unit_diag) { +inline char convert_to_generic_type(oneapi::math::diag& unit_diag) { if (unit_diag == oneapi::math::diag::unit) { return 'u'; } @@ -156,8 +156,8 @@ inline char convert_to_portblas_type(oneapi::math::diag& uni } template -inline auto convert_to_portblas_type(ArgT... args) { - return std::make_tuple(convert_to_portblas_type(args)...); +inline auto convert_to_generic_type(ArgT... args) { + return std::make_tuple(convert_to_generic_type(args)...); } /** Throw an unsupported_device exception if a certain argument type is found in @@ -189,15 +189,15 @@ struct throw_if_unsupported_by_device { } // namespace detail -#define CALL_PORTBLAS_FN(portBLASFunc, ...) \ +#define CALL_GENERIC_BLAS_FN(genericFunc, ...) 
\ if constexpr (is_column_major()) { \ detail::throw_if_unsupported_by_device, sycl::aspect::fp64>{}( \ - " portBLAS function requiring fp64 support", __VA_ARGS__); \ + " generic BLAS function requiring fp64 support", __VA_ARGS__); \ detail::throw_if_unsupported_by_device, sycl::aspect::fp16>{}( \ - " portBLAS function requiring fp16 support", __VA_ARGS__); \ - auto args = detail::convert_to_portblas_type(__VA_ARGS__); \ + " generic BLAS function requiring fp16 support", __VA_ARGS__); \ + auto args = detail::convert_to_generic_type(__VA_ARGS__); \ auto fn = [](auto&&... targs) { \ - portBLASFunc(std::forward(targs)...); \ + genericFunc(std::forward(targs)...); \ }; \ try { \ std::apply(fn, args); \ @@ -207,18 +207,18 @@ struct throw_if_unsupported_by_device { } \ } \ else { \ - throw unimplemented("blas", "portBLAS function"); \ + throw unimplemented("blas", "onemath_sycl_blas function"); \ } -#define CALL_PORTBLAS_USM_FN(portblasFunc, ...) \ +#define CALL_GENERIC_BLAS_USM_FN(genericFunc, ...) \ if constexpr (is_column_major()) { \ detail::throw_if_unsupported_by_device{}( \ - " portBLAS function requiring fp64 support", __VA_ARGS__); \ + " generic BLAS function requiring fp64 support", __VA_ARGS__); \ detail::throw_if_unsupported_by_device{}( \ - " portBLAS function requiring fp16 support", __VA_ARGS__); \ - auto args = detail::convert_to_portblas_type(__VA_ARGS__); \ + " generic BLAS function requiring fp16 support", __VA_ARGS__); \ + auto args = detail::convert_to_generic_type(__VA_ARGS__); \ auto fn = [](auto&&... 
targs) { \ - return portblasFunc(std::forward(targs)...).back(); \ + return genericFunc(std::forward(targs)...).back(); \ }; \ try { \ return std::apply(fn, args); \ @@ -228,12 +228,12 @@ struct throw_if_unsupported_by_device { } \ } \ else { \ - throw unimplemented("blas", "portBLAS function"); \ + throw unimplemented("blas", "onemath_sycl_blas function"); \ } -} // namespace portblas +} // namespace generic } // namespace blas } // namespace math } // namespace oneapi -#endif // _PORTBLAS_COMMON_HPP_ +#endif // _GENERIC_BLAS_COMMON_HPP_ diff --git a/src/blas/backends/portblas/portblas_gemm_bias.cxx b/src/blas/backends/generic/generic_gemm_bias.cxx similarity index 100% rename from src/blas/backends/portblas/portblas_gemm_bias.cxx rename to src/blas/backends/generic/generic_gemm_bias.cxx diff --git a/src/blas/backends/portblas/portblas_level1.cxx b/src/blas/backends/generic/generic_level1.cxx similarity index 84% rename from src/blas/backends/portblas/portblas_level1.cxx rename to src/blas/backends/generic/generic_level1.cxx index 6d1f39463..02815c2a0 100644 --- a/src/blas/backends/portblas/portblas_level1.cxx +++ b/src/blas/backends/generic/generic_level1.cxx @@ -33,7 +33,7 @@ void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { - CALL_PORTBLAS_FN(::blas::_iamax, queue, n, x, incx, result); + CALL_GENERIC_BLAS_FN(::blas::_iamax, queue, n, x, incx, result); } void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, @@ -43,7 +43,7 @@ void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { - CALL_PORTBLAS_FN(::blas::_iamin, queue, n, x, incx, result); + CALL_GENERIC_BLAS_FN(::blas::_iamin, queue, n, x, incx, result); } void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, @@ -58,18 +58,18 @@ void asum(sycl::queue& queue, 
std::int64_t n, sycl::buffer, void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { - // portBLAS asum implementation requires that result is initialized to zero + // Generic BLAS asum implementation requires that result is initialized to zero // before performing the computation. queue.submit([&](sycl::handler& cgh) { auto result_acc = result.template get_access(cgh); cgh.single_task([=]() { result_acc[0] = real_t(0); }); }); - CALL_PORTBLAS_FN(::blas::_asum, queue, n, x, incx, result); + CALL_GENERIC_BLAS_FN(::blas::_asum, queue, n, x, incx, result); } void axpy(sycl::queue& queue, std::int64_t n, real_t alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { - CALL_PORTBLAS_FN(::blas::_axpy, queue, n, alpha, x, incx, y, incy); + CALL_GENERIC_BLAS_FN(::blas::_axpy, queue, n, alpha, x, incx, y, incy); } void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, @@ -91,7 +91,7 @@ void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { - CALL_PORTBLAS_FN(::blas::_copy, queue, n, x, incx, y, incy); + CALL_GENERIC_BLAS_FN(::blas::_copy, queue, n, x, incx, y, incy); } void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, @@ -101,13 +101,13 @@ void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& result) { - // portBLAS dot implementation requires that result is initialized to zero + // Generic BLAS dot implementation requires that result is initialized to zero // before performing the computation. 
queue.submit([&](sycl::handler& cgh) { auto result_acc = result.template get_access(cgh); cgh.single_task([=]() { result_acc[0] = real_t(0); }); }); - CALL_PORTBLAS_FN(::blas::_dot, queue, n, x, incx, y, incy, result); + CALL_GENERIC_BLAS_FN(::blas::_dot, queue, n, x, incx, y, incy, result); } #ifdef ENABLE_MIXED_PRECISION_WITH_DOUBLE @@ -120,13 +120,13 @@ void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int void sdsdot(sycl::queue& queue, std::int64_t n, real_t sb, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& result) { - // portBLAS sdsdot implementation requires that result is initialized to zero + // Generic BLAS sdsdot implementation requires that result is initialized to zero // before performing the computation. queue.submit([&](sycl::handler& cgh) { auto result_acc = result.template get_access(cgh); cgh.single_task([=]() { result_acc[0] = real_t(0); }); }); - CALL_PORTBLAS_FN(::blas::_sdsdot, queue, n, sb, x, incx, y, incy, result); + CALL_GENERIC_BLAS_FN(::blas::_sdsdot, queue, n, sb, x, incx, y, incy, result); } void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, @@ -136,13 +136,13 @@ void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { - // portBLAS nrm2 implementation requires that result is initialized to zero + // Generic BLAS nrm2 implementation requires that result is initialized to zero // before performing the computation. 
queue.submit([&](sycl::handler& cgh) { auto result_acc = result.template get_access(cgh); cgh.single_task([=]() { result_acc[0] = real_t(0); }); }); - CALL_PORTBLAS_FN(::blas::_nrm2, queue, n, x, incx, result); + CALL_GENERIC_BLAS_FN(::blas::_nrm2, queue, n, x, incx, result); } void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, @@ -153,12 +153,12 @@ void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, real_t c, real_t s) { - CALL_PORTBLAS_FN(::blas::_rot, queue, n, x, incx, y, incy, c, s); + CALL_GENERIC_BLAS_FN(::blas::_rot, queue, n, x, incx, y, incy, c, s); } void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { - CALL_PORTBLAS_FN(::blas::_rotg, queue, a, b, c, s); + CALL_GENERIC_BLAS_FN(::blas::_rotg, queue, a, b, c, s); } void rotg(sycl::queue& queue, sycl::buffer, 1>& a, @@ -169,17 +169,17 @@ void rotg(sycl::queue& queue, sycl::buffer, 1>& a, void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& param) { - CALL_PORTBLAS_FN(::blas::_rotm, queue, n, x, incx, y, incy, param); + CALL_GENERIC_BLAS_FN(::blas::_rotm, queue, n, x, incx, y, incy, param); } void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, sycl::buffer& x1, real_t y1, sycl::buffer& param) { - CALL_PORTBLAS_FN(::blas::_rotmg, queue, d1, d2, x1, y1, param); + CALL_GENERIC_BLAS_FN(::blas::_rotmg, queue, d1, d2, x1, y1, param); } void scal(sycl::queue& queue, std::int64_t n, real_t alpha, sycl::buffer& x, std::int64_t incx) { - CALL_PORTBLAS_FN(::blas::_scal, queue, n, alpha, x, incx); + CALL_GENERIC_BLAS_FN(::blas::_scal, queue, n, alpha, x, incx); } void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, @@ -194,7 +194,7 @@ void scal(sycl::queue& queue, std::int64_t n, real_t alpha, void swap(sycl::queue& queue, 
std::int64_t n, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { - CALL_PORTBLAS_FN(::blas::_swap, queue, n, x, incx, y, incy); + CALL_GENERIC_BLAS_FN(::blas::_swap, queue, n, x, incx, y, incy); } void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, @@ -218,7 +218,7 @@ sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* sycl::event iamax(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, std::int64_t* result, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_iamax, queue, n, x, incx, result, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_iamax, queue, n, x, incx, result, dependencies); } sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, @@ -229,7 +229,7 @@ sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex sycl::event iamin(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, std::int64_t* result, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_iamin, queue, n, x, incx, result, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_iamin, queue, n, x, incx, result, dependencies); } sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, @@ -245,19 +245,19 @@ sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* sycl::event asum(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, real_t* result, const std::vector& dependencies) { - // portBLAS asum implementation requires result to be initializes to zero + // Generic BLAS asum implementation requires result to be initializes to zero // before starting the computation. 
auto init_res_val = queue.submit( [&](sycl::handler& cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); std::vector new_dependencies = dependencies; new_dependencies.push_back(init_res_val); - CALL_PORTBLAS_USM_FN(::blas::_asum, queue, n, x, incx, result, new_dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_asum, queue, n, x, incx, result, new_dependencies); } sycl::event axpy(sycl::queue& queue, std::int64_t n, real_t alpha, const real_t* x, std::int64_t incx, real_t* y, std::int64_t incy, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_axpy, queue, n, alpha, x, incx, y, incy, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_axpy, queue, n, alpha, x, incx, y, incy, dependencies); } sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, @@ -281,7 +281,7 @@ sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha sycl::event copy(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, real_t* y, std::int64_t incy, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_copy, queue, n, x, incx, y, incy, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_copy, queue, n, x, incx, y, incy, dependencies); } sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, @@ -293,13 +293,13 @@ sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* sycl::event dot(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, const real_t* y, std::int64_t incy, real_t* result, const std::vector& dependencies) { - // portBLAS dot implementation requires result to be initializes to zero + // Generic BLAS dot implementation requires result to be initializes to zero // before starting the computation. 
auto init_res_val = queue.submit( [&](sycl::handler& cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); std::vector new_dependencies = dependencies; new_dependencies.emplace_back(init_res_val); - CALL_PORTBLAS_USM_FN(::blas::_dot, queue, n, x, incx, y, incy, result, new_dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_dot, queue, n, x, incx, y, incy, result, new_dependencies); } #ifdef ENABLE_MIXED_PRECISION_WITH_DOUBLE @@ -313,13 +313,14 @@ sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t sycl::event sdsdot(sycl::queue& queue, std::int64_t n, real_t sb, const real_t* x, std::int64_t incx, const real_t* y, std::int64_t incy, real_t* result, const std::vector& dependencies) { - // portBLAS sdsdot implementation requires result to be initializes to zero + // Generic BLAS sdsdot implementation requires result to be initializes to zero // before starting the computation. auto init_res_val = queue.submit( [&](sycl::handler& cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); std::vector new_dependencies = dependencies; new_dependencies.emplace_back(init_res_val); - CALL_PORTBLAS_USM_FN(::blas::_sdsdot, queue, n, sb, x, incx, y, incy, result, new_dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_sdsdot, queue, n, sb, x, incx, y, incy, result, + new_dependencies); } sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, @@ -329,13 +330,13 @@ sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* sycl::event nrm2(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, real_t* result, const std::vector& dependencies) { - // portBLAS nrm2 implementation requires result to be initializes to zero + // Generic BLAS nrm2 implementation requires result to be initializes to zero // before starting the computation. 
auto init_res_val = queue.submit( [&](sycl::handler& cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); std::vector new_dependencies = dependencies; new_dependencies.push_back(init_res_val); - CALL_PORTBLAS_USM_FN(::blas::_nrm2, queue, n, x, incx, result, new_dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_nrm2, queue, n, x, incx, result, new_dependencies); } sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, @@ -347,12 +348,12 @@ sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, std sycl::event rot(sycl::queue& queue, std::int64_t n, real_t* x, std::int64_t incx, real_t* y, std::int64_t incy, real_t c, real_t s, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_rot, queue, n, x, incx, y, incy, c, s, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_rot, queue, n, x, incx, y, incy, c, s, dependencies); } sycl::event rotg(sycl::queue& queue, real_t* a, real_t* b, real_t* c, real_t* s, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_rotg, queue, a, b, c, s, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_rotg, queue, a, b, c, s, dependencies); } sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, real_t* c, @@ -362,17 +363,17 @@ sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_rotm, queue, n, x, incx, y, incy, param, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_rotm, queue, n, x, incx, y, incy, param, dependencies); } sycl::event rotmg(sycl::queue& queue, real_t* d1, real_t* d2, real_t* x1, real_t y1, real_t* param, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_rotmg, queue, d1, d2, x1, y1, param, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_rotmg, queue, d1, d2, x1, y1, param, dependencies); } sycl::event scal(sycl::queue& queue, std::int64_t n, real_t alpha, real_t* x, std::int64_t incx, const std::vector& dependencies) { - 
CALL_PORTBLAS_USM_FN(::blas::_scal, queue, n, alpha, x, incx, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_scal, queue, n, alpha, x, incx, dependencies); } sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, @@ -388,7 +389,7 @@ sycl::event scal(sycl::queue& queue, std::int64_t n, real_t alpha, std::complex< sycl::event swap(sycl::queue& queue, std::int64_t n, real_t* x, std::int64_t incx, real_t* y, std::int64_t incy, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_swap, queue, n, x, incx, y, incy, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_swap, queue, n, x, incx, y, incy, dependencies); } sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, diff --git a/src/blas/backends/portblas/portblas_level1_double.cpp b/src/blas/backends/generic/generic_level1_double.cpp similarity index 87% rename from src/blas/backends/portblas/portblas_level1_double.cpp rename to src/blas/backends/generic/generic_level1_double.cpp index 172fcfa8d..57420529c 100644 --- a/src/blas/backends/portblas/portblas_level1_double.cpp +++ b/src/blas/backends/generic/generic_level1_double.cpp @@ -23,14 +23,14 @@ #include #endif -#include "portblas_common.hpp" +#include "generic_common.hpp" #include "oneapi/math/exceptions.hpp" -#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp" +#include "oneapi/math/blas/detail/generic/onemath_blas_generic.hpp" namespace oneapi { namespace math { namespace blas { -namespace portblas { +namespace generic { using real_t = double; #define ENABLE_MIXED_PRECISION_WITH_DOUBLE @@ -41,7 +41,7 @@ namespace column_major { constexpr bool is_column_major() { return true; } -#include "portblas_level1.cxx" +#include "generic_level1.cxx" #undef COLUMN_MAJOR } // namespace column_major @@ -51,12 +51,12 @@ namespace row_major { constexpr bool is_column_major() { return false; } -#include "portblas_level1.cxx" +#include "generic_level1.cxx" #undef ROW_MAJOR #undef 
ENABLE_MIXED_PRECISION_WITH_DOUBLE } // namespace row_major -} // namespace portblas +} // namespace generic } // namespace blas } // namespace math } // namespace oneapi diff --git a/src/blas/backends/portblas/portblas_level2_float.cpp b/src/blas/backends/generic/generic_level1_float.cpp similarity index 86% rename from src/blas/backends/portblas/portblas_level2_float.cpp rename to src/blas/backends/generic/generic_level1_float.cpp index 87c5b7842..f7634115d 100644 --- a/src/blas/backends/portblas/portblas_level2_float.cpp +++ b/src/blas/backends/generic/generic_level1_float.cpp @@ -23,14 +23,14 @@ #include #endif -#include "portblas_common.hpp" +#include "generic_common.hpp" #include "oneapi/math/exceptions.hpp" -#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp" +#include "oneapi/math/blas/detail/generic/onemath_blas_generic.hpp" namespace oneapi { namespace math { namespace blas { -namespace portblas { +namespace generic { using real_t = float; @@ -40,7 +40,7 @@ namespace column_major { constexpr bool is_column_major() { return true; } -#include "portblas_level2.cxx" +#include "generic_level1.cxx" #undef COLUMN_MAJOR } // namespace column_major @@ -50,11 +50,11 @@ namespace row_major { constexpr bool is_column_major() { return false; } -#include "portblas_level2.cxx" +#include "generic_level1.cxx" #undef ROW_MAJOR } // namespace row_major -} // namespace portblas +} // namespace generic } // namespace blas } // namespace math } // namespace oneapi diff --git a/src/blas/backends/portblas/portblas_level2.cxx b/src/blas/backends/generic/generic_level2.cxx similarity index 85% rename from src/blas/backends/portblas/portblas_level2.cxx rename to src/blas/backends/generic/generic_level2.cxx index e1bc4766a..03e805813 100644 --- a/src/blas/backends/portblas/portblas_level2.cxx +++ b/src/blas/backends/generic/generic_level2.cxx @@ -22,7 +22,7 @@ void gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, real_t alpha, 
sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, real_t beta, sycl::buffer& y, std::int64_t incy) { - CALL_PORTBLAS_FN(::blas::_gemv, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); + CALL_GENERIC_BLAS_FN(::blas::_gemv, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, @@ -36,8 +36,8 @@ void gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std std::int64_t kl, std::int64_t ku, real_t alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, real_t beta, sycl::buffer& y, std::int64_t incy) { - CALL_PORTBLAS_FN(::blas::_gbmv, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, - incy); + CALL_GENERIC_BLAS_FN(::blas::_gbmv, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, + incy); } void gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, @@ -51,7 +51,7 @@ void gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, real_t alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& a, std::int64_t lda) { - CALL_PORTBLAS_FN(::blas::_ger, queue, m, n, alpha, x, incx, y, incy, a, lda); + CALL_GENERIC_BLAS_FN(::blas::_ger, queue, m, n, alpha, x, incx, y, incy, a, lda); } void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, @@ -118,49 +118,51 @@ void hpr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, void sbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, real_t beta, sycl::buffer& y, std::int64_t incy) { - CALL_PORTBLAS_FN(::blas::_sbmv, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, - incy); + CALL_GENERIC_BLAS_FN(::blas::_sbmv, queue, 
upper_lower, n, k, alpha, a, lda, x, incx, beta, y, + incy); } void symv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, real_t beta, sycl::buffer& y, std::int64_t incy) { - CALL_PORTBLAS_FN(::blas::_symv, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); + CALL_GENERIC_BLAS_FN(::blas::_symv, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, + incy); } void syr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { - CALL_PORTBLAS_FN(::blas::_syr, queue, upper_lower, n, alpha, x, incx, a, lda); + CALL_GENERIC_BLAS_FN(::blas::_syr, queue, upper_lower, n, alpha, x, incx, a, lda); } void syr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& a, std::int64_t lda) { - CALL_PORTBLAS_FN(::blas::_syr2, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); + CALL_GENERIC_BLAS_FN(::blas::_syr2, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } void spmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, real_t beta, sycl::buffer& y, std::int64_t incy) { - CALL_PORTBLAS_FN(::blas::_spmv, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); + CALL_GENERIC_BLAS_FN(::blas::_spmv, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } void spr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { - CALL_PORTBLAS_FN(::blas::_spr, queue, upper_lower, n, alpha, x, incx, a); + CALL_GENERIC_BLAS_FN(::blas::_spr, queue, upper_lower, n, alpha, x, incx, a); } void spr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, sycl::buffer& x, 
std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& a) { - CALL_PORTBLAS_FN(::blas::_spr2, queue, upper_lower, n, alpha, x, incx, y, incy, a); + CALL_GENERIC_BLAS_FN(::blas::_spr2, queue, upper_lower, n, alpha, x, incx, y, incy, a); } void tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - CALL_PORTBLAS_FN(::blas::_tbmv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); + CALL_GENERIC_BLAS_FN(::blas::_tbmv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, + incx); } void tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, @@ -173,7 +175,8 @@ void tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::tran void tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - CALL_PORTBLAS_FN(::blas::_tbsv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); + CALL_GENERIC_BLAS_FN(::blas::_tbsv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, + incx); } void tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, @@ -186,7 +189,7 @@ void tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::tran void tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { - CALL_PORTBLAS_FN(::blas::_tpmv, queue, upper_lower, trans, unit_diag, n, a, x, incx); + CALL_GENERIC_BLAS_FN(::blas::_tpmv, queue, upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, @@ -198,7 +201,7 @@ void 
tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::tran void tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { - CALL_PORTBLAS_FN(::blas::_tpsv, queue, upper_lower, trans, unit_diag, n, a, x, incx); + CALL_GENERIC_BLAS_FN(::blas::_tpsv, queue, upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, @@ -210,7 +213,7 @@ void tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::tran void trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - CALL_PORTBLAS_FN(::blas::_trmv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); + CALL_GENERIC_BLAS_FN(::blas::_trmv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, @@ -222,7 +225,7 @@ void trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::tran void trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - CALL_PORTBLAS_FN(::blas::_trsv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); + CALL_GENERIC_BLAS_FN(::blas::_trsv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, @@ -237,8 +240,8 @@ sycl::event gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t real_t alpha, const real_t* a, std::int64_t lda, const real_t* x, std::int64_t incx, real_t beta, real_t* y, std::int64_t incy, const std::vector& 
dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_gemv, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, - dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_gemv, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, + incy, dependencies); } sycl::event gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, @@ -253,8 +256,8 @@ sycl::event gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t std::int64_t kl, std::int64_t ku, real_t alpha, const real_t* a, std::int64_t lda, const real_t* x, std::int64_t incx, real_t beta, real_t* y, std::int64_t incy, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_gbmv, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, - incy, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_gbmv, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, + beta, y, incy, dependencies); } sycl::event gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, @@ -268,7 +271,8 @@ sycl::event gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, real_t alpha, const real_t* x, std::int64_t incx, const real_t* y, std::int64_t incy, real_t* a, std::int64_t lda, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_ger, queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_ger, queue, m, n, alpha, x, incx, y, incy, a, lda, + dependencies); } sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, @@ -339,56 +343,57 @@ sycl::event sbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_ real_t alpha, const real_t* a, std::int64_t lda, const real_t* x, std::int64_t incx, real_t beta, real_t* y, std::int64_t incy, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_sbmv, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, - incy, dependencies); + 
CALL_GENERIC_BLAS_USM_FN(::blas::_sbmv, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, + y, incy, dependencies); } sycl::event symv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, const real_t* a, std::int64_t lda, const real_t* x, std::int64_t incx, real_t beta, real_t* y, std::int64_t incy, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_symv, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, - incy, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_symv, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, + incy, dependencies); } sycl::event syr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, const real_t* x, std::int64_t incx, real_t* a, std::int64_t lda, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_syr, queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_syr, queue, upper_lower, n, alpha, x, incx, a, lda, + dependencies); } sycl::event syr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, const real_t* x, std::int64_t incx, const real_t* y, std::int64_t incy, real_t* a, std::int64_t lda, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_syr2, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, - dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_syr2, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, + dependencies); } sycl::event spmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, const real_t* a, const real_t* x, std::int64_t incx, real_t beta, real_t* y, std::int64_t incy, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_spmv, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, - dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_spmv, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, + dependencies); } sycl::event spr(sycl::queue& queue, oneapi::math::uplo 
upper_lower, std::int64_t n, real_t alpha, const real_t* x, std::int64_t incx, real_t* a, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_spr, queue, upper_lower, n, alpha, x, incx, a, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_spr, queue, upper_lower, n, alpha, x, incx, a, dependencies); } sycl::event spr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, const real_t* x, std::int64_t incx, const real_t* y, std::int64_t incy, real_t* a, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_spr2, queue, upper_lower, n, alpha, x, incx, y, incy, a, - dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_spr2, queue, upper_lower, n, alpha, x, incx, y, incy, a, + dependencies); } sycl::event tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, const real_t* a, std::int64_t lda, real_t* x, std::int64_t incx, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_tbmv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, - dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_tbmv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, + incx, dependencies); } sycl::event tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, @@ -402,8 +407,8 @@ sycl::event tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::mat oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, const real_t* a, std::int64_t lda, real_t* x, std::int64_t incx, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_tbsv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, - dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_tbsv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, + incx, dependencies); } sycl::event tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, @@ -416,8 +421,8 @@ sycl::event 
tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::mat sycl::event tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, const real_t* a, real_t* x, std::int64_t incx, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_tpmv, queue, upper_lower, trans, unit_diag, n, a, x, incx, - dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_tpmv, queue, upper_lower, trans, unit_diag, n, a, x, incx, + dependencies); } sycl::event tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, @@ -430,8 +435,8 @@ sycl::event tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::mat sycl::event tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, const real_t* a, real_t* x, std::int64_t incx, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_tpsv, queue, upper_lower, trans, unit_diag, n, a, x, incx, - dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_tpsv, queue, upper_lower, trans, unit_diag, n, a, x, incx, + dependencies); } sycl::event tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, @@ -444,8 +449,8 @@ sycl::event tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::mat sycl::event trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, const real_t* a, std::int64_t lda, real_t* x, std::int64_t incx, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_trmv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, - dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_trmv, queue, upper_lower, trans, unit_diag, n, a, lda, x, + incx, dependencies); } sycl::event trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, @@ -458,8 +463,8 @@ sycl::event trmv(sycl::queue& 
queue, oneapi::math::uplo upper_lower, oneapi::mat sycl::event trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, const real_t* a, std::int64_t lda, real_t* x, std::int64_t incx, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_trsv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, - dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_trsv, queue, upper_lower, trans, unit_diag, n, a, lda, x, + incx, dependencies); } sycl::event trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, diff --git a/src/blas/backends/portblas/portblas_level3_double.cpp b/src/blas/backends/generic/generic_level2_double.cpp similarity index 86% rename from src/blas/backends/portblas/portblas_level3_double.cpp rename to src/blas/backends/generic/generic_level2_double.cpp index 80f2d0bce..80792a739 100644 --- a/src/blas/backends/portblas/portblas_level3_double.cpp +++ b/src/blas/backends/generic/generic_level2_double.cpp @@ -23,14 +23,14 @@ #include #endif -#include "portblas_common.hpp" +#include "generic_common.hpp" #include "oneapi/math/exceptions.hpp" -#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp" +#include "oneapi/math/blas/detail/generic/onemath_blas_generic.hpp" namespace oneapi { namespace math { namespace blas { -namespace portblas { +namespace generic { using real_t = double; @@ -40,7 +40,7 @@ namespace column_major { constexpr bool is_column_major() { return true; } -#include "portblas_level3.cxx" +#include "generic_level2.cxx" #undef COLUMN_MAJOR } // namespace column_major @@ -50,11 +50,11 @@ namespace row_major { constexpr bool is_column_major() { return false; } -#include "portblas_level3.cxx" +#include "generic_level2.cxx" #undef ROW_MAJOR } // namespace row_major -} // namespace portblas +} // namespace generic } // namespace blas } // namespace math } // namespace oneapi diff --git 
a/src/blas/backends/portblas/portblas_level1_float.cpp b/src/blas/backends/generic/generic_level2_float.cpp similarity index 86% rename from src/blas/backends/portblas/portblas_level1_float.cpp rename to src/blas/backends/generic/generic_level2_float.cpp index b4a54375d..157f4df99 100644 --- a/src/blas/backends/portblas/portblas_level1_float.cpp +++ b/src/blas/backends/generic/generic_level2_float.cpp @@ -23,14 +23,14 @@ #include #endif -#include "portblas_common.hpp" +#include "generic_common.hpp" #include "oneapi/math/exceptions.hpp" -#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp" +#include "oneapi/math/blas/detail/generic/onemath_blas_generic.hpp" namespace oneapi { namespace math { namespace blas { -namespace portblas { +namespace generic { using real_t = float; @@ -40,7 +40,7 @@ namespace column_major { constexpr bool is_column_major() { return true; } -#include "portblas_level1.cxx" +#include "generic_level2.cxx" #undef COLUMN_MAJOR } // namespace column_major @@ -50,11 +50,11 @@ namespace row_major { constexpr bool is_column_major() { return false; } -#include "portblas_level1.cxx" +#include "generic_level2.cxx" #undef ROW_MAJOR } // namespace row_major -} // namespace portblas +} // namespace generic } // namespace blas } // namespace math } // namespace oneapi diff --git a/src/blas/backends/portblas/portblas_level3.cxx b/src/blas/backends/generic/generic_level3.cxx similarity index 91% rename from src/blas/backends/portblas/portblas_level3.cxx rename to src/blas/backends/generic/generic_level3.cxx index d1aa32652..254db5507 100644 --- a/src/blas/backends/portblas/portblas_level3.cxx +++ b/src/blas/backends/generic/generic_level3.cxx @@ -23,8 +23,8 @@ void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::tran std::int64_t m, std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, real_t beta, sycl::buffer& c, std::int64_t ldc) { - 
CALL_PORTBLAS_FN(::blas::_gemm, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, - ldc); + CALL_GENERIC_BLAS_FN(::blas::_gemm, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, + c, ldc); } void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, @@ -35,9 +35,10 @@ void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::tran using sycl_complex_real_t = sycl::ext::oneapi::experimental::complex; if (transa == oneapi::math::transpose::conjtrans || transb == oneapi::math::transpose::conjtrans) { - throw unimplemented("blas", "gemm", "Conjugate Transpose unsupported yet on portBLAS"); + throw unimplemented("blas", "gemm", + "Conjugate Transpose unsupported yet on onemath_sycl_blas"); } - // Intermediate buffers for conversion purposes as portBLAS expects sycl::complex instead of std::complex + // Intermediate buffers for conversion purposes as onemath_sycl_blas expects sycl::complex instead of std::complex sycl::buffer a_pb{ sycl::range<1>(a.size()) }; sycl::buffer b_pb{ sycl::range<1>(b.size()) }; sycl::buffer c_pb{ sycl::range<1>(c.size()) }; @@ -54,8 +55,8 @@ void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::tran sycl::accessor c_pb_acc(c_pb); queue.copy(c_acc, c_pb_acc); - CALL_PORTBLAS_FN(::blas::_gemm, queue, transa, transb, m, n, k, alpha, a_pb, lda, b_pb, ldb, - beta, c_pb, ldc); + CALL_GENERIC_BLAS_FN(::blas::_gemm, queue, transa, transb, m, n, k, alpha, a_pb, lda, b_pb, ldb, + beta, c_pb, ldc); // Copy c_pb back to c sycl::accessor, 1, sycl::access::mode::write> out_acc(c); @@ -67,8 +68,8 @@ void symm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo std::int64_t m, std::int64_t n, real_t alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, real_t beta, sycl::buffer& c, std::int64_t ldc) { - CALL_PORTBLAS_FN(::blas::_symm, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, - beta, c, ldc); + 
CALL_GENERIC_BLAS_FN(::blas::_symm, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc); } void symm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, @@ -148,8 +149,8 @@ void trsm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, real_t alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { - CALL_PORTBLAS_FN(::blas::_trsm, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb); + CALL_GENERIC_BLAS_FN(::blas::_trsm, queue, left_right, upper_lower, trans, unit_diag, m, n, + alpha, a, lda, b, ldb); } void trsm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, @@ -177,7 +178,7 @@ void gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::tra void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, real_t alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { - CALL_PORTBLAS_FN(::blas::_omatcopy, queue, trans, m, n, alpha, a, lda, b, ldb); + CALL_GENERIC_BLAS_FN(::blas::_omatcopy, queue, trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, @@ -189,8 +190,8 @@ void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, real_t alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { - CALL_PORTBLAS_FN(::blas::_omatcopy2, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, - strideb); + CALL_GENERIC_BLAS_FN(::blas::_omatcopy2, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, + strideb); } void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, @@ -215,8 +216,8 @@ void omatadd(sycl::queue& 
queue, transpose transa, transpose transb, std::int64_ real_t alpha, sycl::buffer& a, std::int64_t lda, real_t beta, sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc) { - CALL_PORTBLAS_FN(::blas::_omatadd, queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, - ldc); + CALL_GENERIC_BLAS_FN(::blas::_omatadd, queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, + c, ldc); } void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, @@ -232,8 +233,8 @@ sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::mat std::int64_t m, std::int64_t n, std::int64_t k, real_t alpha, const real_t* a, std::int64_t lda, const real_t* b, std::int64_t ldb, real_t beta, real_t* c, std::int64_t ldc, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_gemm, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_gemm, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); } sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, @@ -243,18 +244,19 @@ sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::mat std::int64_t ldc, const std::vector& dependencies) { if (transa == oneapi::math::transpose::conjtrans || transb == oneapi::math::transpose::conjtrans) { - throw unimplemented("blas", "gemm", "Conjugate Transpose unsupported yet on portBLAS"); + throw unimplemented("blas", "gemm", + "Conjugate Transpose unsupported yet on onemath_sycl_blas"); } - CALL_PORTBLAS_USM_FN(::blas::_gemm, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_gemm, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); } sycl::event symm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, 
std::int64_t n, real_t alpha, const real_t* a, std::int64_t lda, const real_t* b, std::int64_t ldb, real_t beta, real_t* c, std::int64_t ldc, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_symm, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, - beta, c, ldc, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_symm, queue, left_right, upper_lower, m, n, alpha, a, lda, b, + ldb, beta, c, ldc, dependencies); } sycl::event symm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, @@ -337,8 +339,8 @@ sycl::event trsm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, real_t alpha, const real_t* a, std::int64_t lda, real_t* b, std::int64_t ldb, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_trsm, queue, left_right, upper_lower, trans, unit_diag, m, n, - alpha, a, lda, b, ldb, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_trsm, queue, left_right, upper_lower, trans, unit_diag, m, n, + alpha, a, lda, b, ldb, dependencies); } sycl::event trsm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, @@ -369,8 +371,8 @@ sycl::event gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower, sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, real_t alpha, const real_t* a, std::int64_t lda, real_t* b, std::int64_t ldb, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_omatcopy, queue, trans, m, n, alpha, a, lda, b, ldb, - dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_omatcopy, queue, trans, m, n, alpha, a, lda, b, ldb, + dependencies); } sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, @@ -384,8 +386,8 @@ sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std:: real_t alpha, const real_t* a, std::int64_t lda, std::int64_t stridea, real_t* b, 
std::int64_t ldb, std::int64_t strideb, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_omatcopy2, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, - strideb, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_omatcopy2, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, + strideb, dependencies); } sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, @@ -411,8 +413,8 @@ sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std: std::int64_t n, real_t alpha, const real_t* a, std::int64_t lda, real_t beta, const real_t* b, std::int64_t ldb, real_t* c, std::int64_t ldc, const std::vector& dependencies) { - CALL_PORTBLAS_USM_FN(::blas::_omatadd, queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, - c, ldc, dependencies); + CALL_GENERIC_BLAS_USM_FN(::blas::_omatadd, queue, transa, transb, m, n, alpha, a, lda, beta, b, + ldb, c, ldc, dependencies); } sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, diff --git a/src/blas/backends/portblas/portblas_level3_bfloat16.cpp b/src/blas/backends/generic/generic_level3_bfloat16.cpp similarity index 96% rename from src/blas/backends/portblas/portblas_level3_bfloat16.cpp rename to src/blas/backends/generic/generic_level3_bfloat16.cpp index 7721f58c9..d1ec623fb 100644 --- a/src/blas/backends/portblas/portblas_level3_bfloat16.cpp +++ b/src/blas/backends/generic/generic_level3_bfloat16.cpp @@ -24,12 +24,12 @@ #endif #include "oneapi/math/exceptions.hpp" -#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp" +#include "oneapi/math/blas/detail/generic/onemath_blas_generic.hpp" namespace oneapi { namespace math { namespace blas { -namespace portblas { +namespace generic { namespace column_major { // BUFFER @@ -72,7 +72,7 @@ sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::mat } } // namespace row_major -} // namespace portblas +} // namespace generic } // namespace 
blas } // namespace math } // namespace oneapi diff --git a/src/blas/backends/portblas/portblas_level2_double.cpp b/src/blas/backends/generic/generic_level3_double.cpp similarity index 86% rename from src/blas/backends/portblas/portblas_level2_double.cpp rename to src/blas/backends/generic/generic_level3_double.cpp index 5b26558f0..d514080c6 100644 --- a/src/blas/backends/portblas/portblas_level2_double.cpp +++ b/src/blas/backends/generic/generic_level3_double.cpp @@ -23,14 +23,14 @@ #include #endif -#include "portblas_common.hpp" +#include "generic_common.hpp" #include "oneapi/math/exceptions.hpp" -#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp" +#include "oneapi/math/blas/detail/generic/onemath_blas_generic.hpp" namespace oneapi { namespace math { namespace blas { -namespace portblas { +namespace generic { using real_t = double; @@ -40,7 +40,7 @@ namespace column_major { constexpr bool is_column_major() { return true; } -#include "portblas_level2.cxx" +#include "generic_level3.cxx" #undef COLUMN_MAJOR } // namespace column_major @@ -50,11 +50,11 @@ namespace row_major { constexpr bool is_column_major() { return false; } -#include "portblas_level2.cxx" +#include "generic_level3.cxx" #undef ROW_MAJOR } // namespace row_major -} // namespace portblas +} // namespace generic } // namespace blas } // namespace math } // namespace oneapi diff --git a/src/blas/backends/portblas/portblas_level3_float.cpp b/src/blas/backends/generic/generic_level3_float.cpp similarity index 82% rename from src/blas/backends/portblas/portblas_level3_float.cpp rename to src/blas/backends/generic/generic_level3_float.cpp index dea42ed9d..053345a6d 100644 --- a/src/blas/backends/portblas/portblas_level3_float.cpp +++ b/src/blas/backends/generic/generic_level3_float.cpp @@ -23,14 +23,14 @@ #include #endif -#include "portblas_common.hpp" +#include "generic_common.hpp" #include "oneapi/math/exceptions.hpp" -#include 
"oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp" +#include "oneapi/math/blas/detail/generic/onemath_blas_generic.hpp" namespace oneapi { namespace math { namespace blas { -namespace portblas { +namespace generic { using real_t = float; @@ -40,8 +40,8 @@ namespace column_major { constexpr bool is_column_major() { return true; } -#include "portblas_level3.cxx" -#include "portblas_gemm_bias.cxx" +#include "generic_level3.cxx" +#include "generic_gemm_bias.cxx" #undef COLUMN_MAJOR } // namespace column_major @@ -51,12 +51,12 @@ namespace row_major { constexpr bool is_column_major() { return false; } -#include "portblas_level3.cxx" -#include "portblas_gemm_bias.cxx" +#include "generic_level3.cxx" +#include "generic_gemm_bias.cxx" #undef ROW_MAJOR } // namespace row_major -} // namespace portblas +} // namespace generic } // namespace blas } // namespace math } // namespace oneapi diff --git a/src/blas/backends/portblas/portblas_level3_half.cpp b/src/blas/backends/generic/generic_level3_half.cpp similarity index 97% rename from src/blas/backends/portblas/portblas_level3_half.cpp rename to src/blas/backends/generic/generic_level3_half.cpp index dbd71ab4b..cb012837e 100644 --- a/src/blas/backends/portblas/portblas_level3_half.cpp +++ b/src/blas/backends/generic/generic_level3_half.cpp @@ -24,12 +24,12 @@ #endif #include "oneapi/math/exceptions.hpp" -#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp" +#include "oneapi/math/blas/detail/generic/onemath_blas_generic.hpp" namespace oneapi { namespace math { namespace blas { -namespace portblas { +namespace generic { namespace column_major { // BUFFER @@ -97,7 +97,7 @@ sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::mat } } // namespace row_major -} // namespace portblas +} // namespace generic } // namespace blas } // namespace math } // namespace oneapi diff --git a/src/blas/backends/portblas/portblas_wrappers.cpp b/src/blas/backends/generic/generic_wrappers.cpp 
similarity index 78% rename from src/blas/backends/portblas/portblas_wrappers.cpp rename to src/blas/backends/generic/generic_wrappers.cpp index 6777f358a..72f59869c 100644 --- a/src/blas/backends/portblas/portblas_wrappers.cpp +++ b/src/blas/backends/generic/generic_wrappers.cpp @@ -4,13 +4,13 @@ #include "blas/function_table.hpp" -#include "oneapi/math/blas/detail/portblas/onemath_blas_portblas.hpp" +#include "oneapi/math/blas/detail/generic/onemath_blas_generic.hpp" #define WRAPPER_VERSION 1 extern "C" ONEMATH_EXPORT blas_function_table_t onemath_blas_table = { WRAPPER_VERSION, -#define BACKEND portblas +#define BACKEND generic #define MAJOR column_major #include "../backend_wrappers.cxx" #undef MAJOR diff --git a/src/config.hpp.in b/src/config.hpp.in index 3fde31c79..cf689fd8c 100644 --- a/src/config.hpp.in +++ b/src/config.hpp.in @@ -28,11 +28,11 @@ #cmakedefine ONEMATH_ENABLE_MKLCPU_BACKEND #cmakedefine ONEMATH_ENABLE_MKLGPU_BACKEND #cmakedefine ONEMATH_ENABLE_NETLIB_BACKEND -#cmakedefine ONEMATH_ENABLE_PORTBLAS_BACKEND -#cmakedefine ONEMATH_ENABLE_PORTBLAS_BACKEND_AMD_GPU -#cmakedefine ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_CPU -#cmakedefine ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_GPU -#cmakedefine ONEMATH_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU +#cmakedefine ONEMATH_ENABLE_GENERIC_BLAS_BACKEND +#cmakedefine ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_AMD_GPU +#cmakedefine ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_INTEL_CPU +#cmakedefine ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_INTEL_GPU +#cmakedefine ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_NVIDIA_GPU #cmakedefine ONEMATH_ENABLE_PORTFFT_BACKEND #cmakedefine ONEMATH_ENABLE_ROCBLAS_BACKEND #cmakedefine ONEMATH_ENABLE_ROCFFT_BACKEND diff --git a/src/include/function_table_initializer.hpp b/src/include/function_table_initializer.hpp index 3e1f0824b..a376c051b 100644 --- a/src/include/function_table_initializer.hpp +++ b/src/include/function_table_initializer.hpp @@ -67,7 +67,7 @@ class table_initializer { } private: -#if 
defined(ENABLE_PORTBLAS_BACKEND) || defined(ENABLE_PORTFFT_BACKEND) +#if defined(ENABLE_GENERIC_BLAS_BACKEND) || defined(ENABLE_PORTFFT_BACKEND) static constexpr bool is_generic_device_supported = true; #else static constexpr bool is_generic_device_supported = false; diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt index e335d4315..24af963df 100644 --- a/tests/unit_tests/CMakeLists.txt +++ b/tests/unit_tests/CMakeLists.txt @@ -152,9 +152,9 @@ foreach(domain ${TEST_TARGET_DOMAINS}) list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_netlib) endif() - if(domain STREQUAL "blas" AND ENABLE_PORTBLAS_BACKEND) - add_dependencies(test_main_${domain}_ct onemath_${domain}_portblas) - list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_portblas) + if(domain STREQUAL "blas" AND ENABLE_GENERIC_BLAS_BACKEND) + add_dependencies(test_main_${domain}_ct onemath_${domain}_generic) + list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_generic) endif() if(domain STREQUAL "lapack" AND ENABLE_CUSOLVER_BACKEND) diff --git a/tests/unit_tests/include/test_helper.hpp b/tests/unit_tests/include/test_helper.hpp index f91d71d96..8b7a5344c 100644 --- a/tests/unit_tests/include/test_helper.hpp +++ b/tests/unit_tests/include/test_helper.hpp @@ -139,11 +139,11 @@ #define TEST_RUN_AMDGPU_ROCSOLVER_SELECT(q, func, ...) #endif -#ifdef ONEMATH_ENABLE_PORTBLAS_BACKEND -#define TEST_RUN_PORTBLAS_SELECT(q, func, ...) \ - func(oneapi::math::backend_selector{ q }, __VA_ARGS__) +#ifdef ONEMATH_ENABLE_GENERIC_BLAS_BACKEND +#define TEST_RUN_GENERIC_BLAS_SELECT(q, func, ...) \ + func(oneapi::math::backend_selector{ q }, __VA_ARGS__) #else -#define TEST_RUN_PORTBLAS_SELECT(q, func, ...) +#define TEST_RUN_GENERIC_BLAS_SELECT(q, func, ...) 
#endif #ifdef ONEMATH_ENABLE_CUFFT_BACKEND @@ -233,7 +233,7 @@ TEST_RUN_AMDGPU_ROCBLAS_SELECT(q, func, __VA_ARGS__); \ } \ } \ - TEST_RUN_PORTBLAS_SELECT(q, func, __VA_ARGS__); \ + TEST_RUN_GENERIC_BLAS_SELECT(q, func, __VA_ARGS__); \ } while (0); #define TEST_RUN_RNG_CT_SELECT(q, func, ...) \ diff --git a/tests/unit_tests/main_test.cpp b/tests/unit_tests/main_test.cpp index bcc0bec38..74057a8aa 100644 --- a/tests/unit_tests/main_test.cpp +++ b/tests/unit_tests/main_test.cpp @@ -112,21 +112,21 @@ int main(int argc, char** argv) { if (unique_devices.find(dev.get_info()) == unique_devices.end()) { unique_devices.insert(dev.get_info()); -#if !defined(ONEMATH_ENABLE_MKLCPU_BACKEND) && \ - !defined(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_CPU) && \ +#if !defined(ONEMATH_ENABLE_MKLCPU_BACKEND) && \ + !defined(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_INTEL_CPU) && \ !defined(ONEMATH_ENABLE_PORTFFT_BACKEND) && !defined(ONEMATH_ENABLE_NETLIB_BACKEND) if (dev.is_cpu()) continue; #endif -#if !defined(ONEMATH_ENABLE_MKLGPU_BACKEND) && \ - !defined(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_GPU) && \ +#if !defined(ONEMATH_ENABLE_MKLGPU_BACKEND) && \ + !defined(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_INTEL_GPU) && \ !defined(ONEMATH_ENABLE_PORTFFT_BACKEND) if (dev.is_gpu() && vendor_id == INTEL_ID) continue; #endif #if !defined(ONEMATH_ENABLE_CUBLAS_BACKEND) && !defined(ONEMATH_ENABLE_CURAND_BACKEND) && \ !defined(ONEMATH_ENABLE_CUSOLVER_BACKEND) && \ - !defined(ONEMATH_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU) && \ + !defined(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_NVIDIA_GPU) && \ !defined(ONEMATH_ENABLE_CUFFT_BACKEND) && !defined(ONEMATH_ENABLE_PORTFFT_BACKEND) && \ !defined(ONEMATH_ENABLE_CUSPARSE_BACKEND) if (dev.is_gpu() && vendor_id == NVIDIA_ID) @@ -134,7 +134,7 @@ int main(int argc, char** argv) { #endif #if !defined(ONEMATH_ENABLE_ROCBLAS_BACKEND) && !defined(ONEMATH_ENABLE_ROCRAND_BACKEND) && \ !defined(ONEMATH_ENABLE_ROCSOLVER_BACKEND) && \ - !defined(ONEMATH_ENABLE_PORTBLAS_BACKEND_AMD_GPU) 
&& \ + !defined(ONEMATH_ENABLE_GENERIC_BLAS_BACKEND_AMD_GPU) && \ !defined(ONEMATH_ENABLE_ROCFFT_BACKEND) && !defined(ONEMATH_ENABLE_PORTFFT_BACKEND) && \ !defined(ONEMATH_ENABLE_ROCSPARSE_BACKEND) if (dev.is_gpu() && vendor_id == AMD_ID)