Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BLAS] add interfaces to matrix copy/transposition routines #227

Merged
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
23b8d16
[BLAS] add transpose batch strided routines in mkl_batch.cxx, mkl_bla…
andrewtbarker Sep 12, 2022
2134c0e
[BLAS] add batch strided transpose to oneapi/mkl/blas.hxx
andrewtbarker Sep 12, 2022
e0ae85d
[BLAS] add blas_ct.hxx templates in five backends for batch strided t…
andrewtbarker Sep 12, 2022
418a8a6
[BLAS] transpose batch in blas_ct_backends.hxx
andrewtbarker Sep 12, 2022
fe7c68a
[BLAS] onemkl_blas_backends.hxx
andrewtbarker Sep 12, 2022
5e51e21
[BLAS] blas_loader.hxx
andrewtbarker Sep 13, 2022
d61b302
[BLAS] transpose routines in cublas, rocblas backends
andrewtbarker Sep 13, 2022
02e6ce6
[BLAS] predicates.hxx
andrewtbarker Sep 13, 2022
8715925
[BLAS] netlib_batch.cxx
andrewtbarker Sep 13, 2022
9a942e0
[BLAS] fix bad dependencies in predicates.cxx
andrewtbarker Sep 13, 2022
f5cf032
[BLAS] other backends, rocblas_batch.cpp, cublas_batch.cpp
andrewtbarker Sep 13, 2022
57f985a
[BLAS] function_table.hpp
andrewtbarker Sep 13, 2022
3c54f05
[BLAS] blas_loader.cpp
andrewtbarker Sep 13, 2022
d1b7c8a
[BLAS] backend wrappers for batch strided transpose
andrewtbarker Sep 13, 2022
724fcf6
[BLAS] add tests for transpose, fix minor issues for build
andrewtbarker Sep 13, 2022
2359b66
[BLAS] fix omatcopy, imatcopy USM tests
andrewtbarker Sep 13, 2022
57da1af
[BLAS] fix omatadd_batch_stride (buffer) test
andrewtbarker Sep 13, 2022
1ad05f6
[BLAS] add omatadd_batch_stride_usm test
andrewtbarker Sep 13, 2022
3fce213
[BLAS] fix build for cublas/rocblas backends
andrewtbarker Sep 14, 2022
0374376
[BLAS] clang-format for transpose interfaces
andrewtbarker Sep 14, 2022
85246f9
[BLAS] fix row_major/column_major namespaces in rocblas/cublas backen…
andrewtbarker Sep 19, 2022
9eac95c
[BLAS] add reference code for tests of omatcopy batch stride (usm)
andrewtbarker Sep 19, 2022
55f87f8
[BLAS] reference test for omatcopy_batch_stride (buffer)
andrewtbarker Sep 19, 2022
a759727
[BLAS] actual tests for imatcopy_batch_stride, clean up common tests
andrewtbarker Sep 19, 2022
c8c56a6
[BLAS] reference code and actual tests for omatadd_batch_strided
andrewtbarker Sep 19, 2022
16c550a
[BLAS] move transpose reference implementations to reference_blas_tem…
andrewtbarker Sep 20, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
342 changes: 342 additions & 0 deletions include/oneapi/mkl/blas.hxx

Large diffs are not rendered by default.

166 changes: 166 additions & 0 deletions include/oneapi/mkl/blas/detail/blas_ct_backends.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -1083,6 +1083,82 @@ static inline void symv(backend_selector<backend::BACKEND> selector, uplo upper_
std::int64_t lda, sycl::buffer<double, 1> &x, std::int64_t incx,
double beta, sycl::buffer<double, 1> &y, std::int64_t incy);

// omatcopy_batch -- buffer API.
// Four overloads, one per element type: float, double, std::complex<float>,
// std::complex<double>. Each takes a backend_selector<backend::BACKEND>, a
// single `trans` flag, dimensions m and n, a scalar alpha, a source buffer `a`
// (with lda, stride_a), a separate destination buffer `b` (with ldb, stride_b)
// and a batch_size. All matrices are passed as 1-D sycl::buffer references and
// the functions return void.
// NOTE(review): by oneMKL naming this is presumably the out-of-place strided
// batch scaled copy/transpose (b := alpha * op(a)) -- confirm the exact meaning
// of m, n, lda/ldb and stride_a/stride_b against the oneMKL specification.
static inline void omatcopy_batch(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, float alpha,
sycl::buffer<float, 1> &a, std::int64_t lda,
std::int64_t stride_a, sycl::buffer<float, 1> &b,
std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);

static inline void omatcopy_batch(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, double alpha,
sycl::buffer<double, 1> &a, std::int64_t lda,
std::int64_t stride_a, sycl::buffer<double, 1> &b,
std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);

static inline void omatcopy_batch(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, std::complex<float> alpha,
sycl::buffer<std::complex<float>, 1> &a, std::int64_t lda,
std::int64_t stride_a, sycl::buffer<std::complex<float>, 1> &b,
std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);

static inline void omatcopy_batch(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, std::complex<double> alpha,
sycl::buffer<std::complex<double>, 1> &a, std::int64_t lda,
std::int64_t stride_a, sycl::buffer<std::complex<double>, 1> &b,
std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size);

// imatcopy_batch -- buffer API.
// Four overloads, one per element type: float, double, std::complex<float>,
// std::complex<double>. Unlike omatcopy_batch there is a single matrix buffer
// `ab`, two leading dimensions (lda, ldb) and one `stride` shared by the whole
// batch. The functions return void.
// NOTE(review): the single `ab` argument suggests an in-place scaled
// copy/transpose where lda describes the input layout and ldb the output
// layout -- confirm against the oneMKL specification.
static inline void imatcopy_batch(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, float alpha,
sycl::buffer<float, 1> &ab, std::int64_t lda, std::int64_t ldb,
std::int64_t stride, std::int64_t batch_size);

static inline void imatcopy_batch(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, double alpha,
sycl::buffer<double, 1> &ab, std::int64_t lda, std::int64_t ldb,
std::int64_t stride, std::int64_t batch_size);

static inline void imatcopy_batch(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, std::complex<float> alpha,
sycl::buffer<std::complex<float>, 1> &ab, std::int64_t lda,
std::int64_t ldb, std::int64_t stride, std::int64_t batch_size);

static inline void imatcopy_batch(backend_selector<backend::BACKEND> selector, transpose trans,
std::int64_t m, std::int64_t n, std::complex<double> alpha,
sycl::buffer<std::complex<double>, 1> &ab, std::int64_t lda,
std::int64_t ldb, std::int64_t stride, std::int64_t batch_size);

// omatadd_batch -- buffer API.
// Four overloads, one per element type: float, double, std::complex<float>,
// std::complex<double>. Each takes two transpose flags (transa, transb),
// dimensions m and n, a scalar alpha paired with buffer `a` (lda, stride_a),
// a scalar beta paired with buffer `b` (ldb, stride_b), a third buffer `c`
// (ldc, stride_c) and a batch_size. The functions return void.
// NOTE(review): presumably computes c := alpha * op(a) + beta * op(b) for each
// matrix in the batch, with `c` as the output -- confirm against the oneMKL
// specification, including whether `c` may alias `a` or `b`.
static inline void omatadd_batch(backend_selector<backend::BACKEND> selector, transpose transa,
transpose transb, std::int64_t m, std::int64_t n, float alpha,
sycl::buffer<float, 1> &a, std::int64_t lda, std::int64_t stride_a,
float beta, sycl::buffer<float, 1> &b, std::int64_t ldb,
std::int64_t stride_b, sycl::buffer<float, 1> &c, std::int64_t ldc,
std::int64_t stride_c, std::int64_t batch_size);

static inline void omatadd_batch(backend_selector<backend::BACKEND> selector, transpose transa,
transpose transb, std::int64_t m, std::int64_t n, double alpha,
sycl::buffer<double, 1> &a, std::int64_t lda,
std::int64_t stride_a, double beta, sycl::buffer<double, 1> &b,
std::int64_t ldb, std::int64_t stride_b,
sycl::buffer<double, 1> &c, std::int64_t ldc,
std::int64_t stride_c, std::int64_t batch_size);

static inline void omatadd_batch(backend_selector<backend::BACKEND> selector, transpose transa,
transpose transb, std::int64_t m, std::int64_t n,
std::complex<float> alpha, sycl::buffer<std::complex<float>, 1> &a,
std::int64_t lda, std::int64_t stride_a, std::complex<float> beta,
sycl::buffer<std::complex<float>, 1> &b, std::int64_t ldb,
std::int64_t stride_b, sycl::buffer<std::complex<float>, 1> &c,
std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);

static inline void omatadd_batch(backend_selector<backend::BACKEND> selector, transpose transa,
transpose transb, std::int64_t m, std::int64_t n,
std::complex<double> alpha,
sycl::buffer<std::complex<double>, 1> &a, std::int64_t lda,
std::int64_t stride_a, std::complex<double> beta,
sycl::buffer<std::complex<double>, 1> &b, std::int64_t ldb,
std::int64_t stride_b, sycl::buffer<std::complex<double>, 1> &c,
std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size);

// USM APIs

static inline sycl::event syr2(backend_selector<backend::BACKEND> selector, uplo upper_lower,
Expand Down Expand Up @@ -2484,3 +2560,93 @@ static inline sycl::event symv(backend_selector<backend::BACKEND> selector, uplo
const double *x, std::int64_t incx, double beta, double *y,
std::int64_t incy,
const std::vector<sycl::event> &dependencies = {});

// omatcopy_batch -- pointer-based overloads (declared under the "USM APIs"
// section of this header).
// Four overloads, one per element type: float, double, std::complex<float>,
// std::complex<double>. The source `a` is a pointer-to-const and the
// destination `b` is a mutable pointer; each has its own leading dimension and
// stride. Every overload returns a sycl::event and accepts a `dependencies`
// vector that defaults to empty.
// NOTE(review): presumably the returned event signals completion and
// `dependencies` lists events to wait on before execution; the operation is
// presumably b := alpha * op(a) per batch entry -- confirm against the oneMKL
// specification.
static inline sycl::event omatcopy_batch(backend_selector<backend::BACKEND> selector,
transpose trans, std::int64_t m, std::int64_t n,
float alpha, const float *a, std::int64_t lda,
std::int64_t stride_a, float *b, std::int64_t ldb,
std::int64_t stride_b, std::int64_t batch_size,
const std::vector<sycl::event> &dependencies = {});

static inline sycl::event omatcopy_batch(backend_selector<backend::BACKEND> selector,
transpose trans, std::int64_t m, std::int64_t n,
double alpha, const double *a, std::int64_t lda,
std::int64_t stride_a, double *b, std::int64_t ldb,
std::int64_t stride_b, std::int64_t batch_size,
const std::vector<sycl::event> &dependencies = {});

static inline sycl::event omatcopy_batch(backend_selector<backend::BACKEND> selector,
transpose trans, std::int64_t m, std::int64_t n,
std::complex<float> alpha, const std::complex<float> *a,
std::int64_t lda, std::int64_t stride_a,
std::complex<float> *b, std::int64_t ldb,
std::int64_t stride_b, std::int64_t batch_size,
const std::vector<sycl::event> &dependencies = {});

static inline sycl::event omatcopy_batch(backend_selector<backend::BACKEND> selector,
transpose trans, std::int64_t m, std::int64_t n,
std::complex<double> alpha, const std::complex<double> *a,
std::int64_t lda, std::int64_t stride_a,
std::complex<double> *b, std::int64_t ldb,
std::int64_t stride_b, std::int64_t batch_size,
const std::vector<sycl::event> &dependencies = {});

// imatcopy_batch -- pointer-based overloads (declared under the "USM APIs"
// section of this header).
// Four overloads, one per element type: float, double, std::complex<float>,
// std::complex<double>. A single mutable pointer `ab` is used together with
// two leading dimensions (lda, ldb) and one `stride`. Every overload returns a
// sycl::event and accepts a `dependencies` vector that defaults to empty.
// NOTE(review): the single mutable `ab` pointer suggests an in-place scaled
// copy/transpose; the event/dependency semantics are presumably the usual
// completion-event / wait-list pair -- confirm against the oneMKL
// specification.
static inline sycl::event imatcopy_batch(backend_selector<backend::BACKEND> selector,
transpose trans, std::int64_t m, std::int64_t n,
float alpha, float *ab, std::int64_t lda, std::int64_t ldb,
std::int64_t stride, std::int64_t batch_size,
const std::vector<sycl::event> &dependencies = {});

static inline sycl::event imatcopy_batch(backend_selector<backend::BACKEND> selector,
transpose trans, std::int64_t m, std::int64_t n,
double alpha, double *ab, std::int64_t lda,
std::int64_t ldb, std::int64_t stride,
std::int64_t batch_size,
const std::vector<sycl::event> &dependencies = {});

static inline sycl::event imatcopy_batch(backend_selector<backend::BACKEND> selector,
transpose trans, std::int64_t m, std::int64_t n,
std::complex<float> alpha, std::complex<float> *ab,
std::int64_t lda, std::int64_t ldb, std::int64_t stride,
std::int64_t batch_size,
const std::vector<sycl::event> &dependencies = {});

static inline sycl::event imatcopy_batch(backend_selector<backend::BACKEND> selector,
transpose trans, std::int64_t m, std::int64_t n,
std::complex<double> alpha, std::complex<double> *ab,
std::int64_t lda, std::int64_t ldb, std::int64_t stride,
std::int64_t batch_size,
const std::vector<sycl::event> &dependencies = {});

// omatadd_batch -- pointer-based overloads (declared under the "USM APIs"
// section of this header).
// Four overloads, one per element type: float, double, std::complex<float>,
// std::complex<double>. `a` and `b` are pointers-to-const, each paired with a
// scalar (alpha, beta), a leading dimension and a stride; `c` is a mutable
// pointer with its own ldc and stride_c. Every overload returns a sycl::event
// and accepts a `dependencies` vector that defaults to empty.
// NOTE(review): presumably computes c := alpha * op(a) + beta * op(b) per
// batch entry, with the returned event signalling completion -- confirm
// against the oneMKL specification.
static inline sycl::event omatadd_batch(backend_selector<backend::BACKEND> selector,
transpose transa, transpose transb, std::int64_t m,
std::int64_t n, float alpha, const float *a,
std::int64_t lda, std::int64_t stride_a, float beta,
const float *b, std::int64_t ldb, std::int64_t stride_b,
float *c, std::int64_t ldc, std::int64_t stride_c,
std::int64_t batch_size,
const std::vector<sycl::event> &dependencies = {});

static inline sycl::event omatadd_batch(backend_selector<backend::BACKEND> selector,
transpose transa, transpose transb, std::int64_t m,
std::int64_t n, double alpha, const double *a,
std::int64_t lda, std::int64_t stride_a, double beta,
const double *b, std::int64_t ldb, std::int64_t stride_b,
double *c, std::int64_t ldc, std::int64_t stride_c,
std::int64_t batch_size,
const std::vector<sycl::event> &dependencies = {});

static inline sycl::event omatadd_batch(
backend_selector<backend::BACKEND> selector, transpose transa, transpose transb, std::int64_t m,
std::int64_t n, std::complex<float> alpha, const std::complex<float> *a, std::int64_t lda,
std::int64_t stride_a, std::complex<float> beta, const std::complex<float> *b, std::int64_t ldb,
std::int64_t stride_b, std::complex<float> *c, std::int64_t ldc, std::int64_t stride_c,
std::int64_t batch_size, const std::vector<sycl::event> &dependencies = {});

static inline sycl::event omatadd_batch(
backend_selector<backend::BACKEND> selector, transpose transa, transpose transb, std::int64_t m,
std::int64_t n, std::complex<double> alpha, const std::complex<double> *a, std::int64_t lda,
std::int64_t stride_a, std::complex<double> beta, const std::complex<double> *b,
std::int64_t ldb, std::int64_t stride_b, std::complex<double> *c, std::int64_t ldc,
std::int64_t stride_c, std::int64_t batch_size,
const std::vector<sycl::event> &dependencies = {});
Loading