From 87ca8bcba27523d26539c2dc14ce9693fcc3fa26 Mon Sep 17 00:00:00 2001 From: Jiafeng-Liu Date: Thu, 31 Mar 2022 21:52:36 +0800 Subject: [PATCH 01/30] cusparse loaded --- taichi/backends/cuda/cuda_context.cpp | 3 ++- taichi/backends/cuda/cuda_context.h | 2 ++ taichi/backends/cuda/cuda_driver.cpp | 30 +++++++++++++++++++++++++++ taichi/backends/cuda/cuda_driver.h | 11 ++++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/taichi/backends/cuda/cuda_context.cpp b/taichi/backends/cuda/cuda_context.cpp index a138c3d555395..892a17b50e6de 100644 --- a/taichi/backends/cuda/cuda_context.cpp +++ b/taichi/backends/cuda/cuda_context.cpp @@ -13,7 +13,8 @@ TLANG_NAMESPACE_BEGIN CUDAContext::CUDAContext() - : profiler_(nullptr), driver_(CUDADriver::get_instance_without_context()) { + : profiler_(nullptr), driver_(CUDADriver::get_instance_without_context()), + cusparse_driver_(CUSPARSEDriver::get_instance()) { // CUDA initialization dev_count_ = 0; driver_.init(0); diff --git a/taichi/backends/cuda/cuda_context.h b/taichi/backends/cuda/cuda_context.h index 69a02adf6f082..31c14b47f695c 100644 --- a/taichi/backends/cuda/cuda_context.h +++ b/taichi/backends/cuda/cuda_context.h @@ -15,6 +15,7 @@ TLANG_NAMESPACE_BEGIN // cases such as unit testing where many Taichi programs are created/destroyed. class CUDADriver; +class CUSPARSEDriver; class CUDAContext { private: @@ -26,6 +27,7 @@ class CUDAContext { std::mutex lock_; KernelProfilerBase *profiler_; CUDADriver &driver_; + CUSPARSEDriver & cusparse_driver_; bool debug_; public: diff --git a/taichi/backends/cuda/cuda_driver.cpp b/taichi/backends/cuda/cuda_driver.cpp index e01e1c0fe5bf2..c61bcc05ffc65 100644 --- a/taichi/backends/cuda/cuda_driver.cpp +++ b/taichi/backends/cuda/cuda_driver.cpp @@ -78,4 +78,34 @@ CUDADriver &CUDADriver::get_instance() { return get_instance_without_context(); } + +CUSPARSEDriver::CUSPARSEDriver() { + auto disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0); + if (disabled_by_env_) { + TI_TRACE("CUDA driver disabled by enviroment variable \"TI_ENABLE_CUDA\"."); + return; + } + +#if defined(TI_PLATFORM_LINUX) + auto loader_ = std::make_unique("libcusparse.so"); +#elif defined(TI_PLATFORM_WINDOWS) + loader_ = std::make_unique("cusparse.dll"); +#else + static_assert(false, "Taichi CUDA driver supports only Windows and Linux."); +#endif + + if (!loader_->loaded()) { + TI_WARN("CUSPARSE driver not found."); + return; + } + else { + TI_TRACE("cusparse loaded!"); + } +} + +CUSPARSEDriver& CUSPARSEDriver::get_instance() { + static CUSPARSEDriver* instance = new CUSPARSEDriver(); + return *instance; +} + TLANG_NAMESPACE_END diff --git a/taichi/backends/cuda/cuda_driver.h b/taichi/backends/cuda/cuda_driver.h index 40e3eff765c63..87b11e1479b7a 100644 --- a/taichi/backends/cuda/cuda_driver.h +++ b/taichi/backends/cuda/cuda_driver.h @@ -127,4 +127,15 @@ class CUDADriver { bool cuda_version_valid_{false}; }; +class CUSPARSEDriver { +public: + static CUSPARSEDriver &get_instance(); + +private: + CUSPARSEDriver* instance; + CUSPARSEDriver(); + +}; + + TLANG_NAMESPACE_END From e3b1c7c5a7a3ab3961ca94ee1e8d0caf980e46e7 Mon Sep 17 00:00:00 2001 From: Jiafeng-Liu Date: Wed, 4 May 2022 15:50:51 +0800 Subject: [PATCH 02/30] load cusolver --- taichi/backends/cuda/cuda_context.cpp | 3 ++- taichi/backends/cuda/cuda_context.h | 2 ++ taichi/backends/cuda/cuda_driver.cpp | 35 +++++++++++++++++++++++++-- taichi/backends/cuda/cuda_driver.h | 15 +++++++++++- 4 files changed, 51 insertions(+), 4 deletions(-) diff --git 
a/taichi/backends/cuda/cuda_context.cpp b/taichi/backends/cuda/cuda_context.cpp index 892a17b50e6de..c4cb302570144 100644 --- a/taichi/backends/cuda/cuda_context.cpp +++ b/taichi/backends/cuda/cuda_context.cpp @@ -14,7 +14,8 @@ TLANG_NAMESPACE_BEGIN CUDAContext::CUDAContext() : profiler_(nullptr), driver_(CUDADriver::get_instance_without_context()), - cusparse_driver_(CUSPARSEDriver::get_instance()) { + cusparse_driver_(CUSPARSEDriver::get_instance()), + cusolver_driver_(CUSOLVERDriver::get_instance()) { // CUDA initialization dev_count_ = 0; driver_.init(0); diff --git a/taichi/backends/cuda/cuda_context.h b/taichi/backends/cuda/cuda_context.h index 31c14b47f695c..4a8cca6755799 100644 --- a/taichi/backends/cuda/cuda_context.h +++ b/taichi/backends/cuda/cuda_context.h @@ -16,6 +16,7 @@ TLANG_NAMESPACE_BEGIN class CUDADriver; class CUSPARSEDriver; +class CUSOLVERDriver; class CUDAContext { private: @@ -28,6 +29,7 @@ class CUDAContext { KernelProfilerBase *profiler_; CUDADriver &driver_; CUSPARSEDriver & cusparse_driver_; + CUSOLVERDriver & cusolver_driver_; bool debug_; public: diff --git a/taichi/backends/cuda/cuda_driver.cpp b/taichi/backends/cuda/cuda_driver.cpp index c61bcc05ffc65..5804f7f956758 100644 --- a/taichi/backends/cuda/cuda_driver.cpp +++ b/taichi/backends/cuda/cuda_driver.cpp @@ -80,6 +80,7 @@ CUDADriver &CUDADriver::get_instance() { CUSPARSEDriver::CUSPARSEDriver() { + // TODO: enable cusparse and cusolver flag env variable. auto disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0); if (disabled_by_env_) { TI_TRACE("CUDA driver disabled by enviroment variable \"TI_ENABLE_CUDA\"."); @@ -87,7 +88,7 @@ CUSPARSEDriver::CUSPARSEDriver() { } #if defined(TI_PLATFORM_LINUX) - auto loader_ = std::make_unique("libcusparse.so"); + loader_ = std::make_unique("libcusparse.so"); #elif defined(TI_PLATFORM_WINDOWS) loader_ = std::make_unique("cusparse.dll"); #else @@ -95,7 +96,7 @@ CUSPARSEDriver::CUSPARSEDriver() { #endif if (!loader_->loaded()) { - TI_WARN("CUSPARSE driver not found."); + TI_WARN("CUSPARSE lib not found."); return; } else { @@ -108,4 +109,34 @@ CUSPARSEDriver& CUSPARSEDriver::get_instance() { return *instance; } +CUSOLVERDriver::CUSOLVERDriver() { + // TODO: enable cusparse and cusolver flag env variable. 
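The TI_ENABLE_CUDA check that follows is the gate shared by all of these loaders. A minimal sketch of what that gate amounts to, assuming get_environ_config() is a thin wrapper over the process environment (the real helper lives elsewhere in Taichi and may behave differently):

// Sketch only: env_config() stands in for Taichi's get_environ_config().
#include <cstdio>
#include <cstdlib>

static int env_config(const char *name, int default_value) {
  const char *v = std::getenv(name);  // e.g. TI_ENABLE_CUDA=0 disables CUDA library loading
  return v ? std::atoi(v) : default_value;
}

int main() {
  const bool disabled = (env_config("TI_ENABLE_CUDA", 1) == 0);
  std::printf("CUDA library loading %s\n", disabled ? "disabled" : "enabled");
  return 0;
}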
+ auto disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0); + if (disabled_by_env_) { + TI_TRACE("CUDA driver disabled by enviroment variable \"TI_ENABLE_CUDA\"."); + return; + } + +#if defined(TI_PLATFORM_LINUX) + loader_ = std::make_unique("libcusolver.so"); +#elif defined(TI_PLATFORM_WINDOWS) + loader_ = std::make_unique("cusolver.dll"); +#else + static_assert(false, "Taichi CUDA driver supports only Windows and Linux."); +#endif + + if (!loader_->loaded()) { + TI_WARN("cusolver lib not found."); + return; + } + else { + TI_TRACE("cusolver loaded!"); + } +} + +CUSOLVERDriver& CUSOLVERDriver::get_instance() { + static CUSOLVERDriver* instance = new CUSOLVERDriver(); + return *instance; +} + TLANG_NAMESPACE_END diff --git a/taichi/backends/cuda/cuda_driver.h b/taichi/backends/cuda/cuda_driver.h index 87b11e1479b7a..770b98db743a0 100644 --- a/taichi/backends/cuda/cuda_driver.h +++ b/taichi/backends/cuda/cuda_driver.h @@ -127,15 +127,28 @@ class CUDADriver { bool cuda_version_valid_{false}; }; + + class CUSPARSEDriver { public: static CUSPARSEDriver &get_instance(); private: - CUSPARSEDriver* instance; + std::unique_ptr loader_; CUSPARSEDriver(); }; +class CUSOLVERDriver { +public: + static CUSOLVERDriver &get_instance(); + +private: + std::unique_ptr loader_; + CUSOLVERDriver(); + +}; + + TLANG_NAMESPACE_END From 1b8a8c755e2d10b4a24cc6256976ac52ca29b495 Mon Sep 17 00:00:00 2001 From: Jiafeng-Liu Date: Wed, 4 May 2022 16:55:00 +0800 Subject: [PATCH 03/30] add driver base class --- taichi/backends/cuda/cuda_driver.cpp | 36 ++++++++-------------------- taichi/backends/cuda/cuda_driver.h | 14 +++++++---- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/taichi/backends/cuda/cuda_driver.cpp b/taichi/backends/cuda/cuda_driver.cpp index 5804f7f956758..9ebb15af33a82 100644 --- a/taichi/backends/cuda/cuda_driver.cpp +++ b/taichi/backends/cuda/cuda_driver.cpp @@ -78,19 +78,20 @@ CUDADriver &CUDADriver::get_instance() { return get_instance_without_context(); } - -CUSPARSEDriver::CUSPARSEDriver() { +CUDADriverBase::CUDADriverBase() { // TODO: enable cusparse and cusolver flag env variable. auto disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0); if (disabled_by_env_) { TI_TRACE("CUDA driver disabled by enviroment variable \"TI_ENABLE_CUDA\"."); return; } +} +void CUDADriverBase::load_lib(std::string lib_linux, std::string lib_windows) { #if defined(TI_PLATFORM_LINUX) - loader_ = std::make_unique("libcusparse.so"); + loader_ = std::make_unique(lib_linux); #elif defined(TI_PLATFORM_WINDOWS) - loader_ = std::make_unique("cusparse.dll"); + loader_ = std::make_unique(lib_windows); #else static_assert(false, "Taichi CUDA driver supports only Windows and Linux."); #endif @@ -104,34 +105,17 @@ CUSPARSEDriver::CUSPARSEDriver() { } } +CUSPARSEDriver::CUSPARSEDriver(){ + load_lib("libcusparse.so", "cusparse.dll"); +} + CUSPARSEDriver& CUSPARSEDriver::get_instance() { static CUSPARSEDriver* instance = new CUSPARSEDriver(); return *instance; } CUSOLVERDriver::CUSOLVERDriver() { - // TODO: enable cusparse and cusolver flag env variable. 
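The load_lib() helper added above delegates to Taichi's DynamicLoader. A rough sketch of what that step looks like at the OS level on Linux, assuming the loader wraps dlopen/dlsym (LoadLibrary/GetProcAddress would be the Windows counterparts); this is illustrative, not the actual DynamicLoader code:

// Sketch only: load "libcusparse.so" and resolve one symbol with the raw dlfcn API.
#include <dlfcn.h>
#include <cstdio>

int main() {
  void *handle = dlopen("libcusparse.so", RTLD_NOW | RTLD_LOCAL);
  if (!handle) {
    std::printf("cusparse not found: %s\n", dlerror());
    return 1;
  }
  // The driver classes repeat this lookup once per wrapped entry point.
  void *fn = dlsym(handle, "cusparseCreate");
  std::printf("cusparseCreate -> %p\n", fn);
  dlclose(handle);
  return 0;
}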
- auto disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0); - if (disabled_by_env_) { - TI_TRACE("CUDA driver disabled by enviroment variable \"TI_ENABLE_CUDA\"."); - return; - } - -#if defined(TI_PLATFORM_LINUX) - loader_ = std::make_unique("libcusolver.so"); -#elif defined(TI_PLATFORM_WINDOWS) - loader_ = std::make_unique("cusolver.dll"); -#else - static_assert(false, "Taichi CUDA driver supports only Windows and Linux."); -#endif - - if (!loader_->loaded()) { - TI_WARN("cusolver lib not found."); - return; - } - else { - TI_TRACE("cusolver loaded!"); - } + load_lib("libcusolver.so", "cusolver.dll"); } CUSOLVERDriver& CUSOLVERDriver::get_instance() { diff --git a/taichi/backends/cuda/cuda_driver.h b/taichi/backends/cuda/cuda_driver.h index 770b98db743a0..e26ab156d297d 100644 --- a/taichi/backends/cuda/cuda_driver.h +++ b/taichi/backends/cuda/cuda_driver.h @@ -127,24 +127,30 @@ class CUDADriver { bool cuda_version_valid_{false}; }; +class CUDADriverBase { +protected: + std::unique_ptr loader_; + CUDADriverBase(); + + void load_lib(std::string lib_linux, std::string lib_windows); + +}; -class CUSPARSEDriver { +class CUSPARSEDriver: protected CUDADriverBase { public: static CUSPARSEDriver &get_instance(); private: - std::unique_ptr loader_; CUSPARSEDriver(); }; -class CUSOLVERDriver { +class CUSOLVERDriver: protected CUDADriverBase { public: static CUSOLVERDriver &get_instance(); private: - std::unique_ptr loader_; CUSOLVERDriver(); }; From e53279ae2965bd8cf2232109837ec4bc5f4931fa Mon Sep 17 00:00:00 2001 From: Jiafeng-Liu Date: Wed, 4 May 2022 17:19:35 +0800 Subject: [PATCH 04/30] update comments --- taichi/backends/cuda/cuda_driver.h | 1 + 1 file changed, 1 insertion(+) diff --git a/taichi/backends/cuda/cuda_driver.h b/taichi/backends/cuda/cuda_driver.h index e26ab156d297d..b63887205fda2 100644 --- a/taichi/backends/cuda/cuda_driver.h +++ b/taichi/backends/cuda/cuda_driver.h @@ -96,6 +96,7 @@ class CUDADriverFunction { }; class CUDADriver { + // TODO: make CUDADriver a derived class of CUDADriverBase. public: #define PER_CUDA_FUNCTION(name, symbol_name, ...) 
\ CUDADriverFunction<__VA_ARGS__> name; From 4082d57d7b26e350a1c29a29db2075827dde74f3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 May 2022 09:19:57 +0000 Subject: [PATCH 05/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- taichi/backends/cuda/cuda_context.cpp | 7 ++++--- taichi/backends/cuda/cuda_context.h | 4 ++-- taichi/backends/cuda/cuda_driver.cpp | 13 ++++++------- taichi/backends/cuda/cuda_driver.h | 20 +++++++------------- 4 files changed, 19 insertions(+), 25 deletions(-) diff --git a/taichi/backends/cuda/cuda_context.cpp b/taichi/backends/cuda/cuda_context.cpp index c4cb302570144..f1cb1ba5bba3c 100644 --- a/taichi/backends/cuda/cuda_context.cpp +++ b/taichi/backends/cuda/cuda_context.cpp @@ -13,9 +13,10 @@ TLANG_NAMESPACE_BEGIN CUDAContext::CUDAContext() - : profiler_(nullptr), driver_(CUDADriver::get_instance_without_context()), - cusparse_driver_(CUSPARSEDriver::get_instance()), - cusolver_driver_(CUSOLVERDriver::get_instance()) { + : profiler_(nullptr), + driver_(CUDADriver::get_instance_without_context()), + cusparse_driver_(CUSPARSEDriver::get_instance()), + cusolver_driver_(CUSOLVERDriver::get_instance()) { // CUDA initialization dev_count_ = 0; driver_.init(0); diff --git a/taichi/backends/cuda/cuda_context.h b/taichi/backends/cuda/cuda_context.h index 4a8cca6755799..0834de1120d74 100644 --- a/taichi/backends/cuda/cuda_context.h +++ b/taichi/backends/cuda/cuda_context.h @@ -28,8 +28,8 @@ class CUDAContext { std::mutex lock_; KernelProfilerBase *profiler_; CUDADriver &driver_; - CUSPARSEDriver & cusparse_driver_; - CUSOLVERDriver & cusolver_driver_; + CUSPARSEDriver &cusparse_driver_; + CUSOLVERDriver &cusolver_driver_; bool debug_; public: diff --git a/taichi/backends/cuda/cuda_driver.cpp b/taichi/backends/cuda/cuda_driver.cpp index 9ebb15af33a82..d703337421fee 100644 --- a/taichi/backends/cuda/cuda_driver.cpp +++ b/taichi/backends/cuda/cuda_driver.cpp @@ -99,18 +99,17 @@ void CUDADriverBase::load_lib(std::string lib_linux, std::string lib_windows) { if (!loader_->loaded()) { TI_WARN("CUSPARSE lib not found."); return; - } - else { + } else { TI_TRACE("cusparse loaded!"); } } -CUSPARSEDriver::CUSPARSEDriver(){ +CUSPARSEDriver::CUSPARSEDriver() { load_lib("libcusparse.so", "cusparse.dll"); } -CUSPARSEDriver& CUSPARSEDriver::get_instance() { - static CUSPARSEDriver* instance = new CUSPARSEDriver(); +CUSPARSEDriver &CUSPARSEDriver::get_instance() { + static CUSPARSEDriver *instance = new CUSPARSEDriver(); return *instance; } @@ -118,8 +117,8 @@ CUSOLVERDriver::CUSOLVERDriver() { load_lib("libcusolver.so", "cusolver.dll"); } -CUSOLVERDriver& CUSOLVERDriver::get_instance() { - static CUSOLVERDriver* instance = new CUSOLVERDriver(); +CUSOLVERDriver &CUSOLVERDriver::get_instance() { + static CUSOLVERDriver *instance = new CUSOLVERDriver(); return *instance; } diff --git a/taichi/backends/cuda/cuda_driver.h b/taichi/backends/cuda/cuda_driver.h index b63887205fda2..6d0469fb1f415 100644 --- a/taichi/backends/cuda/cuda_driver.h +++ b/taichi/backends/cuda/cuda_driver.h @@ -129,33 +129,27 @@ class CUDADriver { }; class CUDADriverBase { - -protected: + protected: std::unique_ptr loader_; CUDADriverBase(); void load_lib(std::string lib_linux, std::string lib_windows); - }; -class CUSPARSEDriver: protected CUDADriverBase { -public: +class CUSPARSEDriver : protected CUDADriverBase { + public: static CUSPARSEDriver &get_instance(); -private: + private: 
CUSPARSEDriver(); - }; -class CUSOLVERDriver: protected CUDADriverBase { -public: +class CUSOLVERDriver : protected CUDADriverBase { + public: static CUSOLVERDriver &get_instance(); -private: + private: CUSOLVERDriver(); - }; - - TLANG_NAMESPACE_END From 39e0563253582e838012ef2f7c82b1cf62c1692a Mon Sep 17 00:00:00 2001 From: Jiafeng-Liu Date: Tue, 17 May 2022 11:08:58 +0800 Subject: [PATCH 06/30] make CUDADriver a derived class from CUDADriverBase --- taichi/backends/cuda/cuda_driver.cpp | 60 ++++++++++++++++------------ taichi/backends/cuda/cuda_driver.h | 30 +++++++++----- 2 files changed, 54 insertions(+), 36 deletions(-) diff --git a/taichi/backends/cuda/cuda_driver.cpp b/taichi/backends/cuda/cuda_driver.cpp index d703337421fee..c4a002e6f8f20 100644 --- a/taichi/backends/cuda/cuda_driver.cpp +++ b/taichi/backends/cuda/cuda_driver.cpp @@ -20,24 +20,26 @@ bool CUDADriver::detected() { } CUDADriver::CUDADriver() { - disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0); - if (disabled_by_env_) { - TI_TRACE("CUDA driver disabled by enviroment variable \"TI_ENABLE_CUDA\"."); - return; - } - -#if defined(TI_PLATFORM_LINUX) - loader_ = std::make_unique("libcuda.so"); -#elif defined(TI_PLATFORM_WINDOWS) - loader_ = std::make_unique("nvcuda.dll"); -#else - static_assert(false, "Taichi CUDA driver supports only Windows and Linux."); -#endif - - if (!loader_->loaded()) { - TI_WARN("CUDA driver not found."); - return; - } +// disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0); +// if (disabled_by_env_) { +// TI_TRACE("CUDA driver disabled by enviroment variable \"TI_ENABLE_CUDA\"."); +// return; +// } + +// #if defined(TI_PLATFORM_LINUX) +// loader_ = std::make_unique("libcuda.so"); +// #elif defined(TI_PLATFORM_WINDOWS) +// loader_ = std::make_unique("nvcuda.dll"); +// #else +// static_assert(false, "Taichi CUDA driver supports only Windows and Linux."); +// #endif + + // if (!loader_->loaded()) { + // TI_WARN("CUDA driver not found."); + // return; + // } + + load_lib("libcuda.so", "nvcuda.dll"); loader_->load_function("cuGetErrorName", get_error_name); loader_->load_function("cuGetErrorString", get_error_string); @@ -80,7 +82,7 @@ CUDADriver &CUDADriver::get_instance() { CUDADriverBase::CUDADriverBase() { // TODO: enable cusparse and cusolver flag env variable. 
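The PER_CUDA_FUNCTION macro in cuda_driver.h, and the PER_CUSPARSE_FUNCTION list that later patches in this series move into cusparse_functions.inc.h, follow the X-macro pattern: one list of entry points is expanded twice, once to declare wrapper members and once to resolve their symbols. A toy, self-contained version of the idea (the series shares its list through an .inc.h file rather than a list macro, and the names below are invented for the sketch):

#include <cstdio>

// One list of (member, symbol) pairs, expanded twice with different definitions.
#define MY_FUNCTION_LIST                   \
  PER_FUNCTION(create, "cusparseCreate")   \
  PER_FUNCTION(destroy, "cusparseDestroy")

struct Wrapper {
// Expansion 1: declare one member slot per entry point.
#define PER_FUNCTION(name, symbol) void *name = nullptr;
  MY_FUNCTION_LIST
#undef PER_FUNCTION

  void load() {
// Expansion 2: resolve each symbol (stubbed with printf instead of dlsym here).
#define PER_FUNCTION(name, symbol) std::printf("resolving %s\n", symbol);
    MY_FUNCTION_LIST
#undef PER_FUNCTION
  }
};

int main() {
  Wrapper w;
  w.load();
  return 0;
}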
- auto disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0); + disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0); if (disabled_by_env_) { TI_TRACE("CUDA driver disabled by enviroment variable \"TI_ENABLE_CUDA\"."); return; @@ -89,18 +91,26 @@ CUDADriverBase::CUDADriverBase() { void CUDADriverBase::load_lib(std::string lib_linux, std::string lib_windows) { #if defined(TI_PLATFORM_LINUX) - loader_ = std::make_unique(lib_linux); + auto lib_name = lib_linux; #elif defined(TI_PLATFORM_WINDOWS) - loader_ = std::make_unique(lib_windows); + auto lib_name = lib_windows; #else static_assert(false, "Taichi CUDA driver supports only Windows and Linux."); #endif - +// #if defined(TI_PLATFORM_LINUX) +// loader_ = std::make_unique(lib_linux); +// #elif defined(TI_PLATFORM_WINDOWS) +// loader_ = std::make_unique(lib_windows); +// #else +// static_assert(false, "Taichi CUDA driver supports only Windows and Linux."); +// #endif + loader_ = std::make_unique(lib_name); if (!loader_->loaded()) { - TI_WARN("CUSPARSE lib not found."); + TI_WARN("{} lib not found.", lib_name); return; - } else { - TI_TRACE("cusparse loaded!"); + } + else { + TI_TRACE("{} loaded!", lib_name); } } diff --git a/taichi/backends/cuda/cuda_driver.h b/taichi/backends/cuda/cuda_driver.h index 6d0469fb1f415..5b65a8671c8d0 100644 --- a/taichi/backends/cuda/cuda_driver.h +++ b/taichi/backends/cuda/cuda_driver.h @@ -95,7 +95,22 @@ class CUDADriverFunction { std::mutex *driver_lock_{nullptr}; }; -class CUDADriver { + +class CUDADriverBase { + +public: + ~CUDADriverBase() = default; + +protected: + std::unique_ptr loader_; + CUDADriverBase(); + + void load_lib(std::string lib_linux, std::string lib_windows); + + bool disabled_by_env_{false}; +}; + +class CUDADriver: protected CUDADriverBase { // TODO: make CUDADriver a derived class of CUDADriverBase. public: #define PER_CUDA_FUNCTION(name, symbol_name, ...) 
\ @@ -111,7 +126,7 @@ class CUDADriver { bool detected(); - ~CUDADriver() = default; + // ~CUDADriver() = default; static CUDADriver &get_instance(); @@ -120,21 +135,14 @@ class CUDADriver { private: CUDADriver(); - std::unique_ptr loader_; + // std::unique_ptr loader_; std::mutex lock_; - bool disabled_by_env_{false}; + // bool disabled_by_env_{false}; bool cuda_version_valid_{false}; }; -class CUDADriverBase { - protected: - std::unique_ptr loader_; - CUDADriverBase(); - - void load_lib(std::string lib_linux, std::string lib_windows); -}; class CUSPARSEDriver : protected CUDADriverBase { public: From 933589eb2ac25d83af896148742c12ce3223ea4e Mon Sep 17 00:00:00 2001 From: Jiafeng-Liu Date: Tue, 17 May 2022 13:44:46 +0800 Subject: [PATCH 07/30] clean code --- taichi/backends/cuda/cuda_driver.cpp | 27 +-------------------------- taichi/backends/cuda/cuda_driver.h | 12 ++++-------- 2 files changed, 5 insertions(+), 34 deletions(-) diff --git a/taichi/backends/cuda/cuda_driver.cpp b/taichi/backends/cuda/cuda_driver.cpp index c4a002e6f8f20..9e8ec44fe6d60 100644 --- a/taichi/backends/cuda/cuda_driver.cpp +++ b/taichi/backends/cuda/cuda_driver.cpp @@ -20,25 +20,6 @@ bool CUDADriver::detected() { } CUDADriver::CUDADriver() { -// disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0); -// if (disabled_by_env_) { -// TI_TRACE("CUDA driver disabled by enviroment variable \"TI_ENABLE_CUDA\"."); -// return; -// } - -// #if defined(TI_PLATFORM_LINUX) -// loader_ = std::make_unique("libcuda.so"); -// #elif defined(TI_PLATFORM_WINDOWS) -// loader_ = std::make_unique("nvcuda.dll"); -// #else -// static_assert(false, "Taichi CUDA driver supports only Windows and Linux."); -// #endif - - // if (!loader_->loaded()) { - // TI_WARN("CUDA driver not found."); - // return; - // } - load_lib("libcuda.so", "nvcuda.dll"); loader_->load_function("cuGetErrorName", get_error_name); @@ -97,13 +78,7 @@ void CUDADriverBase::load_lib(std::string lib_linux, std::string lib_windows) { #else static_assert(false, "Taichi CUDA driver supports only Windows and Linux."); #endif -// #if defined(TI_PLATFORM_LINUX) -// loader_ = std::make_unique(lib_linux); -// #elif defined(TI_PLATFORM_WINDOWS) -// loader_ = std::make_unique(lib_windows); -// #else -// static_assert(false, "Taichi CUDA driver supports only Windows and Linux."); -// #endif + loader_ = std::make_unique(lib_name); if (!loader_->loaded()) { TI_WARN("{} lib not found.", lib_name); diff --git a/taichi/backends/cuda/cuda_driver.h b/taichi/backends/cuda/cuda_driver.h index 5b65a8671c8d0..b29e32d52c834 100644 --- a/taichi/backends/cuda/cuda_driver.h +++ b/taichi/backends/cuda/cuda_driver.h @@ -111,7 +111,6 @@ class CUDADriverBase { }; class CUDADriver: protected CUDADriverBase { - // TODO: make CUDADriver a derived class of CUDADriverBase. public: #define PER_CUDA_FUNCTION(name, symbol_name, ...) 
\ CUDADriverFunction<__VA_ARGS__> name; @@ -126,8 +125,6 @@ class CUDADriver: protected CUDADriverBase { bool detected(); - // ~CUDADriver() = default; - static CUDADriver &get_instance(); static CUDADriver &get_instance_without_context(); @@ -135,25 +132,24 @@ class CUDADriver: protected CUDADriverBase { private: CUDADriver(); - // std::unique_ptr loader_; - std::mutex lock_; - // bool disabled_by_env_{false}; bool cuda_version_valid_{false}; }; -class CUSPARSEDriver : protected CUDADriverBase { +class CUSPARSEDriver: protected CUDADriverBase { public: + // TODO: Add cusparse function APIs static CUSPARSEDriver &get_instance(); private: CUSPARSEDriver(); }; -class CUSOLVERDriver : protected CUDADriverBase { +class CUSOLVERDriver: protected CUDADriverBase { public: + // TODO: Add cusolver function APIs static CUSOLVERDriver &get_instance(); private: From 8d468266854d99a6c8cee68f298925279a338850 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 May 2022 05:52:34 +0000 Subject: [PATCH 08/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- taichi/backends/cuda/cuda_driver.cpp | 3 +-- taichi/backends/cuda/cuda_driver.h | 13 +++++-------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/taichi/backends/cuda/cuda_driver.cpp b/taichi/backends/cuda/cuda_driver.cpp index 9e8ec44fe6d60..17aec896722b1 100644 --- a/taichi/backends/cuda/cuda_driver.cpp +++ b/taichi/backends/cuda/cuda_driver.cpp @@ -83,8 +83,7 @@ void CUDADriverBase::load_lib(std::string lib_linux, std::string lib_windows) { if (!loader_->loaded()) { TI_WARN("{} lib not found.", lib_name); return; - } - else { + } else { TI_TRACE("{} loaded!", lib_name); } } diff --git a/taichi/backends/cuda/cuda_driver.h b/taichi/backends/cuda/cuda_driver.h index b29e32d52c834..c0569d05402e2 100644 --- a/taichi/backends/cuda/cuda_driver.h +++ b/taichi/backends/cuda/cuda_driver.h @@ -95,13 +95,11 @@ class CUDADriverFunction { std::mutex *driver_lock_{nullptr}; }; - class CUDADriverBase { - -public: + public: ~CUDADriverBase() = default; -protected: + protected: std::unique_ptr loader_; CUDADriverBase(); @@ -110,7 +108,7 @@ class CUDADriverBase { bool disabled_by_env_{false}; }; -class CUDADriver: protected CUDADriverBase { +class CUDADriver : protected CUDADriverBase { public: #define PER_CUDA_FUNCTION(name, symbol_name, ...) 
\ CUDADriverFunction<__VA_ARGS__> name; @@ -137,8 +135,7 @@ class CUDADriver: protected CUDADriverBase { bool cuda_version_valid_{false}; }; - -class CUSPARSEDriver: protected CUDADriverBase { +class CUSPARSEDriver : protected CUDADriverBase { public: // TODO: Add cusparse function APIs static CUSPARSEDriver &get_instance(); @@ -147,7 +144,7 @@ class CUSPARSEDriver: protected CUDADriverBase { CUSPARSEDriver(); }; -class CUSOLVERDriver: protected CUDADriverBase { +class CUSOLVERDriver : protected CUDADriverBase { public: // TODO: Add cusolver function APIs static CUSOLVERDriver &get_instance(); From 222abeffeade6eac5b8c0f07453be0fcb1735721 Mon Sep 17 00:00:00 2001 From: Jiafeng-Liu Date: Tue, 17 May 2022 14:56:53 +0800 Subject: [PATCH 09/30] clean code --- taichi/backends/cuda/cuda_driver.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/taichi/backends/cuda/cuda_driver.cpp b/taichi/backends/cuda/cuda_driver.cpp index 17aec896722b1..16370be1543ad 100644 --- a/taichi/backends/cuda/cuda_driver.cpp +++ b/taichi/backends/cuda/cuda_driver.cpp @@ -62,7 +62,6 @@ CUDADriver &CUDADriver::get_instance() { } CUDADriverBase::CUDADriverBase() { - // TODO: enable cusparse and cusolver flag env variable. disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0); if (disabled_by_env_) { TI_TRACE("CUDA driver disabled by enviroment variable \"TI_ENABLE_CUDA\"."); From 09c2ede5a16870838a512e3faa8e3a423840c9f2 Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Fri, 27 May 2022 19:55:04 +0800 Subject: [PATCH 10/30] create sparse matrix using cusparse --- misc/sm_gpu_ndarray.py | 26 +++++++++++++ python/taichi/linalg/sparse_matrix.py | 8 ++++ taichi/backends/cuda/cuda_context.cpp | 4 ++ taichi/backends/cuda/cuda_context.h | 1 + taichi/backends/cuda/cuda_driver.cpp | 13 +++++++ taichi/backends/cuda/cuda_driver.h | 13 +++++++ taichi/backends/cuda/cuda_types.h | 54 +++++++++++++++++++++++++++ taichi/program/sparse_matrix.cpp | 18 +++++++++ taichi/program/sparse_matrix.h | 4 ++ taichi/python/export_lang.cpp | 17 ++++++--- 10 files changed, 152 insertions(+), 6 deletions(-) create mode 100644 misc/sm_gpu_ndarray.py diff --git a/misc/sm_gpu_ndarray.py b/misc/sm_gpu_ndarray.py new file mode 100644 index 0000000000000..95b92b5aa0493 --- /dev/null +++ b/misc/sm_gpu_ndarray.py @@ -0,0 +1,26 @@ +import taichi as ti +import numpy as np +ti.init(arch=ti.cuda) + + +row_csr = ti.ndarray(shape=5, dtype=ti.f32) +col_csr = ti.ndarray(shape=9, dtype=ti.f32) +value_csr = ti.ndarray(shape=9, dtype=ti.f32) + +h_row_csr = np.asarray([ 0, 3, 4, 7, 9]) +h_col_csr = np.asarray([0, 2, 3, 1, 0, 2, 3, 1, 3 ]) +h_value_csr = np.asarray([1.0, 2.0, 3.0, 4.0, 5.0,6.0, 7.0, 8.0, 9.0]) + + +for i in range(5): + row_csr[i] = h_row_csr[i] +for i in range(9): + col_csr[i] = h_col_csr[i] +for i in range(9): + value_csr[i] = h_value_csr[i] + + + +A = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.f32) + +A.build_from_ndarray_cusparse(row_csr, col_csr, value_csr) \ No newline at end of file diff --git a/python/taichi/linalg/sparse_matrix.py b/python/taichi/linalg/sparse_matrix.py index 83b4eb15d3b86..076dd7ebce1c1 100644 --- a/python/taichi/linalg/sparse_matrix.py +++ b/python/taichi/linalg/sparse_matrix.py @@ -194,6 +194,14 @@ def build_from_ndarray(self, ndarray): 'Sparse matrix only supports building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray]' ) + def build_from_ndarray_cusparse(self, row_csr, col_csr, value_csr): + if isinstance(row_csr, Ndarray) and isinstance(col_csr, Ndarray) and isinstance(value_csr, Ndarray): + 
get_runtime().prog.make_sparse_matrix_from_ndarray_cusparse( + self.matrix, row_csr.arr, col_csr.arr, value_csr.arr) + else: + raise TaichiRuntimeError( + 'Sparse matrix only supports building from [ti.ndarray, ti.Vectorndarray, ti.Matrix.ndarray]' + ) class SparseMatrixBuilder: """A python wrap around sparse matrix builder. diff --git a/taichi/backends/cuda/cuda_context.cpp b/taichi/backends/cuda/cuda_context.cpp index f1cb1ba5bba3c..06342dd934cc6 100644 --- a/taichi/backends/cuda/cuda_context.cpp +++ b/taichi/backends/cuda/cuda_context.cpp @@ -53,6 +53,9 @@ CUDAContext::CUDAContext() mcpu_ = fmt::format("sm_{}", compute_capability_); TI_TRACE("Emitting CUDA code for {}", mcpu_); + + // Initialize the CUSPARSE library + cusparse_driver_.cpCreate(&cusparse_handle_); } std::size_t CUDAContext::get_total_memory() { @@ -130,6 +133,7 @@ CUDAContext::~CUDAContext() { CUDADriver::get_instance().cuModuleUnload(cudaModule); CUDADriver::get_instance().cuCtxDestroy(context); */ + cusparse_driver_.cpDestroy(cusparse_handle_); } CUDAContext &CUDAContext::get_instance() { diff --git a/taichi/backends/cuda/cuda_context.h b/taichi/backends/cuda/cuda_context.h index 0834de1120d74..4d212428ebfff 100644 --- a/taichi/backends/cuda/cuda_context.h +++ b/taichi/backends/cuda/cuda_context.h @@ -30,6 +30,7 @@ class CUDAContext { CUDADriver &driver_; CUSPARSEDriver &cusparse_driver_; CUSOLVERDriver &cusolver_driver_; + cusparseHandle_t cusparse_handle_; bool debug_; public: diff --git a/taichi/backends/cuda/cuda_driver.cpp b/taichi/backends/cuda/cuda_driver.cpp index 16370be1543ad..7a3bc2b92c496 100644 --- a/taichi/backends/cuda/cuda_driver.cpp +++ b/taichi/backends/cuda/cuda_driver.cpp @@ -89,6 +89,19 @@ void CUDADriverBase::load_lib(std::string lib_linux, std::string lib_windows) { CUSPARSEDriver::CUSPARSEDriver() { load_lib("libcusparse.so", "cusparse.dll"); + + #define PER_CUSPARSE_FUNCTION(name, symbol_name, ...) \ + name.set(loader_->load_function(#symbol_name)); \ + name.set_lock(&lock_); \ + name.set_names(#name, #symbol_name); + PER_CUSPARSE_FUNCTION(cpCreate, cusparseCreate, cusparseHandle_t *); + PER_CUSPARSE_FUNCTION(cpDestroy, cusparseDestroy, cusparseHandle_t); + PER_CUSPARSE_FUNCTION(cpCreateCoo, cusparseCreateCoo, cusparseSpMatDescr_t*, int, int, int,void*, void*, void*,cusparseIndexType_t, cusparseIndexBase_t,cudaDataType ); + PER_CUSPARSE_FUNCTION(cpCreateCsr, cusparseCreateCsr, cusparseSpMatDescr_t*, int, int, int,void*, void*, void*,cusparseIndexType_t, cusparseIndexType_t, cusparseIndexBase_t,cudaDataType ); + PER_CUSPARSE_FUNCTION(cpDestroySpMat, cusparseDestroySpMat, cusparseSpMatDescr_t); + PER_CUSPARSE_FUNCTION(cpCreateDnVec, cusparseCreateDnVec, cusparseDnVecDescr_t*, int, void*, cudaDataType); + PER_CUSPARSE_FUNCTION(cpDestroyDnVec, cusparseDestroyDnVec, cusparseDnVecDescr_t); + #undef PER_CUSPARSE_FUNCTION } CUSPARSEDriver &CUSPARSEDriver::get_instance() { diff --git a/taichi/backends/cuda/cuda_driver.h b/taichi/backends/cuda/cuda_driver.h index c0569d05402e2..d8202302ff337 100644 --- a/taichi/backends/cuda/cuda_driver.h +++ b/taichi/backends/cuda/cuda_driver.h @@ -140,8 +140,21 @@ class CUSPARSEDriver : protected CUDADriverBase { // TODO: Add cusparse function APIs static CUSPARSEDriver &get_instance(); + #define PER_CUSPARSE_FUNCTION(name, symbol_name, ...) 
\ + CUDADriverFunction<__VA_ARGS__> name; + PER_CUSPARSE_FUNCTION(cpCreate, cusparseCreate, cusparseHandle_t *); + PER_CUSPARSE_FUNCTION(cpDestroy, cusparseDestroy, cusparseHandle_t); + PER_CUSPARSE_FUNCTION(cpCreateCoo, cusparseCreateCoo, cusparseSpMatDescr_t*, int, int, int,void*, void*, void*,cusparseIndexType_t, cusparseIndexBase_t,cudaDataType ); + PER_CUSPARSE_FUNCTION(cpCreateCsr, cusparseCreateCsr, cusparseSpMatDescr_t*, int, int, int,void*, void*, void*,cusparseIndexType_t, cusparseIndexType_t, cusparseIndexBase_t,cudaDataType ); + PER_CUSPARSE_FUNCTION(cpDestroySpMat, cusparseDestroySpMat, cusparseSpMatDescr_t); + PER_CUSPARSE_FUNCTION(cpCreateDnVec, cusparseCreateDnVec, cusparseDnVecDescr_t*, int, void*, cudaDataType); + PER_CUSPARSE_FUNCTION(cpDestroyDnVec, cusparseDestroyDnVec, cusparseDnVecDescr_t); + #undef PER_CUSPARSE_FUNCTION + + private: CUSPARSEDriver(); + std::mutex lock_; }; class CUSOLVERDriver : protected CUDADriverBase { diff --git a/taichi/backends/cuda/cuda_types.h b/taichi/backends/cuda/cuda_types.h index 04fd262df9756..2a331060f2223 100644 --- a/taichi/backends/cuda/cuda_types.h +++ b/taichi/backends/cuda/cuda_types.h @@ -3,6 +3,7 @@ #if defined(TI_WITH_CUDA_TOOLKIT) #include +#include #else @@ -434,3 +435,56 @@ typedef struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st { #define CUDA_ARRAY3D_COLOR_ATTACHMENT 0x20 #endif + + +// copy from cusparse.h +struct cusparseContext; +typedef struct cusparseContext* cusparseHandle_t; + +struct cusparseDnVecDescr; +struct cusparseSpMatDescr; +typedef struct cusparseDnVecDescr* cusparseDnVecDescr_t; +typedef struct cusparseSpMatDescr* cusparseSpMatDescr_t; +typedef enum { + CUSPARSE_INDEX_16U = 1, ///< 16-bit unsigned integer for matrix/vector + ///< indices + CUSPARSE_INDEX_32I = 2, ///< 32-bit signed integer for matrix/vector indices + CUSPARSE_INDEX_64I = 3 ///< 64-bit signed integer for matrix/vector indices +} cusparseIndexType_t; + +typedef enum { + CUSPARSE_INDEX_BASE_ZERO = 0, + CUSPARSE_INDEX_BASE_ONE = 1 +} cusparseIndexBase_t; + +typedef enum cudaDataType_t +{ + CUDA_R_16F = 2, /* real as a half */ + CUDA_C_16F = 6, /* complex as a pair of half numbers */ + CUDA_R_16BF = 14, /* real as a nv_bfloat16 */ + CUDA_C_16BF = 15, /* complex as a pair of nv_bfloat16 numbers */ + CUDA_R_32F = 0, /* real as a float */ + CUDA_C_32F = 4, /* complex as a pair of float numbers */ + CUDA_R_64F = 1, /* real as a double */ + CUDA_C_64F = 5, /* complex as a pair of double numbers */ + CUDA_R_4I = 16, /* real as a signed 4-bit int */ + CUDA_C_4I = 17, /* complex as a pair of signed 4-bit int numbers */ + CUDA_R_4U = 18, /* real as a unsigned 4-bit int */ + CUDA_C_4U = 19, /* complex as a pair of unsigned 4-bit int numbers */ + CUDA_R_8I = 3, /* real as a signed 8-bit int */ + CUDA_C_8I = 7, /* complex as a pair of signed 8-bit int numbers */ + CUDA_R_8U = 8, /* real as a unsigned 8-bit int */ + CUDA_C_8U = 9, /* complex as a pair of unsigned 8-bit int numbers */ + CUDA_R_16I = 20, /* real as a signed 16-bit int */ + CUDA_C_16I = 21, /* complex as a pair of signed 16-bit int numbers */ + CUDA_R_16U = 22, /* real as a unsigned 16-bit int */ + CUDA_C_16U = 23, /* complex as a pair of unsigned 16-bit int numbers */ + CUDA_R_32I = 10, /* real as a signed 32-bit int */ + CUDA_C_32I = 11, /* complex as a pair of signed 32-bit int numbers */ + CUDA_R_32U = 12, /* real as a unsigned 32-bit int */ + CUDA_C_32U = 13, /* complex as a pair of unsigned 32-bit int numbers */ + CUDA_R_64I = 24, /* real as a signed 64-bit int */ + CUDA_C_64I = 25, /* 
complex as a pair of signed 64-bit int numbers */ + CUDA_R_64U = 26, /* real as a unsigned 64-bit int */ + CUDA_C_64U = 27 /* complex as a pair of unsigned 64-bit int numbers */ +} cudaDataType; \ No newline at end of file diff --git a/taichi/program/sparse_matrix.cpp b/taichi/program/sparse_matrix.cpp index c728b033e468e..fba8687aaa5e9 100644 --- a/taichi/program/sparse_matrix.cpp +++ b/taichi/program/sparse_matrix.cpp @@ -1,4 +1,5 @@ #include "taichi/program/sparse_matrix.h" +#include "taichi/backends/cuda/cuda_driver.h" #include #include @@ -191,5 +192,22 @@ void make_sparse_matrix_from_ndarray(Program *prog, } } +void make_sparse_matrix_from_ndarray_cusparse(Program *prog, + SparseMatrix &sm, + const Ndarray &row_offsets,const Ndarray &col_indices,const Ndarray &values) { + std::string sdtype = taichi::lang::data_type_name(sm.get_data_type()); + int row_csr = prog->get_ndarray_data_ptr_as_int(&row_offsets); + int col_csr = prog->get_ndarray_data_ptr_as_int(&col_indices); + int values_csr = prog->get_ndarray_data_ptr_as_int(&values); + int nnz = values.get_nelement() / 3; + int A_num_rows = sm.num_rows(); + int A_num_cols = sm.num_cols(); + cusparseSpMatDescr_t matA; + CUSPARSEDriver::get_instance().cpCreateCsr(&matA, A_num_rows, A_num_cols, nnz, + (void*)(size_t)row_csr, (void*)(size_t)col_csr, (void*)(size_t)values_csr, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); +} + } // namespace lang } // namespace taichi diff --git a/taichi/program/sparse_matrix.h b/taichi/program/sparse_matrix.h index 9501fc2781469..800b7b5510e27 100644 --- a/taichi/program/sparse_matrix.h +++ b/taichi/program/sparse_matrix.h @@ -5,6 +5,7 @@ #include "taichi/ir/type_utils.h" #include "taichi/program/ndarray.h" #include "taichi/program/program.h" +#include "taichi/backends/cuda/cuda_context.h" #include "Eigen/Sparse" @@ -198,5 +199,8 @@ std::unique_ptr make_sparse_matrix( void make_sparse_matrix_from_ndarray(Program *prog, SparseMatrix &sm, const Ndarray &ndarray); +void make_sparse_matrix_from_ndarray_cusparse(Program *prog, + SparseMatrix &sm, + const Ndarray &row_offsets,const Ndarray &col_indices,const Ndarray &values); } // namespace lang } // namespace taichi diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index f4f63e458bbca..bc923eb79f8d1 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -369,24 +369,29 @@ void export_lang(py::module &m) { .def("create_sparse_matrix_builder", [](Program *program, int n, int m, uint64 max_num_entries, DataType dtype, const std::string &storage_format) { - TI_ERROR_IF(!arch_is_cpu(program->config.arch), - "SparseMatrix only supports CPU for now."); + // TI_ERROR_IF(!arch_is_cpu(program->config.arch), + // "SparseMatrix only supports CPU for now."); return SparseMatrixBuilder(n, m, max_num_entries, dtype, storage_format); }) .def("create_sparse_matrix", [](Program *program, int n, int m, DataType dtype, std::string storage_format) { - TI_ERROR_IF(!arch_is_cpu(program->config.arch), - "SparseMatrix only supports CPU for now."); + // TI_ERROR_IF(!arch_is_cpu(program->config.arch), + // "SparseMatrix only supports CPU for now."); return make_sparse_matrix(n, m, dtype, storage_format); }) .def("make_sparse_matrix_from_ndarray", [](Program *program, SparseMatrix &sm, const Ndarray &ndarray) { - TI_ERROR_IF(!arch_is_cpu(program->config.arch), - "SparseMatrix only supports CPU for now."); + // TI_ERROR_IF(!arch_is_cpu(program->config.arch), + // "SparseMatrix only supports CPU for 
now."); return make_sparse_matrix_from_ndarray(program, sm, ndarray); }) + .def("make_sparse_matrix_from_ndarray_cusparse", + [](Program *program, SparseMatrix &sm, const Ndarray &row_csr, const Ndarray &col_csr, const Ndarray &val_csr) { + return make_sparse_matrix_from_ndarray_cusparse(program, sm, row_csr, col_csr, val_csr); + } + ) .def( "dump_dot", [](Program *program, std::optional rankdir, From 2f4b0f1c9d68a7aefac17f9af1d66a58d260d1d0 Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Sat, 28 May 2022 01:28:03 +0800 Subject: [PATCH 11/30] spmv with bugs --- misc/sm_gpu_ndarray.py | 39 +++++++++------ python/taichi/linalg/sparse_matrix.py | 21 ++++++-- taichi/backends/cuda/cuda_context.cpp | 5 +- taichi/backends/cuda/cuda_context.h | 7 ++- taichi/backends/cuda/cuda_driver.cpp | 8 +-- taichi/backends/cuda/cuda_driver.h | 8 +-- taichi/backends/cuda/cuda_types.h | 16 +++++- taichi/backends/cuda/cusparse_functions.inc.h | 18 +++++++ taichi/program/sparse_matrix.cpp | 50 +++++++++++++++++-- taichi/program/sparse_matrix.h | 6 ++- taichi/python/export_lang.cpp | 4 +- 11 files changed, 136 insertions(+), 46 deletions(-) create mode 100644 taichi/backends/cuda/cusparse_functions.inc.h diff --git a/misc/sm_gpu_ndarray.py b/misc/sm_gpu_ndarray.py index 95b92b5aa0493..309eec6609b6f 100644 --- a/misc/sm_gpu_ndarray.py +++ b/misc/sm_gpu_ndarray.py @@ -1,26 +1,33 @@ import taichi as ti import numpy as np -ti.init(arch=ti.cuda) +ti.init(arch=ti.cuda, gdb_trigger=True) +idx_dt = ti.int32 +val_dt = ti.f32 +row_csr = ti.ndarray(shape=5, dtype=idx_dt) +col_csr = ti.ndarray(shape=9, dtype=idx_dt) +value_csr = ti.ndarray(shape=9, dtype=val_dt) +X = ti.ndarray(shape=4, dtype=val_dt) +Y = ti.ndarray(shape=4, dtype=val_dt) +Y_result = ti.ndarray(shape=4, dtype=val_dt) -row_csr = ti.ndarray(shape=5, dtype=ti.f32) -col_csr = ti.ndarray(shape=9, dtype=ti.f32) -value_csr = ti.ndarray(shape=9, dtype=ti.f32) -h_row_csr = np.asarray([ 0, 3, 4, 7, 9]) -h_col_csr = np.asarray([0, 2, 3, 1, 0, 2, 3, 1, 3 ]) -h_value_csr = np.asarray([1.0, 2.0, 3.0, 4.0, 5.0,6.0, 7.0, 8.0, 9.0]) - - -for i in range(5): - row_csr[i] = h_row_csr[i] -for i in range(9): - col_csr[i] = h_col_csr[i] -for i in range(9): - value_csr[i] = h_value_csr[i] +h_row_csr = np.asarray([ 0, 3, 4, 7, 9], dtype=np.int32) +h_col_csr = np.asarray([0, 2, 3, 1, 0, 2, 3, 1, 3 ], dtype=np.int32) +h_value_csr = np.asarray([1.0, 2.0, 3.0, 4.0, 5.0,6.0, 7.0, 8.0, 9.0], dtype=np.float32) +h_X = np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float32) +h_Y_result = np.asarray([19.0, 8.0, 51.0, 52.0], dtype=np.float32) +row_csr.from_numpy(h_row_csr) +col_csr.from_numpy(h_col_csr) +value_csr.from_numpy(h_value_csr) +X.from_numpy(h_X) +Y.fill(0.0) A = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.f32) -A.build_from_ndarray_cusparse(row_csr, col_csr, value_csr) \ No newline at end of file +A.build_from_ndarray_cusparse(row_csr, col_csr, value_csr, X, Y) + +for i in range(4): + print(f"{Y[i]} == {h_Y_result[i]} : {Y[i] == h_Y_result[i]}") \ No newline at end of file diff --git a/python/taichi/linalg/sparse_matrix.py b/python/taichi/linalg/sparse_matrix.py index 076dd7ebce1c1..cc07e7c710adc 100644 --- a/python/taichi/linalg/sparse_matrix.py +++ b/python/taichi/linalg/sparse_matrix.py @@ -194,14 +194,29 @@ def build_from_ndarray(self, ndarray): 'Sparse matrix only supports building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray]' ) - def build_from_ndarray_cusparse(self, row_csr, col_csr, value_csr): - if isinstance(row_csr, Ndarray) and isinstance(col_csr, Ndarray) and 
isinstance(value_csr, Ndarray): + def build_from_ndarray_cusparse(self, row_csr, col_csr, value_csr, x, y): + if isinstance(row_csr, Ndarray) and isinstance(col_csr, Ndarray) and isinstance(value_csr, Ndarray) and isinstance(x, Ndarray) and isinstance(y, Ndarray): get_runtime().prog.make_sparse_matrix_from_ndarray_cusparse( - self.matrix, row_csr.arr, col_csr.arr, value_csr.arr) + self.matrix, row_csr.arr, col_csr.arr, value_csr.arr, x.arr, y.arr) else: raise TaichiRuntimeError( 'Sparse matrix only supports building from [ti.ndarray, ti.Vectorndarray, ti.Matrix.ndarray]' ) + def spmv(self, x): + """Sparse matrix-vector multiplication. + + Args: + x (ti.Vector): the vector to be multiplied. + Returns: + The result of matrix-vector multiplication. + """ + if not isinstance(x, Ndarray): + raise TaichiRuntimeError( + 'Sparse matrix only supports building from [ti.ndarray, ti.Vectorndarray, ti.Matrix.ndarray]' + ) + if self.m != x.shape[0]: + raise TaichiRuntimeError(f"Dimension mismatch between sparse matrix ({self.n}, {self.m}) and vector ({x.shape})") + return self.matrix.spmv(x) class SparseMatrixBuilder: """A python wrap around sparse matrix builder. diff --git a/taichi/backends/cuda/cuda_context.cpp b/taichi/backends/cuda/cuda_context.cpp index 06342dd934cc6..386113e3f89b0 100644 --- a/taichi/backends/cuda/cuda_context.cpp +++ b/taichi/backends/cuda/cuda_context.cpp @@ -53,9 +53,6 @@ CUDAContext::CUDAContext() mcpu_ = fmt::format("sm_{}", compute_capability_); TI_TRACE("Emitting CUDA code for {}", mcpu_); - - // Initialize the CUSPARSE library - cusparse_driver_.cpCreate(&cusparse_handle_); } std::size_t CUDAContext::get_total_memory() { @@ -133,7 +130,7 @@ CUDAContext::~CUDAContext() { CUDADriver::get_instance().cuModuleUnload(cudaModule); CUDADriver::get_instance().cuCtxDestroy(context); */ - cusparse_driver_.cpDestroy(cusparse_handle_); +// cusparse_driver_.cpDestroy(cusparse_handle_); } CUDAContext &CUDAContext::get_instance() { diff --git a/taichi/backends/cuda/cuda_context.h b/taichi/backends/cuda/cuda_context.h index 4d212428ebfff..33ceb3ca62c30 100644 --- a/taichi/backends/cuda/cuda_context.h +++ b/taichi/backends/cuda/cuda_context.h @@ -30,7 +30,6 @@ class CUDAContext { CUDADriver &driver_; CUSPARSEDriver &cusparse_driver_; CUSOLVERDriver &cusolver_driver_; - cusparseHandle_t cusparse_handle_; bool debug_; public: @@ -75,6 +74,10 @@ class CUDAContext { return compute_capability_; } + // cusparseHandle_t& get_cusparse_handle(){ + // return cusparse_handle_; + // } + ~CUDAContext(); class ContextGuard { @@ -94,6 +97,8 @@ class CUDAContext { CUDADriver::get_instance().context_set_current(old_ctx_); } } + + }; ContextGuard get_guard() { diff --git a/taichi/backends/cuda/cuda_driver.cpp b/taichi/backends/cuda/cuda_driver.cpp index 7a3bc2b92c496..26ff2e3ec45e0 100644 --- a/taichi/backends/cuda/cuda_driver.cpp +++ b/taichi/backends/cuda/cuda_driver.cpp @@ -94,13 +94,7 @@ CUSPARSEDriver::CUSPARSEDriver() { name.set(loader_->load_function(#symbol_name)); \ name.set_lock(&lock_); \ name.set_names(#name, #symbol_name); - PER_CUSPARSE_FUNCTION(cpCreate, cusparseCreate, cusparseHandle_t *); - PER_CUSPARSE_FUNCTION(cpDestroy, cusparseDestroy, cusparseHandle_t); - PER_CUSPARSE_FUNCTION(cpCreateCoo, cusparseCreateCoo, cusparseSpMatDescr_t*, int, int, int,void*, void*, void*,cusparseIndexType_t, cusparseIndexBase_t,cudaDataType ); - PER_CUSPARSE_FUNCTION(cpCreateCsr, cusparseCreateCsr, cusparseSpMatDescr_t*, int, int, int,void*, void*, void*,cusparseIndexType_t, cusparseIndexType_t, 
cusparseIndexBase_t,cudaDataType ); - PER_CUSPARSE_FUNCTION(cpDestroySpMat, cusparseDestroySpMat, cusparseSpMatDescr_t); - PER_CUSPARSE_FUNCTION(cpCreateDnVec, cusparseCreateDnVec, cusparseDnVecDescr_t*, int, void*, cudaDataType); - PER_CUSPARSE_FUNCTION(cpDestroyDnVec, cusparseDestroyDnVec, cusparseDnVecDescr_t); + #include "taichi/backends/cuda/cusparse_functions.inc.h" #undef PER_CUSPARSE_FUNCTION } diff --git a/taichi/backends/cuda/cuda_driver.h b/taichi/backends/cuda/cuda_driver.h index d8202302ff337..98139e3106312 100644 --- a/taichi/backends/cuda/cuda_driver.h +++ b/taichi/backends/cuda/cuda_driver.h @@ -142,13 +142,7 @@ class CUSPARSEDriver : protected CUDADriverBase { #define PER_CUSPARSE_FUNCTION(name, symbol_name, ...) \ CUDADriverFunction<__VA_ARGS__> name; - PER_CUSPARSE_FUNCTION(cpCreate, cusparseCreate, cusparseHandle_t *); - PER_CUSPARSE_FUNCTION(cpDestroy, cusparseDestroy, cusparseHandle_t); - PER_CUSPARSE_FUNCTION(cpCreateCoo, cusparseCreateCoo, cusparseSpMatDescr_t*, int, int, int,void*, void*, void*,cusparseIndexType_t, cusparseIndexBase_t,cudaDataType ); - PER_CUSPARSE_FUNCTION(cpCreateCsr, cusparseCreateCsr, cusparseSpMatDescr_t*, int, int, int,void*, void*, void*,cusparseIndexType_t, cusparseIndexType_t, cusparseIndexBase_t,cudaDataType ); - PER_CUSPARSE_FUNCTION(cpDestroySpMat, cusparseDestroySpMat, cusparseSpMatDescr_t); - PER_CUSPARSE_FUNCTION(cpCreateDnVec, cusparseCreateDnVec, cusparseDnVecDescr_t*, int, void*, cudaDataType); - PER_CUSPARSE_FUNCTION(cpDestroyDnVec, cusparseDestroyDnVec, cusparseDnVecDescr_t); + #include "taichi/backends/cuda/cusparse_functions.inc.h" #undef PER_CUSPARSE_FUNCTION diff --git a/taichi/backends/cuda/cuda_types.h b/taichi/backends/cuda/cuda_types.h index 2a331060f2223..ec0d8cb35baae 100644 --- a/taichi/backends/cuda/cuda_types.h +++ b/taichi/backends/cuda/cuda_types.h @@ -487,4 +487,18 @@ typedef enum cudaDataType_t CUDA_C_64I = 25, /* complex as a pair of signed 64-bit int numbers */ CUDA_R_64U = 26, /* real as a unsigned 64-bit int */ CUDA_C_64U = 27 /* complex as a pair of unsigned 64-bit int numbers */ -} cudaDataType; \ No newline at end of file +} cudaDataType; + +typedef enum { + CUSPARSE_OPERATION_NON_TRANSPOSE = 0, + CUSPARSE_OPERATION_TRANSPOSE = 1, + CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE = 2 +} cusparseOperation_t; + +typedef enum { + CUSPARSE_SPMV_ALG_DEFAULT = 0, + CUSPARSE_SPMV_COO_ALG1 = 1, + CUSPARSE_SPMV_CSR_ALG1 = 2, + CUSPARSE_SPMV_CSR_ALG2 = 3, + CUSPARSE_SPMV_COO_ALG2 = 4 +} cusparseSpMVAlg_t; \ No newline at end of file diff --git a/taichi/backends/cuda/cusparse_functions.inc.h b/taichi/backends/cuda/cusparse_functions.inc.h new file mode 100644 index 0000000000000..284d747ea4fc3 --- /dev/null +++ b/taichi/backends/cuda/cusparse_functions.inc.h @@ -0,0 +1,18 @@ +// clang-format off + +// cusparse setup +PER_CUSPARSE_FUNCTION(cpCreate, cusparseCreate, cusparseHandle_t *); +PER_CUSPARSE_FUNCTION(cpDestroy, cusparseDestroy, cusparseHandle_t); + +// cusparse sparse matrix description +PER_CUSPARSE_FUNCTION(cpCreateCoo, cusparseCreateCoo, cusparseSpMatDescr_t*, int, int, int,void*, void*, void*,cusparseIndexType_t, cusparseIndexBase_t,cudaDataType ); +PER_CUSPARSE_FUNCTION(cpCreateCsr, cusparseCreateCsr, cusparseSpMatDescr_t*, int, int, int,void*, void*, void*,cusparseIndexType_t, cusparseIndexType_t, cusparseIndexBase_t,cudaDataType ); +PER_CUSPARSE_FUNCTION(cpDestroySpMat, cusparseDestroySpMat, cusparseSpMatDescr_t); + +// cusparse dense vector description +PER_CUSPARSE_FUNCTION(cpCreateDnVec, cusparseCreateDnVec, 
cusparseDnVecDescr_t*, int, void*, cudaDataType); +PER_CUSPARSE_FUNCTION(cpDestroyDnVec, cusparseDestroyDnVec, cusparseDnVecDescr_t); + +// cusparse sparse matrix-vector multiplication +PER_CUSPARSE_FUNCTION(cpSpMV_bufferSize, cusparseSpMV_bufferSize, cusparseHandle_t, cusparseOperation_t, const void*,cusparseSpMatDescr_t, cusparseDnVecDescr_t,const void*, cusparseDnVecDescr_t,cudaDataType, cusparseSpMVAlg_t, size_t*); +PER_CUSPARSE_FUNCTION(cpSpMV, cusparseSpMV, cusparseHandle_t, cusparseOperation_t, const void*,cusparseSpMatDescr_t, cusparseDnVecDescr_t,const void*, cusparseDnVecDescr_t,cudaDataType, cusparseSpMVAlg_t, void*); \ No newline at end of file diff --git a/taichi/program/sparse_matrix.cpp b/taichi/program/sparse_matrix.cpp index fba8687aaa5e9..0a888bec02e93 100644 --- a/taichi/program/sparse_matrix.cpp +++ b/taichi/program/sparse_matrix.cpp @@ -194,12 +194,18 @@ void make_sparse_matrix_from_ndarray(Program *prog, void make_sparse_matrix_from_ndarray_cusparse(Program *prog, SparseMatrix &sm, - const Ndarray &row_offsets,const Ndarray &col_indices,const Ndarray &values) { - std::string sdtype = taichi::lang::data_type_name(sm.get_data_type()); - int row_csr = prog->get_ndarray_data_ptr_as_int(&row_offsets); + const Ndarray &row_offsets,const Ndarray &col_indices,const Ndarray &values, const Ndarray &x, Ndarray &y) { + size_t row_csr = prog->get_ndarray_data_ptr_as_int(&row_offsets); + int* h_row_csr = (int*)malloc(sizeof(int) * 5); + CUDADriver::get_instance().memcpy_device_to_host((void*)h_row_csr, (void*)row_csr, sizeof(int) * 5); + for(auto i =0; i<5; i++) + printf("row_csr[%d] = %d \t",i, h_row_csr[i]); + printf("\n"); + free(h_row_csr); + int col_csr = prog->get_ndarray_data_ptr_as_int(&col_indices); int values_csr = prog->get_ndarray_data_ptr_as_int(&values); - int nnz = values.get_nelement() / 3; + int nnz = values.get_nelement(); int A_num_rows = sm.num_rows(); int A_num_cols = sm.num_cols(); cusparseSpMatDescr_t matA; @@ -207,6 +213,42 @@ void make_sparse_matrix_from_ndarray_cusparse(Program *prog, (void*)(size_t)row_csr, (void*)(size_t)col_csr, (void*)(size_t)values_csr, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); + size_t dX = prog->get_ndarray_data_ptr_as_int(&x); + size_t dY = prog->get_ndarray_data_ptr_as_int(&y); + + cusparseDnVecDescr_t vecX, vecY; + CUSPARSEDriver::get_instance().cpCreateDnVec(&vecX, A_num_cols,(void*) dX, CUDA_R_32F); + CUSPARSEDriver::get_instance().cpCreateDnVec(&vecY, A_num_cols,(void*) dY, CUDA_R_32F); + + // auto handle = CUDAContext::get_instance().get_cusparse_handle(); + cusparseHandle_t cusparse_handle; + CUSPARSEDriver::get_instance().cpCreate(&cusparse_handle); + float alpha = 1.0f, beta = 0.0f; + size_t bufferSize = 0; + CUSPARSEDriver::get_instance().cpSpMV_bufferSize(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, matA, vecX, &beta, vecY, CUDA_R_32F, + CUSPARSE_SPMV_CSR_ALG1, &bufferSize); + printf("bufferSize: %lu\n", bufferSize); + + void* dBuffer = NULL; + // CUDADriver::get_instance().stream_synchronize(nullptr); + CUDADriver::get_instance().malloc(&dBuffer, bufferSize); + CUSPARSEDriver::get_instance().cpSpMV( cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, matA, vecX, &beta, vecY, CUDA_R_32F, + CUSPARSE_SPMV_CSR_ALG1, dBuffer); + + // float32* h_y = (float32*)malloc(sizeof(float32) * A_num_rows); + // CUDADriver::get_instance().memcpy_device_to_host((void*)h_y, (void*)dY, sizeof(float32) * A_num_rows); + // for(auto i =0; i Date: Tue, 14 Jun 2022 14:04:39 +0800 
Subject: [PATCH 12/30] bug fix: int -> size_t --- misc/sm_gpu_ndarray.py | 2 +- taichi/backends/cuda/cusparse_functions.inc.h | 1 + taichi/program/sparse_matrix.cpp | 32 +++++-------------- 3 files changed, 10 insertions(+), 25 deletions(-) diff --git a/misc/sm_gpu_ndarray.py b/misc/sm_gpu_ndarray.py index 309eec6609b6f..cf6f973542787 100644 --- a/misc/sm_gpu_ndarray.py +++ b/misc/sm_gpu_ndarray.py @@ -1,6 +1,6 @@ import taichi as ti import numpy as np -ti.init(arch=ti.cuda, gdb_trigger=True) +ti.init(arch=ti.cuda, gdb_trigger=False) idx_dt = ti.int32 val_dt = ti.f32 diff --git a/taichi/backends/cuda/cusparse_functions.inc.h b/taichi/backends/cuda/cusparse_functions.inc.h index 284d747ea4fc3..dc87f9831cbf6 100644 --- a/taichi/backends/cuda/cusparse_functions.inc.h +++ b/taichi/backends/cuda/cusparse_functions.inc.h @@ -7,6 +7,7 @@ PER_CUSPARSE_FUNCTION(cpDestroy, cusparseDestroy, cusparseHandle_t); // cusparse sparse matrix description PER_CUSPARSE_FUNCTION(cpCreateCoo, cusparseCreateCoo, cusparseSpMatDescr_t*, int, int, int,void*, void*, void*,cusparseIndexType_t, cusparseIndexBase_t,cudaDataType ); PER_CUSPARSE_FUNCTION(cpCreateCsr, cusparseCreateCsr, cusparseSpMatDescr_t*, int, int, int,void*, void*, void*,cusparseIndexType_t, cusparseIndexType_t, cusparseIndexBase_t,cudaDataType ); +PER_CUSPARSE_FUNCTION(cpSpMatGetSize, cusparseSpMatGetSize, cusparseSpMatDescr_t, int64_t*, int64_t*, int64_t*); PER_CUSPARSE_FUNCTION(cpDestroySpMat, cusparseDestroySpMat, cusparseSpMatDescr_t); // cusparse dense vector description diff --git a/taichi/program/sparse_matrix.cpp b/taichi/program/sparse_matrix.cpp index 0a888bec02e93..96d093b1a8815 100644 --- a/taichi/program/sparse_matrix.cpp +++ b/taichi/program/sparse_matrix.cpp @@ -196,21 +196,14 @@ void make_sparse_matrix_from_ndarray_cusparse(Program *prog, SparseMatrix &sm, const Ndarray &row_offsets,const Ndarray &col_indices,const Ndarray &values, const Ndarray &x, Ndarray &y) { size_t row_csr = prog->get_ndarray_data_ptr_as_int(&row_offsets); - int* h_row_csr = (int*)malloc(sizeof(int) * 5); - CUDADriver::get_instance().memcpy_device_to_host((void*)h_row_csr, (void*)row_csr, sizeof(int) * 5); - for(auto i =0; i<5; i++) - printf("row_csr[%d] = %d \t",i, h_row_csr[i]); - printf("\n"); - free(h_row_csr); - - int col_csr = prog->get_ndarray_data_ptr_as_int(&col_indices); - int values_csr = prog->get_ndarray_data_ptr_as_int(&values); + size_t col_csr = prog->get_ndarray_data_ptr_as_int(&col_indices); + size_t values_csr = prog->get_ndarray_data_ptr_as_int(&values); int nnz = values.get_nelement(); int A_num_rows = sm.num_rows(); int A_num_cols = sm.num_cols(); cusparseSpMatDescr_t matA; CUSPARSEDriver::get_instance().cpCreateCsr(&matA, A_num_rows, A_num_cols, nnz, - (void*)(size_t)row_csr, (void*)(size_t)col_csr, (void*)(size_t)values_csr, + (void*)row_csr, (void*)col_csr, (void*)values_csr, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); size_t dX = prog->get_ndarray_data_ptr_as_int(&x); @@ -220,7 +213,6 @@ void make_sparse_matrix_from_ndarray_cusparse(Program *prog, CUSPARSEDriver::get_instance().cpCreateDnVec(&vecX, A_num_cols,(void*) dX, CUDA_R_32F); CUSPARSEDriver::get_instance().cpCreateDnVec(&vecY, A_num_cols,(void*) dY, CUDA_R_32F); - // auto handle = CUDAContext::get_instance().get_cusparse_handle(); cusparseHandle_t cusparse_handle; CUSPARSEDriver::get_instance().cpCreate(&cusparse_handle); float alpha = 1.0f, beta = 0.0f; @@ -228,7 +220,6 @@ void make_sparse_matrix_from_ndarray_cusparse(Program *prog, 
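For orientation, the call sequence that make_sparse_matrix_from_ndarray_cusparse drives through the wrappers (cpCreateCsr, cpCreateDnVec, cpSpMV_bufferSize, cpSpMV) matches the standard cuSPARSE generic API. The standalone sketch below links against cuSPARSE directly (CUDA 11.x naming), reuses the 4x4 CSR matrix from misc/sm_gpu_ndarray.py, and is illustrative only, not Taichi code:

#include <cuda_runtime.h>
#include <cusparse.h>
#include <cstdio>

int main() {
  // CSR layout: row i's entries occupy [rowptr[i], rowptr[i+1]) of colidx/val.
  const int rows = 4, cols = 4, nnz = 9;
  int h_rowptr[] = {0, 3, 4, 7, 9};
  int h_colidx[] = {0, 2, 3, 1, 0, 2, 3, 1, 3};
  float h_val[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  float h_x[] = {1, 2, 3, 4}, h_y[4] = {0};

  int *d_rowptr, *d_colidx;
  float *d_val, *d_x, *d_y;
  cudaMalloc((void **)&d_rowptr, sizeof(h_rowptr));
  cudaMalloc((void **)&d_colidx, sizeof(h_colidx));
  cudaMalloc((void **)&d_val, sizeof(h_val));
  cudaMalloc((void **)&d_x, sizeof(h_x));
  cudaMalloc((void **)&d_y, sizeof(h_y));
  cudaMemcpy(d_rowptr, h_rowptr, sizeof(h_rowptr), cudaMemcpyHostToDevice);
  cudaMemcpy(d_colidx, h_colidx, sizeof(h_colidx), cudaMemcpyHostToDevice);
  cudaMemcpy(d_val, h_val, sizeof(h_val), cudaMemcpyHostToDevice);
  cudaMemcpy(d_x, h_x, sizeof(h_x), cudaMemcpyHostToDevice);
  cudaMemcpy(d_y, h_y, sizeof(h_y), cudaMemcpyHostToDevice);

  cusparseHandle_t handle;
  cusparseSpMatDescr_t matA;
  cusparseDnVecDescr_t vecX, vecY;
  cusparseCreate(&handle);
  cusparseCreateCsr(&matA, rows, cols, nnz, d_rowptr, d_colidx, d_val,
                    CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
                    CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
  cusparseCreateDnVec(&vecX, cols, d_x, CUDA_R_32F);
  cusparseCreateDnVec(&vecY, rows, d_y, CUDA_R_32F);

  // y = alpha * A * x + beta * y, with a scratch buffer sized by the library.
  float alpha = 1.0f, beta = 0.0f;
  size_t buffer_size = 0;
  void *d_buffer = nullptr;
  cusparseSpMV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha,
                          matA, vecX, &beta, vecY, CUDA_R_32F,
                          CUSPARSE_SPMV_CSR_ALG1, &buffer_size);
  cudaMalloc(&d_buffer, buffer_size);
  cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, vecX,
               &beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_CSR_ALG1, d_buffer);

  cudaMemcpy(h_y, d_y, sizeof(h_y), cudaMemcpyDeviceToHost);
  std::printf("y = %g %g %g %g (expected 19 8 51 52)\n",
              h_y[0], h_y[1], h_y[2], h_y[3]);

  cusparseDestroySpMat(matA);
  cusparseDestroyDnVec(vecX);
  cusparseDestroyDnVec(vecY);
  cusparseDestroy(handle);
  cudaFree(d_rowptr); cudaFree(d_colidx); cudaFree(d_val);
  cudaFree(d_x); cudaFree(d_y); cudaFree(d_buffer);
  return 0;
}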
CUSPARSEDriver::get_instance().cpSpMV_bufferSize(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, vecX, &beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_CSR_ALG1, &bufferSize); - printf("bufferSize: %lu\n", bufferSize); void* dBuffer = NULL; // CUDADriver::get_instance().stream_synchronize(nullptr); @@ -237,18 +228,11 @@ void make_sparse_matrix_from_ndarray_cusparse(Program *prog, &alpha, matA, vecX, &beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_CSR_ALG1, dBuffer); - // float32* h_y = (float32*)malloc(sizeof(float32) * A_num_rows); - // CUDADriver::get_instance().memcpy_device_to_host((void*)h_y, (void*)dY, sizeof(float32) * A_num_rows); - // for(auto i =0; i Date: Tue, 14 Jun 2022 18:01:30 +0800 Subject: [PATCH 13/30] clean --- taichi/backends/cuda/cuda_context.cpp | 5 ++--- taichi/backends/cuda/cuda_context.h | 4 ---- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/taichi/backends/cuda/cuda_context.cpp b/taichi/backends/cuda/cuda_context.cpp index 386113e3f89b0..1e204cff5f2be 100644 --- a/taichi/backends/cuda/cuda_context.cpp +++ b/taichi/backends/cuda/cuda_context.cpp @@ -14,9 +14,8 @@ TLANG_NAMESPACE_BEGIN CUDAContext::CUDAContext() : profiler_(nullptr), - driver_(CUDADriver::get_instance_without_context()), - cusparse_driver_(CUSPARSEDriver::get_instance()), - cusolver_driver_(CUSOLVERDriver::get_instance()) { + driver_(CUDADriver::get_instance_without_context()) + { // CUDA initialization dev_count_ = 0; driver_.init(0); diff --git a/taichi/backends/cuda/cuda_context.h b/taichi/backends/cuda/cuda_context.h index 33ceb3ca62c30..5e3801f302d33 100644 --- a/taichi/backends/cuda/cuda_context.h +++ b/taichi/backends/cuda/cuda_context.h @@ -15,8 +15,6 @@ TLANG_NAMESPACE_BEGIN // cases such as unit testing where many Taichi programs are created/destroyed. 
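A note on the get_instance() style shared by CUDAContext and the driver classes in this series: a function-local static that is constructed on first use, and (for the drivers) a pointer that is deliberately never freed, presumably to avoid static-destruction-order problems at process exit. A self-contained sketch of the pattern with an invented Driver class:

#include <cstdio>

class Driver {
 public:
  static Driver &get_instance() {
    // Constructed lazily on first call; intentionally leaked so it outlives
    // any other static object that may still need it during shutdown.
    static Driver *instance = new Driver();
    return *instance;
  }

 private:
  Driver() { std::printf("driver loaded once\n"); }
};

int main() {
  Driver::get_instance();
  Driver::get_instance();  // same object; the constructor does not run again
  return 0;
}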
class CUDADriver; -class CUSPARSEDriver; -class CUSOLVERDriver; class CUDAContext { private: @@ -28,8 +26,6 @@ class CUDAContext { std::mutex lock_; KernelProfilerBase *profiler_; CUDADriver &driver_; - CUSPARSEDriver &cusparse_driver_; - CUSOLVERDriver &cusolver_driver_; bool debug_; public: From d7035f122d8015b43738a24241d136af7bf10312 Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Thu, 16 Jun 2022 09:54:15 +0800 Subject: [PATCH 14/30] add safe loader --- taichi/backends/cuda/cuda_driver.cpp | 24 ++++++++++++++++-------- taichi/backends/cuda/cuda_driver.h | 7 ++++++- taichi/program/sparse_matrix.cpp | 6 ++++++ 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/taichi/backends/cuda/cuda_driver.cpp b/taichi/backends/cuda/cuda_driver.cpp index 26ff2e3ec45e0..cd65dcdb7ff2f 100644 --- a/taichi/backends/cuda/cuda_driver.cpp +++ b/taichi/backends/cuda/cuda_driver.cpp @@ -69,7 +69,7 @@ CUDADriverBase::CUDADriverBase() { } } -void CUDADriverBase::load_lib(std::string lib_linux, std::string lib_windows) { +bool CUDADriverBase::load_lib(std::string lib_linux, std::string lib_windows) { #if defined(TI_PLATFORM_LINUX) auto lib_name = lib_linux; #elif defined(TI_PLATFORM_WINDOWS) @@ -81,26 +81,34 @@ void CUDADriverBase::load_lib(std::string lib_linux, std::string lib_windows) { loader_ = std::make_unique(lib_name); if (!loader_->loaded()) { TI_WARN("{} lib not found.", lib_name); - return; + return false; } else { TI_TRACE("{} loaded!", lib_name); } + return true; } CUSPARSEDriver::CUSPARSEDriver() { - load_lib("libcusparse.so", "cusparse.dll"); +} + +CUSPARSEDriver &CUSPARSEDriver::get_instance() { + static CUSPARSEDriver *instance = new CUSPARSEDriver(); + return *instance; +} +bool CUSPARSEDriver::load_cusparse() { + cusparse_loaded_ = load_lib("libcusparse.so", "cusparse.dll"); + + if (!cusparse_loaded_) { + return false; + } #define PER_CUSPARSE_FUNCTION(name, symbol_name, ...) 
\ name.set(loader_->load_function(#symbol_name)); \ name.set_lock(&lock_); \ name.set_names(#name, #symbol_name); #include "taichi/backends/cuda/cusparse_functions.inc.h" #undef PER_CUSPARSE_FUNCTION -} - -CUSPARSEDriver &CUSPARSEDriver::get_instance() { - static CUSPARSEDriver *instance = new CUSPARSEDriver(); - return *instance; + return cusparse_loaded_; } CUSOLVERDriver::CUSOLVERDriver() { diff --git a/taichi/backends/cuda/cuda_driver.h b/taichi/backends/cuda/cuda_driver.h index 98139e3106312..08575d76fbca0 100644 --- a/taichi/backends/cuda/cuda_driver.h +++ b/taichi/backends/cuda/cuda_driver.h @@ -103,7 +103,7 @@ class CUDADriverBase { std::unique_ptr loader_; CUDADriverBase(); - void load_lib(std::string lib_linux, std::string lib_windows); + bool load_lib(std::string lib_linux, std::string lib_windows); bool disabled_by_env_{false}; }; @@ -145,10 +145,15 @@ class CUSPARSEDriver : protected CUDADriverBase { #include "taichi/backends/cuda/cusparse_functions.inc.h" #undef PER_CUSPARSE_FUNCTION + bool load_cusparse(); + bool is_loaded() { + return cusparse_loaded_; + } private: CUSPARSEDriver(); std::mutex lock_; + bool cusparse_loaded_{false}; }; class CUSOLVERDriver : protected CUDADriverBase { diff --git a/taichi/program/sparse_matrix.cpp b/taichi/program/sparse_matrix.cpp index 96d093b1a8815..ae93f9f0e28c5 100644 --- a/taichi/program/sparse_matrix.cpp +++ b/taichi/program/sparse_matrix.cpp @@ -195,6 +195,12 @@ void make_sparse_matrix_from_ndarray(Program *prog, void make_sparse_matrix_from_ndarray_cusparse(Program *prog, SparseMatrix &sm, const Ndarray &row_offsets,const Ndarray &col_indices,const Ndarray &values, const Ndarray &x, Ndarray &y) { + if (!CUSPARSEDriver::get_instance().is_loaded()) { + bool load_success = CUSPARSEDriver::get_instance().load_cusparse(); + if (!load_success) { + TI_ERROR("Failed to load cusparse library!"); + } + } size_t row_csr = prog->get_ndarray_data_ptr_as_int(&row_offsets); size_t col_csr = prog->get_ndarray_data_ptr_as_int(&col_indices); size_t values_csr = prog->get_ndarray_data_ptr_as_int(&values); From 214d56aa5ac767f0a99d906dd627a3971c114ae0 Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Thu, 16 Jun 2022 11:17:45 +0800 Subject: [PATCH 15/30] seperate sparse matrix maker and spmv func --- misc/sm_gpu_ndarray.py | 4 +- python/taichi/linalg/sparse_matrix.py | 14 +++--- taichi/program/sparse_matrix.cpp | 68 +++++++++++++++++++++------ taichi/program/sparse_matrix.h | 29 ++++++++++-- taichi/python/export_lang.cpp | 15 ++++-- 5 files changed, 101 insertions(+), 29 deletions(-) diff --git a/misc/sm_gpu_ndarray.py b/misc/sm_gpu_ndarray.py index cf6f973542787..ff20e9dfd1678 100644 --- a/misc/sm_gpu_ndarray.py +++ b/misc/sm_gpu_ndarray.py @@ -27,7 +27,9 @@ A = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.f32) -A.build_from_ndarray_cusparse(row_csr, col_csr, value_csr, X, Y) +A.build_from_ndarray_cusparse(row_csr, col_csr, value_csr) + +A.spmv(X, Y) for i in range(4): print(f"{Y[i]} == {h_Y_result[i]} : {Y[i] == h_Y_result[i]}") \ No newline at end of file diff --git a/python/taichi/linalg/sparse_matrix.py b/python/taichi/linalg/sparse_matrix.py index cc07e7c710adc..21b6bf4d2a165 100644 --- a/python/taichi/linalg/sparse_matrix.py +++ b/python/taichi/linalg/sparse_matrix.py @@ -194,15 +194,16 @@ def build_from_ndarray(self, ndarray): 'Sparse matrix only supports building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray]' ) - def build_from_ndarray_cusparse(self, row_csr, col_csr, value_csr, x, y): - if isinstance(row_csr, Ndarray) and 
isinstance(col_csr, Ndarray) and isinstance(value_csr, Ndarray) and isinstance(x, Ndarray) and isinstance(y, Ndarray): + def build_from_ndarray_cusparse(self, row_csr, col_csr, value_csr): + if isinstance(row_csr, Ndarray) and isinstance(col_csr, Ndarray) and isinstance(value_csr, Ndarray): + print(type(self.matrix)) get_runtime().prog.make_sparse_matrix_from_ndarray_cusparse( - self.matrix, row_csr.arr, col_csr.arr, value_csr.arr, x.arr, y.arr) + self.matrix, row_csr.arr, col_csr.arr, value_csr.arr) else: raise TaichiRuntimeError( 'Sparse matrix only supports building from [ti.ndarray, ti.Vectorndarray, ti.Matrix.ndarray]' ) - def spmv(self, x): + def spmv(self, x, y): """Sparse matrix-vector multiplication. Args: @@ -210,13 +211,14 @@ def spmv(self, x): Returns: The result of matrix-vector multiplication. """ - if not isinstance(x, Ndarray): + if not isinstance(x, Ndarray) or not isinstance(y, Ndarray): raise TaichiRuntimeError( 'Sparse matrix only supports building from [ti.ndarray, ti.Vectorndarray, ti.Matrix.ndarray]' ) if self.m != x.shape[0]: raise TaichiRuntimeError(f"Dimension mismatch between sparse matrix ({self.n}, {self.m}) and vector ({x.shape})") - return self.matrix.spmv(x) + + self.matrix.spmv(get_runtime().prog, x.arr, y.arr) class SparseMatrixBuilder: """A python wrap around sparse matrix builder. diff --git a/taichi/program/sparse_matrix.cpp b/taichi/program/sparse_matrix.cpp index ae93f9f0e28c5..8947906268cbe 100644 --- a/taichi/program/sparse_matrix.cpp +++ b/taichi/program/sparse_matrix.cpp @@ -163,6 +163,10 @@ std::unique_ptr make_sparse_matrix( storage_format); } +std::unique_ptr make_cu_sparse_matrix(int rows,int cols,DataType dt){ + return std::unique_ptr(std::make_unique(rows, cols, dt)); +} + template void build_ndarray_template(SparseMatrix &sm, intptr_t data_ptr, @@ -192,9 +196,19 @@ void make_sparse_matrix_from_ndarray(Program *prog, } } + +void CuSparseMatrix::build_csr(void *csr_ptr, void* csr_indices_ptr, void* csr_values_ptr, int nnz){ + CUSPARSEDriver::get_instance().cpCreateCsr(&matrix_, rows_, cols_, nnz, + csr_ptr, csr_indices_ptr, csr_values_ptr, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); +} +CuSparseMatrix::~CuSparseMatrix(){ + CUSPARSEDriver::get_instance().cpDestroySpMat(matrix_); +} void make_sparse_matrix_from_ndarray_cusparse(Program *prog, SparseMatrix &sm, - const Ndarray &row_offsets,const Ndarray &col_indices,const Ndarray &values, const Ndarray &x, Ndarray &y) { + const Ndarray &row_offsets,const Ndarray &col_indices,const Ndarray &values) { if (!CUSPARSEDriver::get_instance().is_loaded()) { bool load_success = CUSPARSEDriver::get_instance().load_cusparse(); if (!load_success) { @@ -205,36 +219,62 @@ void make_sparse_matrix_from_ndarray_cusparse(Program *prog, size_t col_csr = prog->get_ndarray_data_ptr_as_int(&col_indices); size_t values_csr = prog->get_ndarray_data_ptr_as_int(&values); int nnz = values.get_nelement(); - int A_num_rows = sm.num_rows(); - int A_num_cols = sm.num_cols(); - cusparseSpMatDescr_t matA; - CUSPARSEDriver::get_instance().cpCreateCsr(&matA, A_num_rows, A_num_cols, nnz, - (void*)row_csr, (void*)col_csr, (void*)values_csr, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); + sm.build_csr((void*)row_csr, (void*)col_csr, (void*)values_csr, nnz); + + // int A_num_rows = sm.num_rows(); + // int A_num_cols = sm.num_cols(); + // cusparseSpMatDescr_t matA; + // CUSPARSEDriver::get_instance().cpCreateCsr(&matA, A_num_rows, A_num_cols, nnz, + // 
(void*)row_csr, (void*)col_csr, (void*)values_csr, + // CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, + // CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); + // size_t dX = prog->get_ndarray_data_ptr_as_int(&x); + // size_t dY = prog->get_ndarray_data_ptr_as_int(&y); + // cusparseDnVecDescr_t vecX, vecY; + // CUSPARSEDriver::get_instance().cpCreateDnVec(&vecX, A_num_cols,(void*) dX, CUDA_R_32F); + // CUSPARSEDriver::get_instance().cpCreateDnVec(&vecY, A_num_cols,(void*) dY, CUDA_R_32F); + // cusparseHandle_t cusparse_handle; + // CUSPARSEDriver::get_instance().cpCreate(&cusparse_handle); + // float alpha = 1.0f, beta = 0.0f; + // size_t bufferSize = 0; + // CUSPARSEDriver::get_instance().cpSpMV_bufferSize(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, + // &alpha, matA, vecX, &beta, vecY, CUDA_R_32F, + // CUSPARSE_SPMV_CSR_ALG1, &bufferSize); + // void* dBuffer = NULL; + // // CUDADriver::get_instance().stream_synchronize(nullptr); + // CUDADriver::get_instance().malloc(&dBuffer, bufferSize); + // CUSPARSEDriver::get_instance().cpSpMV( cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, + // &alpha, matA, vecX, &beta, vecY, CUDA_R_32F, + // CUSPARSE_SPMV_CSR_ALG1, dBuffer); + // CUSPARSEDriver::get_instance().cpDestroySpMat(matA); + // CUSPARSEDriver::get_instance().cpDestroyDnVec(vecX); + // CUSPARSEDriver::get_instance().cpDestroyDnVec(vecY); + // CUSPARSEDriver::get_instance().cpDestroy(cusparse_handle); + // CUDADriver::get_instance().mem_free(dBuffer); +} + +void CuSparseMatrix::spmv(Program *prog, const Ndarray &x, Ndarray &y){ size_t dX = prog->get_ndarray_data_ptr_as_int(&x); size_t dY = prog->get_ndarray_data_ptr_as_int(&y); cusparseDnVecDescr_t vecX, vecY; - CUSPARSEDriver::get_instance().cpCreateDnVec(&vecX, A_num_cols,(void*) dX, CUDA_R_32F); - CUSPARSEDriver::get_instance().cpCreateDnVec(&vecY, A_num_cols,(void*) dY, CUDA_R_32F); + CUSPARSEDriver::get_instance().cpCreateDnVec(&vecX, cols_,(void*) dX, CUDA_R_32F); + CUSPARSEDriver::get_instance().cpCreateDnVec(&vecY, rows_,(void*) dY, CUDA_R_32F); cusparseHandle_t cusparse_handle; CUSPARSEDriver::get_instance().cpCreate(&cusparse_handle); float alpha = 1.0f, beta = 0.0f; size_t bufferSize = 0; CUSPARSEDriver::get_instance().cpSpMV_bufferSize(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha, matA, vecX, &beta, vecY, CUDA_R_32F, + &alpha, matrix_, vecX, &beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_CSR_ALG1, &bufferSize); void* dBuffer = NULL; - // CUDADriver::get_instance().stream_synchronize(nullptr); CUDADriver::get_instance().malloc(&dBuffer, bufferSize); CUSPARSEDriver::get_instance().cpSpMV( cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha, matA, vecX, &beta, vecY, CUDA_R_32F, + &alpha, matrix_, vecX, &beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_CSR_ALG1, dBuffer); - CUSPARSEDriver::get_instance().cpDestroySpMat(matA); CUSPARSEDriver::get_instance().cpDestroyDnVec(vecX); CUSPARSEDriver::get_instance().cpDestroyDnVec(vecY); CUSPARSEDriver::get_instance().cpDestroy(cusparse_handle); diff --git a/taichi/program/sparse_matrix.h b/taichi/program/sparse_matrix.h index ca6c9dc143ba5..5f9d84dfedf4e 100644 --- a/taichi/program/sparse_matrix.h +++ b/taichi/program/sparse_matrix.h @@ -59,7 +59,13 @@ class SparseMatrix { } virtual ~SparseMatrix() = default; - virtual void build_triplets(void *triplets_adr){}; + virtual void build_triplets(void *triplets_adr){ + TI_WARN("SparseMatrix::build_triplets is not implemented!"); + }; + + virtual void build_csr(void *csr_ptr, void* csr_indices_ptr, void* csr_values_ptr, int nnz){ + 
TI_WARN("SparseMatrix::build_csr is not implemented yet"); + }; inline const int num_rows() const { return rows_; @@ -190,11 +196,28 @@ class EigenSparseMatrix : public SparseMatrix { EigenMatrix matrix_; }; + +class CuSparseMatrix : public SparseMatrix { +public: + explicit CuSparseMatrix(int rows, int cols, DataType dt) + : SparseMatrix(rows, cols, dt){ + } + + virtual ~CuSparseMatrix(); + void build_csr(void *csr_ptr, void* csr_indices_ptr, void* csr_values_ptr, int nnz) override; + + void spmv(Program *prog, const Ndarray &x, Ndarray &y); + +private: + cusparseSpMatDescr_t matrix_; +}; + std::unique_ptr make_sparse_matrix( int rows, int cols, DataType dt, const std::string &storage_format); +std::unique_ptr make_cu_sparse_matrix(int rows,int cols,DataType dt); void make_sparse_matrix_from_ndarray(Program *prog, SparseMatrix &sm, @@ -203,8 +226,6 @@ void make_sparse_matrix_from_ndarray_cusparse(Program *prog, SparseMatrix &sm, const Ndarray &row_offsets, const Ndarray &col_indices, - const Ndarray &values, - const Ndarray &x, - Ndarray &y); + const Ndarray &values); } // namespace lang } // namespace taichi diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index 3fffb1589d4c2..fb12306c7cbc0 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -379,17 +379,20 @@ void export_lang(py::module &m) { std::string storage_format) { // TI_ERROR_IF(!arch_is_cpu(program->config.arch), // "SparseMatrix only supports CPU for now."); - return make_sparse_matrix(n, m, dtype, storage_format); + if (arch_is_cpu(program->config.arch)) + return make_sparse_matrix(n, m, dtype, storage_format); + else + return make_cu_sparse_matrix(n, m, dtype); }) .def("make_sparse_matrix_from_ndarray", [](Program *program, SparseMatrix &sm, const Ndarray &ndarray) { - // TI_ERROR_IF(!arch_is_cpu(program->config.arch), + // TI_ERROR_IF(!arch_is_cpu(program->config.arch),m // "SparseMatrix only supports CPU for now."); return make_sparse_matrix_from_ndarray(program, sm, ndarray); }) .def("make_sparse_matrix_from_ndarray_cusparse", - [](Program *program, SparseMatrix &sm, const Ndarray &row_csr, const Ndarray &col_csr, const Ndarray &val_csr, const Ndarray &x, Ndarray &y) { - return make_sparse_matrix_from_ndarray_cusparse(program, sm, row_csr, col_csr, val_csr, x, y); + [](Program *program, CuSparseMatrix &sm, const Ndarray &row_csr, const Ndarray &col_csr, const Ndarray &val_csr) { + return make_sparse_matrix_from_ndarray_cusparse(program, sm, row_csr, col_csr, val_csr); } ) .def( @@ -1072,6 +1075,10 @@ void export_lang(py::module &m) { MAKE_SPARSE_MATRIX(64, ColMajor, d); MAKE_SPARSE_MATRIX(64, RowMajor, d); + py::class_(m, "CuSparseMatrix") + .def("spmv",&CuSparseMatrix::spmv); + + py::class_(m, "SparseSolver") .def("compute", &SparseSolver::compute) .def("analyze_pattern", &SparseSolver::analyze_pattern) From 457a9f17bab91cd518747bcbe348b6ae6e3a561f Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Thu, 16 Jun 2022 12:53:00 +0800 Subject: [PATCH 16/30] refactor --- misc/sm_gpu_ndarray.py | 37 ++++++++++++++++++--------- python/taichi/linalg/sparse_matrix.py | 30 ++++++++++++++++------ taichi/backends/arch.cpp | 4 +++ taichi/backends/arch.h | 2 ++ taichi/backends/cuda/cuda_context.cpp | 5 +--- taichi/backends/cuda/cuda_context.h | 6 ----- taichi/backends/cuda/cuda_driver.h | 6 ++--- taichi/program/sparse_matrix.cpp | 32 ----------------------- taichi/program/sparse_matrix.h | 2 +- taichi/python/export_lang.cpp | 12 ++++----- 10 files changed, 63 insertions(+), 73 
deletions(-) diff --git a/misc/sm_gpu_ndarray.py b/misc/sm_gpu_ndarray.py index ff20e9dfd1678..24726a283eba2 100644 --- a/misc/sm_gpu_ndarray.py +++ b/misc/sm_gpu_ndarray.py @@ -1,35 +1,48 @@ import taichi as ti import numpy as np -ti.init(arch=ti.cuda, gdb_trigger=False) +ti.init(arch=ti.cuda) +# Numpy arrays for taichi ndarrays +h_row_csr = np.asarray([ 0, 3, 4, 7, 9], dtype=np.int32) +h_col_csr = np.asarray([0, 2, 3, 1, 0, 2, 3, 1, 3 ], dtype=np.int32) +h_value_csr = np.asarray([1.0, 2.0, 3.0, 4.0, 5.0,6.0, 7.0, 8.0, 9.0], dtype=np.float32) +h_X = np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float32) +h_Y = np.asarray([19.0, 8.0, 51.0, 52.0], dtype=np.float32) + +# Data structure for building the CSR matrix A using Taichi Sparse Matrix idx_dt = ti.int32 val_dt = ti.f32 row_csr = ti.ndarray(shape=5, dtype=idx_dt) col_csr = ti.ndarray(shape=9, dtype=idx_dt) value_csr = ti.ndarray(shape=9, dtype=val_dt) +# Dense vector x X = ti.ndarray(shape=4, dtype=val_dt) +# Results for A @ x Y = ti.ndarray(shape=4, dtype=val_dt) -Y_result = ti.ndarray(shape=4, dtype=val_dt) - - -h_row_csr = np.asarray([ 0, 3, 4, 7, 9], dtype=np.int32) -h_col_csr = np.asarray([0, 2, 3, 1, 0, 2, 3, 1, 3 ], dtype=np.int32) -h_value_csr = np.asarray([1.0, 2.0, 3.0, 4.0, 5.0,6.0, 7.0, 8.0, 9.0], dtype=np.float32) -h_X = np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float32) -h_Y_result = np.asarray([19.0, 8.0, 51.0, 52.0], dtype=np.float32) - +# Initialize the CSR matrix and vectors with numpy array row_csr.from_numpy(h_row_csr) col_csr.from_numpy(h_col_csr) value_csr.from_numpy(h_value_csr) X.from_numpy(h_X) Y.fill(0.0) +# Define the CSR matrix A A = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.f32) -A.build_from_ndarray_cusparse(row_csr, col_csr, value_csr) +# Build the CSR matrix A with Taichi ndarray +A.build_csr_cusparse(row_csr, col_csr, value_csr) +# Compute Y = A @ X A.spmv(X, Y) +# Check if the results are correct +equal = True for i in range(4): - print(f"{Y[i]} == {h_Y_result[i]} : {Y[i] == h_Y_result[i]}") \ No newline at end of file + if Y[i] != h_Y[i]: + equal = False + break +if equal: + print("Spmv Results is correct!") +else: + print("Opps! Spmv Results is wrong.") \ No newline at end of file diff --git a/python/taichi/linalg/sparse_matrix.py b/python/taichi/linalg/sparse_matrix.py index 21b6bf4d2a165..540d6dd3194b8 100644 --- a/python/taichi/linalg/sparse_matrix.py +++ b/python/taichi/linalg/sparse_matrix.py @@ -194,22 +194,36 @@ def build_from_ndarray(self, ndarray): 'Sparse matrix only supports building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray]' ) - def build_from_ndarray_cusparse(self, row_csr, col_csr, value_csr): - if isinstance(row_csr, Ndarray) and isinstance(col_csr, Ndarray) and isinstance(value_csr, Ndarray): - print(type(self.matrix)) + def build_csr_cusparse(self, data, indices, indptr): + """Build a csr format sparse matrix using cuSparse where the column indices + for row i are stored in ``indices[indptr[i]:indptr[i+1]]`` + and their corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``. + + Args: + data (ti.ndarray): CSR format data array of the matrix. + indices (ti.ndarray): CSR format index array of the matrix. + indptr (ti.ndarray): CSR format index pointer array of the matrix. 
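As a concrete reading of the layout described above, the CSR triple used throughout this series (in misc/sm_gpu_ndarray.py and later in the test) decodes to a small dense matrix. SciPy is used here only to illustrate the encoding; it is not a Taichi dependency:

import numpy as np
from scipy.sparse import csr_matrix

data    = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.float32)
indices = np.array([0, 2, 3, 1, 0, 2, 3, 1, 3], dtype=np.int32)
indptr  = np.array([0, 3, 4, 7, 9], dtype=np.int32)

# Row i owns data[indptr[i]:indptr[i+1]] at columns indices[indptr[i]:indptr[i+1]].
A = csr_matrix((data, indices, indptr), shape=(4, 4))
print(A.toarray())
# [[1. 0. 2. 3.]
#  [0. 4. 0. 0.]
#  [5. 0. 6. 7.]
#  [0. 8. 0. 9.]]
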
+ """ + if isinstance(data, Ndarray) and isinstance(indices, Ndarray) and isinstance(indptr, Ndarray): get_runtime().prog.make_sparse_matrix_from_ndarray_cusparse( - self.matrix, row_csr.arr, col_csr.arr, value_csr.arr) + self.matrix, indptr.arr, indices.arr, data.arr) else: raise TaichiRuntimeError( 'Sparse matrix only supports building from [ti.ndarray, ti.Vectorndarray, ti.Matrix.ndarray]' ) def spmv(self, x, y): - """Sparse matrix-vector multiplication. + """Sparse matrix-vector multiplication using cuSparse. Args: - x (ti.Vector): the vector to be multiplied. - Returns: - The result of matrix-vector multiplication. + x (ti.ndarray): the vector to be multiplied. + y (ti.ndarray): the result of matrix-vector multiplication. + + Example:: + >>> x = ti.ndarray(shape=4, dtype=val_dt) + >>> y = ti.ndarray(shape=4, dtype=val_dt) + >>> A = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.f32) + >>> A.build_from_ndarray_cusparse(row_csr, col_csr, value_csr) + >>> A.spmv(x, y) """ if not isinstance(x, Ndarray) or not isinstance(y, Ndarray): raise TaichiRuntimeError( diff --git a/taichi/backends/arch.cpp b/taichi/backends/arch.cpp index 9de9c8e82f7e3..2fcfd915a8a2f 100644 --- a/taichi/backends/arch.cpp +++ b/taichi/backends/arch.cpp @@ -43,6 +43,10 @@ bool arch_is_cpu(Arch arch) { } } +bool arch_uses_cuda(Arch arch){ + return arch == Arch::cuda; +} + bool arch_uses_llvm(Arch arch) { return (arch == Arch::x64 || arch == Arch::arm64 || arch == Arch::cuda || arch == Arch::wasm); diff --git a/taichi/backends/arch.h b/taichi/backends/arch.h index 2d7cffde8950f..bfc1f2adc324f 100644 --- a/taichi/backends/arch.h +++ b/taichi/backends/arch.h @@ -18,6 +18,8 @@ Arch arch_from_name(const std::string &arch); bool arch_is_cpu(Arch arch); +bool arch_uses_cuda(Arch arch); + bool arch_uses_llvm(Arch arch); bool arch_is_gpu(Arch arch); diff --git a/taichi/backends/cuda/cuda_context.cpp b/taichi/backends/cuda/cuda_context.cpp index 1e204cff5f2be..a138c3d555395 100644 --- a/taichi/backends/cuda/cuda_context.cpp +++ b/taichi/backends/cuda/cuda_context.cpp @@ -13,9 +13,7 @@ TLANG_NAMESPACE_BEGIN CUDAContext::CUDAContext() - : profiler_(nullptr), - driver_(CUDADriver::get_instance_without_context()) - { + : profiler_(nullptr), driver_(CUDADriver::get_instance_without_context()) { // CUDA initialization dev_count_ = 0; driver_.init(0); @@ -129,7 +127,6 @@ CUDAContext::~CUDAContext() { CUDADriver::get_instance().cuModuleUnload(cudaModule); CUDADriver::get_instance().cuCtxDestroy(context); */ -// cusparse_driver_.cpDestroy(cusparse_handle_); } CUDAContext &CUDAContext::get_instance() { diff --git a/taichi/backends/cuda/cuda_context.h b/taichi/backends/cuda/cuda_context.h index 5e3801f302d33..69a02adf6f082 100644 --- a/taichi/backends/cuda/cuda_context.h +++ b/taichi/backends/cuda/cuda_context.h @@ -70,10 +70,6 @@ class CUDAContext { return compute_capability_; } - // cusparseHandle_t& get_cusparse_handle(){ - // return cusparse_handle_; - // } - ~CUDAContext(); class ContextGuard { @@ -93,8 +89,6 @@ class CUDAContext { CUDADriver::get_instance().context_set_current(old_ctx_); } } - - }; ContextGuard get_guard() { diff --git a/taichi/backends/cuda/cuda_driver.h b/taichi/backends/cuda/cuda_driver.h index 08575d76fbca0..6bf1d90921a59 100644 --- a/taichi/backends/cuda/cuda_driver.h +++ b/taichi/backends/cuda/cuda_driver.h @@ -137,7 +137,6 @@ class CUDADriver : protected CUDADriverBase { class CUSPARSEDriver : protected CUDADriverBase { public: - // TODO: Add cusparse function APIs static CUSPARSEDriver &get_instance(); #define 
PER_CUSPARSE_FUNCTION(name, symbol_name, ...) \ @@ -147,9 +146,8 @@ class CUSPARSEDriver : protected CUDADriverBase { bool load_cusparse(); - bool is_loaded() { - return cusparse_loaded_; - } + inline bool is_loaded() { return cusparse_loaded_;} + private: CUSPARSEDriver(); std::mutex lock_; diff --git a/taichi/program/sparse_matrix.cpp b/taichi/program/sparse_matrix.cpp index 8947906268cbe..e3f1534dcaff2 100644 --- a/taichi/program/sparse_matrix.cpp +++ b/taichi/program/sparse_matrix.cpp @@ -1,5 +1,4 @@ #include "taichi/program/sparse_matrix.h" -#include "taichi/backends/cuda/cuda_driver.h" #include #include @@ -220,37 +219,6 @@ void make_sparse_matrix_from_ndarray_cusparse(Program *prog, size_t values_csr = prog->get_ndarray_data_ptr_as_int(&values); int nnz = values.get_nelement(); sm.build_csr((void*)row_csr, (void*)col_csr, (void*)values_csr, nnz); - - // int A_num_rows = sm.num_rows(); - // int A_num_cols = sm.num_cols(); - // cusparseSpMatDescr_t matA; - // CUSPARSEDriver::get_instance().cpCreateCsr(&matA, A_num_rows, A_num_cols, nnz, - // (void*)row_csr, (void*)col_csr, (void*)values_csr, - // CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, - // CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); - // size_t dX = prog->get_ndarray_data_ptr_as_int(&x); - // size_t dY = prog->get_ndarray_data_ptr_as_int(&y); - // cusparseDnVecDescr_t vecX, vecY; - // CUSPARSEDriver::get_instance().cpCreateDnVec(&vecX, A_num_cols,(void*) dX, CUDA_R_32F); - // CUSPARSEDriver::get_instance().cpCreateDnVec(&vecY, A_num_cols,(void*) dY, CUDA_R_32F); - // cusparseHandle_t cusparse_handle; - // CUSPARSEDriver::get_instance().cpCreate(&cusparse_handle); - // float alpha = 1.0f, beta = 0.0f; - // size_t bufferSize = 0; - // CUSPARSEDriver::get_instance().cpSpMV_bufferSize(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, - // &alpha, matA, vecX, &beta, vecY, CUDA_R_32F, - // CUSPARSE_SPMV_CSR_ALG1, &bufferSize); - // void* dBuffer = NULL; - // // CUDADriver::get_instance().stream_synchronize(nullptr); - // CUDADriver::get_instance().malloc(&dBuffer, bufferSize); - // CUSPARSEDriver::get_instance().cpSpMV( cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, - // &alpha, matA, vecX, &beta, vecY, CUDA_R_32F, - // CUSPARSE_SPMV_CSR_ALG1, dBuffer); - // CUSPARSEDriver::get_instance().cpDestroySpMat(matA); - // CUSPARSEDriver::get_instance().cpDestroyDnVec(vecX); - // CUSPARSEDriver::get_instance().cpDestroyDnVec(vecY); - // CUSPARSEDriver::get_instance().cpDestroy(cusparse_handle); - // CUDADriver::get_instance().mem_free(dBuffer); } void CuSparseMatrix::spmv(Program *prog, const Ndarray &x, Ndarray &y){ diff --git a/taichi/program/sparse_matrix.h b/taichi/program/sparse_matrix.h index 5f9d84dfedf4e..81b23aa0e4937 100644 --- a/taichi/program/sparse_matrix.h +++ b/taichi/program/sparse_matrix.h @@ -1,11 +1,11 @@ #pragma once +#include "taichi/backends/cuda/cuda_driver.h" #include "taichi/common/core.h" #include "taichi/inc/constants.h" #include "taichi/ir/type_utils.h" #include "taichi/program/ndarray.h" #include "taichi/program/program.h" -#include "taichi/backends/cuda/cuda_context.h" #include "Eigen/Sparse" diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index fb12306c7cbc0..6749a28c8a36a 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -369,16 +369,16 @@ void export_lang(py::module &m) { .def("create_sparse_matrix_builder", [](Program *program, int n, int m, uint64 max_num_entries, DataType dtype, const std::string &storage_format) { - // 
TI_ERROR_IF(!arch_is_cpu(program->config.arch), - // "SparseMatrix only supports CPU for now."); + TI_ERROR_IF(!arch_is_cpu(program->config.arch), + "SparseMatrix Builder only supports CPU for now."); return SparseMatrixBuilder(n, m, max_num_entries, dtype, storage_format); }) .def("create_sparse_matrix", [](Program *program, int n, int m, DataType dtype, std::string storage_format) { - // TI_ERROR_IF(!arch_is_cpu(program->config.arch), - // "SparseMatrix only supports CPU for now."); + TI_ERROR_IF(!arch_is_cpu(program->config.arch) && !arch_uses_cuda(program->config.arch), + "SparseMatrix only supports CPU and CUDA for now."); if (arch_is_cpu(program->config.arch)) return make_sparse_matrix(n, m, dtype, storage_format); else @@ -386,8 +386,8 @@ void export_lang(py::module &m) { }) .def("make_sparse_matrix_from_ndarray", [](Program *program, SparseMatrix &sm, const Ndarray &ndarray) { - // TI_ERROR_IF(!arch_is_cpu(program->config.arch),m - // "SparseMatrix only supports CPU for now."); + TI_ERROR_IF(!arch_is_cpu(program->config.arch) && !arch_uses_cuda(program->config.arch), + "SparseMatrix only supports CPU and CUDA for now."); return make_sparse_matrix_from_ndarray(program, sm, ndarray); }) .def("make_sparse_matrix_from_ndarray_cusparse", From 9ae67223ac770e02a1c27c0dcb5d8a33161ad884 Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Thu, 16 Jun 2022 17:19:51 +0800 Subject: [PATCH 17/30] fix parameter bug --- misc/sm_gpu_ndarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc/sm_gpu_ndarray.py b/misc/sm_gpu_ndarray.py index 24726a283eba2..0adc7ebd141f3 100644 --- a/misc/sm_gpu_ndarray.py +++ b/misc/sm_gpu_ndarray.py @@ -31,7 +31,7 @@ A = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.f32) # Build the CSR matrix A with Taichi ndarray -A.build_csr_cusparse(row_csr, col_csr, value_csr) +A.build_csr_cusparse(value_csr,col_csr, row_csr) # Compute Y = A @ X A.spmv(X, Y) From 73ebb92158a50aea8989296d286e463ff59587dc Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Thu, 16 Jun 2022 17:21:17 +0800 Subject: [PATCH 18/30] fix test bug --- taichi/backends/cuda/cuda_driver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taichi/backends/cuda/cuda_driver.cpp b/taichi/backends/cuda/cuda_driver.cpp index cd65dcdb7ff2f..deef46de021ff 100644 --- a/taichi/backends/cuda/cuda_driver.cpp +++ b/taichi/backends/cuda/cuda_driver.cpp @@ -20,7 +20,7 @@ bool CUDADriver::detected() { } CUDADriver::CUDADriver() { - load_lib("libcuda.so", "nvcuda.dll"); + if (!load_lib("libcuda.so", "nvcuda.dll")) return; loader_->load_function("cuGetErrorName", get_error_name); loader_->load_function("cuGetErrorString", get_error_string); From bc857d98239192be2c0ac914f7d10464ae03bc73 Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Thu, 16 Jun 2022 17:38:53 +0800 Subject: [PATCH 19/30] fix --- taichi/backends/cuda/cuda_driver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taichi/backends/cuda/cuda_driver.cpp b/taichi/backends/cuda/cuda_driver.cpp index deef46de021ff..24f529a2242a0 100644 --- a/taichi/backends/cuda/cuda_driver.cpp +++ b/taichi/backends/cuda/cuda_driver.cpp @@ -84,8 +84,8 @@ bool CUDADriverBase::load_lib(std::string lib_linux, std::string lib_windows) { return false; } else { TI_TRACE("{} loaded!", lib_name); + return true; } - return true; } CUSPARSEDriver::CUSPARSEDriver() { From c0b395b7ccbd868ade75800ee4ea9f86948031e6 Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Thu, 28 Jul 2022 16:08:52 +0800 Subject: [PATCH 20/30] fix mac/windows failed tests --- 
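The expected output [19, 8, 51, 52] that the example script checks against (and that the test added later in this series reuses as h_Y) follows directly from the CSR definition of a matrix-vector product. A plain NumPy re-computation, assuming the same arrays as in the example script:

import numpy as np

data    = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.float32)
indices = np.array([0, 2, 3, 1, 0, 2, 3, 1, 3], dtype=np.int32)
indptr  = np.array([0, 3, 4, 7, 9], dtype=np.int32)
x = np.array([1, 2, 3, 4], dtype=np.float32)

# y[i] accumulates A[i, j] * x[j] over the nonzeros stored for row i,
# which is what the cusparseSpMV call computes on the device.
y = np.zeros(4, dtype=np.float32)
for i in range(4):
    start, end = indptr[i], indptr[i + 1]
    y[i] = np.dot(data[start:end], x[indices[start:end]])

print(y)  # [19.  8. 51. 52.]
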
taichi/program/sparse_matrix.cpp | 9 +++++++++ taichi/python/export_lang.cpp | 3 +++ 2 files changed, 12 insertions(+) diff --git a/taichi/program/sparse_matrix.cpp b/taichi/program/sparse_matrix.cpp index e138342d4f75f..dc1da64b748f6 100644 --- a/taichi/program/sparse_matrix.cpp +++ b/taichi/program/sparse_matrix.cpp @@ -202,19 +202,25 @@ void CuSparseMatrix::build_csr(void *csr_ptr, void *csr_indices_ptr, void *csr_values_ptr, int nnz) { +#if define(TI_WITH_CUDA) CUSPARSEDriver::get_instance().cpCreateCsr( &matrix_, rows_, cols_, nnz, csr_ptr, csr_indices_ptr, csr_values_ptr, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F); +#endif } CuSparseMatrix::~CuSparseMatrix() { +#if define(TI_WITH_CUDA) CUSPARSEDriver::get_instance().cpDestroySpMat(matrix_); +#endif } void make_sparse_matrix_from_ndarray_cusparse(Program *prog, SparseMatrix &sm, const Ndarray &row_offsets, const Ndarray &col_indices, const Ndarray &values) { +#if define(TI_WITH_CUDA) + std::string sdtype = taichi::lang::data_type_name(sm.get_data_type()); if (!CUSPARSEDriver::get_instance().is_loaded()) { bool load_success = CUSPARSEDriver::get_instance().load_cusparse(); if (!load_success) { @@ -226,9 +232,11 @@ void make_sparse_matrix_from_ndarray_cusparse(Program *prog, size_t values_csr = prog->get_ndarray_data_ptr_as_int(&values); int nnz = values.get_nelement(); sm.build_csr((void *)row_csr, (void *)col_csr, (void *)values_csr, nnz); +#endif } void CuSparseMatrix::spmv(Program *prog, const Ndarray &x, Ndarray &y) { +#if define(TI_WITH_CUDA) size_t dX = prog->get_ndarray_data_ptr_as_int(&x); size_t dY = prog->get_ndarray_data_ptr_as_int(&y); @@ -256,6 +264,7 @@ void CuSparseMatrix::spmv(Program *prog, const Ndarray &x, Ndarray &y) { CUSPARSEDriver::get_instance().cpDestroyDnVec(vecY); CUSPARSEDriver::get_instance().cpDestroy(cusparse_handle); CUDADriver::get_instance().mem_free(dBuffer); +#endif } } // namespace lang diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index 4984660aef017..a1482c0709cd7 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -399,6 +399,9 @@ void export_lang(py::module &m) { .def("make_sparse_matrix_from_ndarray_cusparse", [](Program *program, CuSparseMatrix &sm, const Ndarray &row_csr, const Ndarray &col_csr, const Ndarray &val_csr) { + TI_ERROR_IF(!arch_is_cpu(program->config.arch) && + !arch_uses_cuda(program->config.arch), + "SparseMatrix only supports CPU and CUDA for now."); return make_sparse_matrix_from_ndarray_cusparse( program, sm, row_csr, col_csr, val_csr); }) From f60e675e8eebd6086abf3e50850f73f1f3a79560 Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Thu, 28 Jul 2022 16:19:44 +0800 Subject: [PATCH 21/30] fix --- taichi/program/sparse_matrix.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/taichi/program/sparse_matrix.cpp b/taichi/program/sparse_matrix.cpp index dc1da64b748f6..a91bbf0831aef 100644 --- a/taichi/program/sparse_matrix.cpp +++ b/taichi/program/sparse_matrix.cpp @@ -202,7 +202,7 @@ void CuSparseMatrix::build_csr(void *csr_ptr, void *csr_indices_ptr, void *csr_values_ptr, int nnz) { -#if define(TI_WITH_CUDA) +#if defined(TI_WITH_CUDA) CUSPARSEDriver::get_instance().cpCreateCsr( &matrix_, rows_, cols_, nnz, csr_ptr, csr_indices_ptr, csr_values_ptr, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, @@ -210,7 +210,7 @@ void CuSparseMatrix::build_csr(void *csr_ptr, #endif } CuSparseMatrix::~CuSparseMatrix() { -#if define(TI_WITH_CUDA) +#if 
defined(TI_WITH_CUDA) CUSPARSEDriver::get_instance().cpDestroySpMat(matrix_); #endif } @@ -219,7 +219,7 @@ void make_sparse_matrix_from_ndarray_cusparse(Program *prog, const Ndarray &row_offsets, const Ndarray &col_indices, const Ndarray &values) { -#if define(TI_WITH_CUDA) +#if defined(TI_WITH_CUDA) std::string sdtype = taichi::lang::data_type_name(sm.get_data_type()); if (!CUSPARSEDriver::get_instance().is_loaded()) { bool load_success = CUSPARSEDriver::get_instance().load_cusparse(); @@ -236,7 +236,7 @@ void make_sparse_matrix_from_ndarray_cusparse(Program *prog, } void CuSparseMatrix::spmv(Program *prog, const Ndarray &x, Ndarray &y) { -#if define(TI_WITH_CUDA) +#if defined(TI_WITH_CUDA) size_t dX = prog->get_ndarray_data_ptr_as_int(&x); size_t dY = prog->get_ndarray_data_ptr_as_int(&y); From 2e1edda70073605013d11a0b54d2b9762fe21aa0 Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Fri, 29 Jul 2022 11:21:07 +0800 Subject: [PATCH 22/30] add tests for gpu sparse matrix --- misc/sm_gpu_ndarray.py | 51 ------------------------------ taichi/python/export_lang.cpp | 6 ++-- tests/python/test_sparse_matrix.py | 40 +++++++++++++++++++++++ 3 files changed, 43 insertions(+), 54 deletions(-) delete mode 100644 misc/sm_gpu_ndarray.py diff --git a/misc/sm_gpu_ndarray.py b/misc/sm_gpu_ndarray.py deleted file mode 100644 index 256bcaab7146c..0000000000000 --- a/misc/sm_gpu_ndarray.py +++ /dev/null @@ -1,51 +0,0 @@ -import numpy as np - -import taichi as ti - -ti.init(arch=ti.cuda) - -# Numpy arrays for taichi ndarrays -h_row_csr = np.asarray([0, 3, 4, 7, 9], dtype=np.int32) -h_col_csr = np.asarray([0, 2, 3, 1, 0, 2, 3, 1, 3], dtype=np.int32) -h_value_csr = np.asarray([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], - dtype=np.float32) -h_X = np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float32) -h_Y = np.asarray([19.0, 8.0, 51.0, 52.0], dtype=np.float32) - -# Data structure for building the CSR matrix A using Taichi Sparse Matrix -idx_dt = ti.int32 -val_dt = ti.f32 -row_csr = ti.ndarray(shape=5, dtype=idx_dt) -col_csr = ti.ndarray(shape=9, dtype=idx_dt) -value_csr = ti.ndarray(shape=9, dtype=val_dt) -# Dense vector x -X = ti.ndarray(shape=4, dtype=val_dt) -# Results for A @ x -Y = ti.ndarray(shape=4, dtype=val_dt) - -# Initialize the CSR matrix and vectors with numpy array -row_csr.from_numpy(h_row_csr) -col_csr.from_numpy(h_col_csr) -value_csr.from_numpy(h_value_csr) -X.from_numpy(h_X) -Y.fill(0.0) - -# Define the CSR matrix A -A = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.f32) - -# Build the CSR matrix A with Taichi ndarray -A.build_csr_cusparse(value_csr, col_csr, row_csr) - -# Compute Y = A @ X -A.spmv(X, Y) - -# Check if the results are correct -equal = True -for i in range(4): - if Y[i] != h_Y[i]: - equal = False - break -if equal: - print("Spmv Results is correct!") -else: - print("Opps! 
Spmv Results is wrong.") diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index a1482c0709cd7..7d0e6a1a5dd0a 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -399,9 +399,9 @@ void export_lang(py::module &m) { .def("make_sparse_matrix_from_ndarray_cusparse", [](Program *program, CuSparseMatrix &sm, const Ndarray &row_csr, const Ndarray &col_csr, const Ndarray &val_csr) { - TI_ERROR_IF(!arch_is_cpu(program->config.arch) && - !arch_uses_cuda(program->config.arch), - "SparseMatrix only supports CPU and CUDA for now."); + TI_ERROR_IF( + !arch_uses_cuda(program->config.arch), + "SparseMatrix based on GPU only supports CUDA for now."); return make_sparse_matrix_from_ndarray_cusparse( program, sm, row_csr, col_csr, val_csr); }) diff --git a/tests/python/test_sparse_matrix.py b/tests/python/test_sparse_matrix.py index d4f6b320c8484..305e39a39625a 100644 --- a/tests/python/test_sparse_matrix.py +++ b/tests/python/test_sparse_matrix.py @@ -1,3 +1,4 @@ +import numpy as np import pytest import taichi as ti @@ -374,3 +375,42 @@ def fill(Abuilder: ti.types.sparse_matrix_builder(), for i in range(n): for j in range(m): assert C[i, j] == GT[i][j] + + +@test_utils.test(arch=ti.cuda) +def test_sparse_matrix_nonsymmetric_multiplication(): + h_row_csr = np.asarray([0, 3, 4, 7, 9], dtype=np.int32) + h_col_csr = np.asarray([0, 2, 3, 1, 0, 2, 3, 1, 3], dtype=np.int32) + h_value_csr = np.asarray([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], + dtype=np.float32) + h_X = np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float32) + h_Y = np.asarray([19.0, 8.0, 51.0, 52.0], dtype=np.float32) + + # Data structure for building the CSR matrix A using Taichi Sparse Matrix + idx_dt = ti.int32 + val_dt = ti.f32 + row_csr = ti.ndarray(shape=5, dtype=idx_dt) + col_csr = ti.ndarray(shape=9, dtype=idx_dt) + value_csr = ti.ndarray(shape=9, dtype=val_dt) + # Dense vector x + X = ti.ndarray(shape=4, dtype=val_dt) + # Results for A @ x + Y = ti.ndarray(shape=4, dtype=val_dt) + + # Initialize the CSR matrix and vectors with numpy array + row_csr.from_numpy(h_row_csr) + col_csr.from_numpy(h_col_csr) + value_csr.from_numpy(h_value_csr) + X.from_numpy(h_X) + Y.fill(0.0) + + # Define the CSR matrix A + A = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.f32) + + # Build the CSR matrix A with Taichi ndarray + A.build_csr_cusparse(value_csr, col_csr, row_csr) + + # Compute Y = A @ X + A.spmv(X, Y) + for i in range(4): + assert Y[i] == h_Y[i] From e060c3b7fc609c96dfd228fb1946b63e37d353bc Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Fri, 29 Jul 2022 11:49:54 +0800 Subject: [PATCH 23/30] fix test --- tests/python/test_sparse_matrix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/test_sparse_matrix.py b/tests/python/test_sparse_matrix.py index 305e39a39625a..69a0d66f5958d 100644 --- a/tests/python/test_sparse_matrix.py +++ b/tests/python/test_sparse_matrix.py @@ -378,7 +378,7 @@ def fill(Abuilder: ti.types.sparse_matrix_builder(), @test_utils.test(arch=ti.cuda) -def test_sparse_matrix_nonsymmetric_multiplication(): +def test_gpu_sparse_matrix(): h_row_csr = np.asarray([0, 3, 4, 7, 9], dtype=np.int32) h_col_csr = np.asarray([0, 2, 3, 1, 0, 2, 3, 1, 3], dtype=np.int32) h_value_csr = np.asarray([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], From 8e9021caa4c3edd940abc82d8a67fa45a3884906 Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Thu, 4 Aug 2022 20:29:01 +0800 Subject: [PATCH 24/30] fix --- taichi/program/sparse_matrix.cpp | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/taichi/program/sparse_matrix.cpp b/taichi/program/sparse_matrix.cpp index a91bbf0831aef..a888a649a7ebc 100644 --- a/taichi/program/sparse_matrix.cpp +++ b/taichi/program/sparse_matrix.cpp @@ -255,7 +255,7 @@ void CuSparseMatrix::spmv(Program *prog, const Ndarray &x, Ndarray &y) { &beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_CSR_ALG1, &bufferSize); void *dBuffer = NULL; - CUDADriver::get_instance().malloc(&dBuffer, bufferSize); + CUDADriver::get_instance().malloc(&dBuffer, 8); CUSPARSEDriver::get_instance().cpSpMV( cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matrix_, vecX, &beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_CSR_ALG1, dBuffer); From b7d1a70f182174bf898f8f05465449b0f37e7016 Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Mon, 15 Aug 2022 14:25:39 +0800 Subject: [PATCH 25/30] fix cuMemAlloc_v2 bug and windows dll name bug --- taichi/program/sparse_matrix.cpp | 3 ++- taichi/rhi/cuda/cuda_driver.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/taichi/program/sparse_matrix.cpp b/taichi/program/sparse_matrix.cpp index a888a649a7ebc..bdf646ee7594b 100644 --- a/taichi/program/sparse_matrix.cpp +++ b/taichi/program/sparse_matrix.cpp @@ -255,7 +255,8 @@ void CuSparseMatrix::spmv(Program *prog, const Ndarray &x, Ndarray &y) { &beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_CSR_ALG1, &bufferSize); void *dBuffer = NULL; - CUDADriver::get_instance().malloc(&dBuffer, 8); + if (bufferSize > 0) + CUDADriver::get_instance().malloc(&dBuffer, bufferSize); CUSPARSEDriver::get_instance().cpSpMV( cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matrix_, vecX, &beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_CSR_ALG1, dBuffer); diff --git a/taichi/rhi/cuda/cuda_driver.cpp b/taichi/rhi/cuda/cuda_driver.cpp index d9356e478fc0b..cf205fac9fb53 100644 --- a/taichi/rhi/cuda/cuda_driver.cpp +++ b/taichi/rhi/cuda/cuda_driver.cpp @@ -98,7 +98,7 @@ CUSPARSEDriver &CUSPARSEDriver::get_instance() { } bool CUSPARSEDriver::load_cusparse() { - cusparse_loaded_ = load_lib("libcusparse.so", "cusparse.dll"); + cusparse_loaded_ = load_lib("libcusparse.so", "cusparse64_11.dll"); if (!cusparse_loaded_) { return false; From 6fc1e75da242f3cbd247e5da80a37570f980d475 Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Mon, 15 Aug 2022 15:01:23 +0800 Subject: [PATCH 26/30] fix ci --- taichi/rhi/cuda/cusparse_functions.inc.h | 1 - 1 file changed, 1 deletion(-) diff --git a/taichi/rhi/cuda/cusparse_functions.inc.h b/taichi/rhi/cuda/cusparse_functions.inc.h index a5edb184cd121..37df588ebdb66 100644 --- a/taichi/rhi/cuda/cusparse_functions.inc.h +++ b/taichi/rhi/cuda/cusparse_functions.inc.h @@ -7,7 +7,6 @@ PER_CUSPARSE_FUNCTION(cpDestroy, cusparseDestroy, cusparseHandle_t); // cusparse sparse matrix description PER_CUSPARSE_FUNCTION(cpCreateCoo, cusparseCreateCoo, cusparseSpMatDescr_t*, int, int, int,void*, void*, void*,cusparseIndexType_t, cusparseIndexBase_t,cudaDataType ); PER_CUSPARSE_FUNCTION(cpCreateCsr, cusparseCreateCsr, cusparseSpMatDescr_t*, int, int, int,void*, void*, void*,cusparseIndexType_t, cusparseIndexType_t, cusparseIndexBase_t,cudaDataType ); -PER_CUSPARSE_FUNCTION(cpSpMatGetSize, cusparseSpMatGetSize, cusparseSpMatDescr_t, int64_t*, int64_t*, int64_t*); PER_CUSPARSE_FUNCTION(cpDestroySpMat, cusparseDestroySpMat, cusparseSpMatDescr_t); // cusparse dense vector description From eada0f59b6a80d08516f155a4a8820645f89fee4 Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Tue, 16 Aug 2022 10:01:27 +0800 Subject: [PATCH 27/30] csr datatype checking --- 
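The dtype check this patch introduces exists because the C++ builder always passes CUSPARSE_INDEX_32I and CUDA_R_32F to cusparseCreateCsr, so anything other than int32 indices/indptr and float32 data would be reinterpreted incorrectly on the device. A minimal sketch of the same guard, with NumPy arrays standing in for ti.ndarray (the function name is illustrative, not Taichi API):

import numpy as np

def check_csr_dtypes(data, indices, indptr):
    # Mirror of the Python-side guard: reject anything the hard-coded
    # CUSPARSE_INDEX_32I / CUDA_R_32F descriptor could not describe.
    if data.dtype != np.float32 or indices.dtype != np.int32 or indptr.dtype != np.int32:
        raise TypeError(
            "GPU sparse matrix needs float32 data and int32 indices/indptr")

check_csr_dtypes(np.zeros(9, np.float32), np.zeros(9, np.int32), np.zeros(5, np.int32))  # passes
# check_csr_dtypes(np.zeros(9, np.float64), np.zeros(9, np.int64), np.zeros(5, np.int64))  # would raise
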
python/taichi/linalg/sparse_matrix.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/python/taichi/linalg/sparse_matrix.py b/python/taichi/linalg/sparse_matrix.py index ac9da51e79911..021a70c2ea3e9 100644 --- a/python/taichi/linalg/sparse_matrix.py +++ b/python/taichi/linalg/sparse_matrix.py @@ -6,7 +6,7 @@ from taichi.lang.impl import get_runtime from taichi.lang.matrix import Ndarray from taichi.lang.util import warning -from taichi.types import annotations, f32 +from taichi.types import annotations, f32, i32 class SparseMatrix: @@ -208,14 +208,18 @@ def build_csr_cusparse(self, data, indices, indptr): indices (ti.ndarray): CSR format index array of the matrix. indptr (ti.ndarray): CSR format index pointer array of the matrix. """ - if isinstance(data, Ndarray) and isinstance( - indices, Ndarray) and isinstance(indptr, Ndarray): - get_runtime().prog.make_sparse_matrix_from_ndarray_cusparse( - self.matrix, indptr.arr, indices.arr, data.arr) - else: + if not isinstance(data, Ndarray) or not isinstance( + indices, Ndarray) or not isinstance(indptr, Ndarray): raise TaichiRuntimeError( - 'Sparse matrix only supports building from [ti.ndarray, ti.Vectorndarray, ti.Matrix.ndarray]' + 'Sparse matrix only supports building from [ti.ndarray, ti.Vectorndarray, ti.Matrix.ndarray].' ) + elif data.dtype != f32 or indices.dtype != i32 or indptr.dtype != i32: + raise TaichiRuntimeError( + 'Sparse matrix only supports building from float32 data, int32 indices and indptr.' + ) + else: + get_runtime().prog.make_sparse_matrix_from_ndarray_cusparse( + self.matrix, indptr.arr, indices.arr, data.arr) def spmv(self, x, y): """Sparse matrix-vector multiplication using cuSparse. From 8f5ef400930a9cf8ac2bf7eb86c5d405150c31a7 Mon Sep 17 00:00:00 2001 From: pengyu <6712304+FantasyVR@users.noreply.github.com> Date: Tue, 16 Aug 2022 18:21:58 +0800 Subject: [PATCH 28/30] Apply suggestions from code review Co-authored-by: Yi Xu --- python/taichi/linalg/sparse_matrix.py | 6 +++--- taichi/program/sparse_matrix.h | 2 +- taichi/rhi/arch.cpp | 2 +- taichi/rhi/cuda/cuda_driver.cpp | 1 - 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/python/taichi/linalg/sparse_matrix.py b/python/taichi/linalg/sparse_matrix.py index 021a70c2ea3e9..e729052b05f05 100644 --- a/python/taichi/linalg/sparse_matrix.py +++ b/python/taichi/linalg/sparse_matrix.py @@ -211,11 +211,11 @@ def build_csr_cusparse(self, data, indices, indptr): if not isinstance(data, Ndarray) or not isinstance( indices, Ndarray) or not isinstance(indptr, Ndarray): raise TaichiRuntimeError( - 'Sparse matrix only supports building from [ti.ndarray, ti.Vectorndarray, ti.Matrix.ndarray].' + 'Sparse matrix only supports building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray].' ) elif data.dtype != f32 or indices.dtype != i32 or indptr.dtype != i32: raise TaichiRuntimeError( - 'Sparse matrix only supports building from float32 data, int32 indices and indptr.' + 'Sparse matrix only supports building from float32 data and int32 indices/indptr.' 
) else: get_runtime().prog.make_sparse_matrix_from_ndarray_cusparse( @@ -237,7 +237,7 @@ def spmv(self, x, y): """ if not isinstance(x, Ndarray) or not isinstance(y, Ndarray): raise TaichiRuntimeError( - 'Sparse matrix only supports building from [ti.ndarray, ti.Vectorndarray, ti.Matrix.ndarray]' + 'Sparse matrix only supports building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray]' ) if self.m != x.shape[0]: raise TaichiRuntimeError( diff --git a/taichi/program/sparse_matrix.h b/taichi/program/sparse_matrix.h index 0b53b06c83154..0087f9cc462c1 100644 --- a/taichi/program/sparse_matrix.h +++ b/taichi/program/sparse_matrix.h @@ -60,7 +60,7 @@ class SparseMatrix { virtual ~SparseMatrix() = default; virtual void build_triplets(void *triplets_adr) { - TI_WARN("SparseMatrix::build_triplets is not implemented!"); + TI_NOT_IMPLEMENTED; }; virtual void build_csr(void *csr_ptr, diff --git a/taichi/rhi/arch.cpp b/taichi/rhi/arch.cpp index c0885fa9fb552..bca52aefe49bd 100644 --- a/taichi/rhi/arch.cpp +++ b/taichi/rhi/arch.cpp @@ -43,7 +43,7 @@ bool arch_is_cpu(Arch arch) { } } -bool arch_uses_cuda(Arch arch) { +bool arch_is_cuda(Arch arch) { return arch == Arch::cuda; } diff --git a/taichi/rhi/cuda/cuda_driver.cpp b/taichi/rhi/cuda/cuda_driver.cpp index cf205fac9fb53..000050ebd976d 100644 --- a/taichi/rhi/cuda/cuda_driver.cpp +++ b/taichi/rhi/cuda/cuda_driver.cpp @@ -66,7 +66,6 @@ CUDADriverBase::CUDADriverBase() { disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0); if (disabled_by_env_) { TI_TRACE("CUDA driver disabled by enviroment variable \"TI_ENABLE_CUDA\"."); - return; } } From 4c39e80a9817a5b3d48079d2a6cd46ba080c0e6f Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Tue, 16 Aug 2022 18:44:10 +0800 Subject: [PATCH 29/30] arch fix --- taichi/program/sparse_matrix.h | 2 +- taichi/python/export_lang.cpp | 6 +++--- taichi/rhi/arch.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/taichi/program/sparse_matrix.h b/taichi/program/sparse_matrix.h index 0087f9cc462c1..be776045bad7d 100644 --- a/taichi/program/sparse_matrix.h +++ b/taichi/program/sparse_matrix.h @@ -67,7 +67,7 @@ class SparseMatrix { void *csr_indices_ptr, void *csr_values_ptr, int nnz) { - TI_WARN("SparseMatrix::build_csr is not implemented yet"); + TI_NOT_IMPLEMENTED; }; inline const int num_rows() const { diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index 11b35265b0190..95b6050df9127 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -382,7 +382,7 @@ void export_lang(py::module &m) { [](Program *program, int n, int m, DataType dtype, std::string storage_format) { TI_ERROR_IF(!arch_is_cpu(program->config.arch) && - !arch_uses_cuda(program->config.arch), + !arch_is_cuda(program->config.arch), "SparseMatrix only supports CPU and CUDA for now."); if (arch_is_cpu(program->config.arch)) return make_sparse_matrix(n, m, dtype, storage_format); @@ -392,7 +392,7 @@ void export_lang(py::module &m) { .def("make_sparse_matrix_from_ndarray", [](Program *program, SparseMatrix &sm, const Ndarray &ndarray) { TI_ERROR_IF(!arch_is_cpu(program->config.arch) && - !arch_uses_cuda(program->config.arch), + !arch_is_cuda(program->config.arch), "SparseMatrix only supports CPU and CUDA for now."); return make_sparse_matrix_from_ndarray(program, sm, ndarray); }) @@ -400,7 +400,7 @@ void export_lang(py::module &m) { [](Program *program, CuSparseMatrix &sm, const Ndarray &row_csr, const Ndarray &col_csr, const Ndarray &val_csr) { TI_ERROR_IF( - 
!arch_uses_cuda(program->config.arch), + !arch_is_cuda(program->config.arch), "SparseMatrix based on GPU only supports CUDA for now."); return make_sparse_matrix_from_ndarray_cusparse( program, sm, row_csr, col_csr, val_csr); diff --git a/taichi/rhi/arch.h b/taichi/rhi/arch.h index bfc1f2adc324f..47e74ef3acbb0 100644 --- a/taichi/rhi/arch.h +++ b/taichi/rhi/arch.h @@ -18,7 +18,7 @@ Arch arch_from_name(const std::string &arch); bool arch_is_cpu(Arch arch); -bool arch_uses_cuda(Arch arch); +bool arch_is_cuda(Arch arch); bool arch_uses_llvm(Arch arch); From 2dc766a6a5c11baf24796b40537059f7091ddaac Mon Sep 17 00:00:00 2001 From: FantasyVR Date: Tue, 16 Aug 2022 21:27:37 +0800 Subject: [PATCH 30/30] format --- taichi/rhi/cuda/cuda_driver.cpp | 52 ++++++++++++++++----------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/taichi/rhi/cuda/cuda_driver.cpp b/taichi/rhi/cuda/cuda_driver.cpp index 000050ebd976d..f882b75cd5a62 100644 --- a/taichi/rhi/cuda/cuda_driver.cpp +++ b/taichi/rhi/cuda/cuda_driver.cpp @@ -15,6 +15,32 @@ std::string get_cuda_error_message(uint32 err) { return fmt::format("CUDA Error {}: {}", err_name_ptr, err_string_ptr); } +CUDADriverBase::CUDADriverBase() { + disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0); + if (disabled_by_env_) { + TI_TRACE("CUDA driver disabled by enviroment variable \"TI_ENABLE_CUDA\"."); + } +} + +bool CUDADriverBase::load_lib(std::string lib_linux, std::string lib_windows) { +#if defined(TI_PLATFORM_LINUX) + auto lib_name = lib_linux; +#elif defined(TI_PLATFORM_WINDOWS) + auto lib_name = lib_windows; +#else + static_assert(false, "Taichi CUDA driver supports only Windows and Linux."); +#endif + + loader_ = std::make_unique(lib_name); + if (!loader_->loaded()) { + TI_WARN("{} lib not found.", lib_name); + return false; + } else { + TI_TRACE("{} loaded!", lib_name); + return true; + } +} + bool CUDADriver::detected() { return !disabled_by_env_ && cuda_version_valid_ && loader_->loaded(); } @@ -62,32 +88,6 @@ CUDADriver &CUDADriver::get_instance() { return get_instance_without_context(); } -CUDADriverBase::CUDADriverBase() { - disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0); - if (disabled_by_env_) { - TI_TRACE("CUDA driver disabled by enviroment variable \"TI_ENABLE_CUDA\"."); - } -} - -bool CUDADriverBase::load_lib(std::string lib_linux, std::string lib_windows) { -#if defined(TI_PLATFORM_LINUX) - auto lib_name = lib_linux; -#elif defined(TI_PLATFORM_WINDOWS) - auto lib_name = lib_windows; -#else - static_assert(false, "Taichi CUDA driver supports only Windows and Linux."); -#endif - - loader_ = std::make_unique(lib_name); - if (!loader_->loaded()) { - TI_WARN("{} lib not found.", lib_name); - return false; - } else { - TI_TRACE("{} loaded!", lib_name); - return true; - } -} - CUSPARSEDriver::CUSPARSEDriver() { }
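
After the whole series, the user-facing flow is the one exercised by the new test; a condensed sketch follows. It assumes a CUDA build of Taichi with libcusparse available at runtime, and uses the API names exactly as introduced in these patches (they may still change in later work):

import numpy as np
import taichi as ti

ti.init(arch=ti.cuda)

idx_dt, val_dt = ti.int32, ti.f32

# CSR arrays copied from host NumPy buffers into Taichi ndarrays (device memory).
indptr = ti.ndarray(shape=5, dtype=idx_dt)
indices = ti.ndarray(shape=9, dtype=idx_dt)
data = ti.ndarray(shape=9, dtype=val_dt)
indptr.from_numpy(np.array([0, 3, 4, 7, 9], dtype=np.int32))
indices.from_numpy(np.array([0, 2, 3, 1, 0, 2, 3, 1, 3], dtype=np.int32))
data.from_numpy(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.float32))

x = ti.ndarray(shape=4, dtype=val_dt)
y = ti.ndarray(shape=4, dtype=val_dt)
x.from_numpy(np.array([1, 2, 3, 4], dtype=np.float32))
y.fill(0.0)

# Build the cuSPARSE-backed CSR matrix and run y = A @ x on the GPU.
A = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.f32)
A.build_csr_cusparse(data, indices, indptr)
A.spmv(x, y)

expected = [19.0, 8.0, 51.0, 52.0]
for i in range(4):
    assert y[i] == expected[i]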