Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Lang] Load cuSparse and cuSolver library for preparing sparse matrix on GPU #4904

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion taichi/backends/cuda/cuda_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
TLANG_NAMESPACE_BEGIN

CUDAContext::CUDAContext()
: profiler_(nullptr), driver_(CUDADriver::get_instance_without_context()) {
: profiler_(nullptr),
driver_(CUDADriver::get_instance_without_context()),
cusparse_driver_(CUSPARSEDriver::get_instance()),
cusolver_driver_(CUSOLVERDriver::get_instance()) {
// CUDA initialization
dev_count_ = 0;
driver_.init(0);
Expand Down
4 changes: 4 additions & 0 deletions taichi/backends/cuda/cuda_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ TLANG_NAMESPACE_BEGIN
// cases such as unit testing where many Taichi programs are created/destroyed.

class CUDADriver;
class CUSPARSEDriver;
class CUSOLVERDriver;

class CUDAContext {
private:
Expand All @@ -26,6 +28,8 @@ class CUDAContext {
std::mutex lock_;
KernelProfilerBase *profiler_;
CUDADriver &driver_;
CUSPARSEDriver &cusparse_driver_;
CUSOLVERDriver &cusolver_driver_;
bool debug_;

public:
Expand Down
63 changes: 45 additions & 18 deletions taichi/backends/cuda/cuda_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,7 @@ bool CUDADriver::detected() {
}

CUDADriver::CUDADriver() {
disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0);
if (disabled_by_env_) {
TI_TRACE("CUDA driver disabled by enviroment variable \"TI_ENABLE_CUDA\".");
return;
}

#if defined(TI_PLATFORM_LINUX)
loader_ = std::make_unique<DynamicLoader>("libcuda.so");
#elif defined(TI_PLATFORM_WINDOWS)
loader_ = std::make_unique<DynamicLoader>("nvcuda.dll");
#else
static_assert(false, "Taichi CUDA driver supports only Windows and Linux.");
#endif

if (!loader_->loaded()) {
TI_WARN("CUDA driver not found.");
return;
}
load_lib("libcuda.so", "nvcuda.dll");

loader_->load_function("cuGetErrorName", get_error_name);
loader_->load_function("cuGetErrorString", get_error_string);
Expand Down Expand Up @@ -78,4 +61,48 @@ CUDADriver &CUDADriver::get_instance() {
return get_instance_without_context();
}

// Constructor shared by the CUDA-family driver wrappers.
//
// Records in disabled_by_env_ whether
// get_environ_config("TI_ENABLE_CUDA", 1) evaluates to 0, and emits a trace
// message when it does. No shared library is opened here; derived
// constructors call load_lib() for that.
CUDADriverBase::CUDADriverBase()
    : disabled_by_env_(get_environ_config("TI_ENABLE_CUDA", 1) == 0) {
  if (disabled_by_env_) {
    TI_TRACE("CUDA driver disabled by enviroment variable \"TI_ENABLE_CUDA\".");
  }
}

void CUDADriverBase::load_lib(std::string lib_linux, std::string lib_windows) {
#if defined(TI_PLATFORM_LINUX)
auto lib_name = lib_linux;
#elif defined(TI_PLATFORM_WINDOWS)
auto lib_name = lib_windows;
#else
static_assert(false, "Taichi CUDA driver supports only Windows and Linux.");
#endif

loader_ = std::make_unique<DynamicLoader>(lib_name);
if (!loader_->loaded()) {
TI_WARN("{} lib not found.", lib_name);
return;
} else {
TI_TRACE("{} loaded!", lib_name);
}
}

CUSPARSEDriver::CUSPARSEDriver() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I'd suggest not doing this in the constructor, because if the lib fails to load, we get a broken/zombie object. A safer approach could be to load the DLL in a function. If that fails, return the exception/error to Python. Otherwise, move the loaded lib into CuSolverDriver.

Also, we need to make sure we don't accidentally invoke CUSPARSEDriver::get_instance() when we don't need the sparse matrix; otherwise Taichi could be broken if cuSparse is not installed.

I know this is not a newly introduced problem, so this is just something to think about :-)

load_lib("libcusparse.so", "cusparse.dll");
}

// Returns the single CUSPARSEDriver object, constructing it on the first
// call. The object is heap-allocated and never deleted, so its destructor is
// not run at program exit.
CUSPARSEDriver &CUSPARSEDriver::get_instance() {
  static CUSPARSEDriver *const singleton = new CUSPARSEDriver();
  return *singleton;
}

// Constructs the cuSOLVER driver wrapper by asking the base class to open
// "libcusolver.so" (Linux build) or "cusolver.dll" (Windows build).
// load_lib() only logs a warning when the library cannot be opened, so the
// object is still constructed in that case.
CUSOLVERDriver::CUSOLVERDriver() {
load_lib("libcusolver.so", "cusolver.dll");
}

// Returns the single CUSOLVERDriver object, constructing it on the first
// call. The object is heap-allocated and never deleted, so its destructor is
// not run at program exit.
CUSOLVERDriver &CUSOLVERDriver::get_instance() {
  static CUSOLVERDriver *const singleton = new CUSOLVERDriver();
  return *singleton;
}

TLANG_NAMESPACE_END
38 changes: 32 additions & 6 deletions taichi/backends/cuda/cuda_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,20 @@ class CUDADriverFunction {
std::mutex *driver_lock_{nullptr};
};

class CUDADriver {
// Common base for the wrappers around NVIDIA shared libraries (the CUDA
// driver, cuSPARSE and cuSOLVER wrappers derive from it).
//
// It holds the DynamicLoader for the wrapped library and the flag derived
// from the TI_ENABLE_CUDA environment setting. The constructor is protected,
// so only derived classes can create instances.
class CUDADriverBase {
 public:
  ~CUDADriverBase() = default;

 protected:
  // Initializes disabled_by_env_ from TI_ENABLE_CUDA; does not load anything.
  CUDADriverBase();

  // Opens lib_linux or lib_windows (chosen by the build platform) and stores
  // the loader in loader_. Failure is only logged; check loader_->loaded().
  void load_lib(std::string lib_linux, std::string lib_windows);

  // Loader for the wrapped shared library; null until load_lib() is called.
  std::unique_ptr<DynamicLoader> loader_;

  // True when TI_ENABLE_CUDA was read as 0.
  bool disabled_by_env_{false};
};

class CUDADriver : protected CUDADriverBase {
public:
#define PER_CUDA_FUNCTION(name, symbol_name, ...) \
CUDADriverFunction<__VA_ARGS__> name;
Expand All @@ -110,21 +123,34 @@ class CUDADriver {

bool detected();

~CUDADriver() = default;

static CUDADriver &get_instance();

static CUDADriver &get_instance_without_context();

private:
CUDADriver();

std::unique_ptr<DynamicLoader> loader_;

std::mutex lock_;

bool disabled_by_env_{false};
bool cuda_version_valid_{false};
};

// Wrapper around the cuSPARSE shared library.
//
// The constructor is private; the only way to obtain an object is
// get_instance(). No cuSPARSE entry points are exposed yet.
class CUSPARSEDriver : protected CUDADriverBase {
 public:
  // TODO: Add cusparse function APIs

  // Returns the single CUSPARSEDriver object.
  static CUSPARSEDriver &get_instance();

 private:
  // Loads the cuSPARSE library through CUDADriverBase::load_lib().
  CUSPARSEDriver();
};

// Wrapper around the cuSOLVER shared library.
//
// The constructor is private; the only way to obtain an object is
// get_instance(). No cuSOLVER entry points are exposed yet.
class CUSOLVERDriver : protected CUDADriverBase {
 public:
  // TODO: Add cusolver function APIs

  // Returns the single CUSOLVERDriver object.
  static CUSOLVERDriver &get_instance();

 private:
  // Loads the cuSOLVER library through CUDADriverBase::load_lib().
  CUSOLVERDriver();
};

TLANG_NAMESPACE_END