supports running on CPU for GGML_USE_CUBLAS=ON build #3946

Merged
merged 3 commits on Nov 7, 2023
Changes from 1 commit
17 changes: 16 additions & 1 deletion ggml-cuda.cu
@@ -5790,6 +5790,11 @@ static void ggml_cuda_pool_free(void * ptr, size_t size) {
     CUDA_CHECK(cudaFree(ptr));
 }
 
+static bool g_cublas_loaded = false;
+
+bool ggml_cublas_loaded(void) {
+    return g_cublas_loaded;
+}
+
 void ggml_init_cublas() {
     static bool initialized = false;
@@ -5803,7 +5808,12 @@ void ggml_init_cublas() {
         CUDA_CHECK(cudaDeviceSynchronize());
 #endif
 
-        CUDA_CHECK(cudaGetDeviceCount(&g_device_count));
+        if (cudaGetDeviceCount(&g_device_count) != cudaSuccess) {
+            initialized = true;
+            g_cublas_loaded = false;
+            return;
+        }
+
         GGML_ASSERT(g_device_count <= GGML_CUDA_MAX_DEVICES);
         int64_t total_vram = 0;
 #if defined(GGML_CUDA_FORCE_MMQ)
@@ -5851,6 +5861,7 @@ void ggml_init_cublas() {
         // CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
 
         initialized = true;
+        g_cublas_loaded = true;
     }
 }
 
@@ -7158,6 +7169,8 @@ static void ggml_cuda_rms_norm(const ggml_tensor * src0, const ggml_tensor * src
 }
 
 bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
+    if (!g_cublas_loaded) return false;
+
     const int64_t ne10 = src1->ne[0];
 
     const int64_t ne0 = dst->ne[0];
@@ -7843,6 +7856,8 @@ void ggml_cuda_free_scratch() {
 }
 
 bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
+    if (!g_cublas_loaded) return false;
+
     ggml_cuda_func_t func;
     const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
         || (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
2 changes: 2 additions & 0 deletions ggml-cuda.h
@@ -18,6 +18,8 @@ extern "C" {
 #define GGML_CUDA_MAX_DEVICES 16
 
 GGML_API void ggml_init_cublas(void);
+GGML_API bool ggml_cublas_loaded(void);
Owner:

Add comment that we differentiate between "initialized" and "loaded" state. The former means we have called ggml_init_cublas() but it's not guaranteed that there has been a CUDA device available, in which case ggml_cublas_loaded() is false.

Contributor Author:

done

 GGML_API void * ggml_cuda_host_malloc(size_t size);
 GGML_API void ggml_cuda_host_free(void * ptr);

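With these changes, a binary built with GGML_USE_CUBLAS=ON no longer aborts when no CUDA device is present: ggml_init_cublas() records the failure instead of crashing inside CUDA_CHECK, and the CUDA entry points return false so evaluation stays on the CPU. The sketch below shows how a caller might use the new API, picking up the owner's distinction between "initialized" and "loaded"; it is illustrative only and not part of this PR (the main() wrapper and messages are made up for the example).

```c
// Hypothetical usage sketch: check whether cuBLAS actually loaded after
// initializing, and fall back to CPU-only execution if it did not.
#include <stdio.h>

#include "ggml-cuda.h"

int main(void) {
    // With this PR, ggml_init_cublas() returns normally even when
    // cudaGetDeviceCount() fails (e.g. no CUDA device or driver installed).
    ggml_init_cublas();

    if (ggml_cublas_loaded()) {
        printf("cuBLAS loaded: GPU offload is available\n");
    } else {
        // ggml_cuda_can_mul_mat() and ggml_cuda_compute_forward() now return
        // false in this state, so graph evaluation falls back to the CPU.
        printf("no usable CUDA device: running on CPU\n");
    }
    return 0;
}
```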