Revert "Support ROCm at compile-time (#122)"

This reverts commit 8bea4e1 which broke automatic compilation on Windows.
Mozilla-Ocho · Dec 28, 2023 · 18c2821 · 18c2821
1 parent 9e972b2
commit 18c2821
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 57 deletions.
diff --git a/llamafile/compcap.cu b/llamafile/compcap.cu
@@ -1,20 +1,7 @@
 // https://stackoverflow.com/a/40695640/1653720
 #include <cstdio>
 #include <cstdlib>
-#if !defined(USE_HIP)
 #include <cuda_runtime_api.h>
-#else
-#include <hip/hip_runtime_api.h>
-#endif
-
-#if defined(USE_HIP)
-#define cudaGetDeviceProperties hipGetDeviceProperties
-#define cudaGetDeviceCount hipGetDeviceCount
-#define cudaError_t hipError_t
-#define cudaDeviceProp hipDeviceProp_t
-#define cudaSuccess hipSuccess
-#define cudaGetErrorString hipGetErrorString
-#endif
 
 int main(int argc, char *argv[]) {
   cudaDeviceProp prop;
@@ -38,11 +25,6 @@ int main(int argc, char *argv[]) {
     fprintf(stderr, "cudaGetDeviceProperties() for device device_index failed: %s\n", cudaGetErrorString(status));
     return -1;
   }
-
-#if !defined(USE_HIP)
   int v = prop.major * 10 + prop.minor;
   printf("%d", v);
-#else
-  printf("%s", prop.gcnArchName);
-#endif
 }
diff --git a/llamafile/cuda.c b/llamafile/cuda.c
@@ -43,15 +43,8 @@ __static_yoink("llama.cpp/ggml-cuda.cu");
 __static_yoink("llama.cpp/ggml-backend.h");
 __static_yoink("llama.cpp/ggml-backend-impl.h");
 
-#define USE_HIP 1
-
-#if !USE_HIP
 #define NVCC_LIBS "-lcublas"
-#else
-#define NVCC_LIBS "-lhipblas", "-lrocblas"
-#endif
 
-#if !USE_HIP
 #define NVCC_FLAGS "--shared",                                          \
         "--forward-unknown-to-host-compiler",                           \
         "-use_fast_math",                                               \
@@ -65,21 +58,6 @@ __static_yoink("llama.cpp/ggml-backend-impl.h");
         "-DGGML_CUDA_PEER_MAX_BATCH_SIZE=128",                          \
         "-DGGML_USE_CUBLAS"
 
-
-#else
-#define NVCC_FLAGS "-shared",                                          \
-        "-use_fast_math",                                               \
-        IsWindows() ? "" : "-fPIC", "-O3", "-march=native", "-mtune=native",               \
-        "-DNDEBUG",                                                     \
-        "-DGGML_BUILD=1",                                               \
-        "-DGGML_SHARED=1",                                              \
-        "-DGGML_CUDA_DMMV_X=32",                                        \
-        "-DGGML_CUDA_MMV_Y=1",                                          \
-        "-DK_QUANTS_PER_ITERATION=2",                                   \
-        "-DGGML_CUDA_PEER_MAX_BATCH_SIZE=128",                          \
-        "-DGGML_USE_HIPBLAS"
-#endif
-
 static const struct Source {
     const char *zip;
     const char *name;
@@ -228,11 +206,7 @@ static bool Compile(const char *src,
 // set $CUDA_PATH to empty string to disable cuda
 static bool GetNvccPath(char path[static PATH_MAX]) {
     const char *cuda_path;
-#if !USE_HIP
     if (commandv(IsWindows() ? "nvcc.exe" : "nvcc", path, PATH_MAX)) {
-#else
-    if (commandv(IsWindows() ? "hipcc.bin.exe" : "hipcc", path, PATH_MAX)) {
-#endif
         return true;
     } else if ((cuda_path = getenv("CUDA_PATH"))) {
         if (!*cuda_path) return false;
@@ -243,12 +217,10 @@ static bool GetNvccPath(char path[static PATH_MAX]) {
     } else {
         strlcpy(path, "/usr/local/cuda/bin/", PATH_MAX);
     }
-    strlcat(path, USE_HIP ? "nvcc" : "hipcc", PATH_MAX);
-#if !USE_HIP
+    strlcat(path, "nvcc", PATH_MAX);
     if (IsWindows()) {
         strlcat(path, ".exe", PATH_MAX);
     }
-#endif
     return IsExecutable(path);
 }
 
@@ -282,7 +254,7 @@ static dontinline bool GetNvccArchFlag(char *nvcc, char flag[static 32]) {
     // run than compiling / running this script and (2) the nvidia-smi
     // command isn't available on Jetson devices.
     tinyprint(2, "building nvidia compute capability detector...\n", NULL);
-    if (!Compile(src, tmp, exe, (char *[]){nvcc, "-o", tmp, src, USE_HIP ? "-DUSE_HIP=1" : 0, 0})) {
+    if (!Compile(src, tmp, exe, (char *[]){nvcc, "-o", tmp, src, 0})) {
         return false;
     }
 
@@ -323,7 +295,6 @@ static dontinline bool GetNvccArchFlag(char *nvcc, char flag[static 32]) {
         return false;
     }
 
-#if !USE_HIP
     // parse output of detector
     char *endptr;
     if (!*ibuf || !strtol(ibuf, &endptr, 10) || *endptr) {
@@ -333,13 +304,6 @@ static dontinline bool GetNvccArchFlag(char *nvcc, char flag[static 32]) {
 
     // return resulting flag
     stpcpy(stpcpy(flag, "-arch=compute_"), ibuf);
-#else
-    if (!*ibuf) {
-        tinyprint(2, "error: bad compute capability detector output\n", NULL);
-        return false;
-    }
-    stpcpy(stpcpy(flag, "--offload-arch="), ibuf);
-#endif
     return true;
 }
 
@@ -405,7 +369,7 @@ static bool CompileNativeCuda(char dso[static PATH_MAX]) {
     // try building dso with host nvidia microarchitecture
     tinyprint(2, "building ggml-cuda with nvcc -arch=native...\n", NULL);
     if (Compile(src, tmpdso, dso, (char *[]){
-                nvcc, USE_HIP ? "-march=native" : "-arch=native", NVCC_FLAGS, "-o", tmpdso,
+                nvcc, "-arch=native", NVCC_FLAGS, "-o", tmpdso,
                 src, NVCC_LIBS, NULL})) {
         return true;
     }