use "ROCm" instead of "CUDA"
parent 391dd9a0e2
commit 5d3e7b25e0
5 changed files with 13 additions and 3 deletions
common.cpp
@@ -601,7 +601,11 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stdout, "                        how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n");
     fprintf(stdout, "  -mg i, --main-gpu i   the GPU to use for scratch and small tensors\n");
     fprintf(stdout, "  -lv, --low-vram       don't allocate VRAM scratch buffer\n");
+#if defined(GGML_USE_HIPBLAS)
+    fprintf(stdout, "  -mmq, --mul-mat-q     use experimental mul_mat_q HIP kernels instead of hipBLAS. TEMP!!!\n");
+#else
     fprintf(stdout, "  -mmq, --mul-mat-q     use experimental mul_mat_q CUDA kernels instead of cuBLAS. TEMP!!!\n");
+#endif
     fprintf(stdout, "                        Reduces VRAM usage by 700/970/1430 MiB for 7b/13b/33b but prompt processing speed\n");
     fprintf(stdout, "                        is still suboptimal, especially q2_K, q3_K, q5_K, and q6_K.\n");
 #endif
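The hunk above is plain preprocessor branching: the HIP wording is compiled in only when GGML_USE_HIPBLAS is defined. A minimal standalone sketch of the same pattern (not part of the commit; the file name and trimmed help text are illustrative):

    // usage_sketch.cpp -- illustrative only; build with -DGGML_USE_HIPBLAS
    // (e.g. `g++ -DGGML_USE_HIPBLAS usage_sketch.cpp`) to get the HIP wording.
    #include <cstdio>

    static void print_mmq_usage() {
    #if defined(GGML_USE_HIPBLAS)
        // HIP build: the help text names HIP kernels and hipBLAS
        fprintf(stdout, "  -mmq, --mul-mat-q     use experimental mul_mat_q HIP kernels instead of hipBLAS\n");
    #else
        // default build: the help text names CUDA kernels and cuBLAS
        fprintf(stdout, "  -mmq, --mul-mat-q     use experimental mul_mat_q CUDA kernels instead of cuBLAS\n");
    #endif
    }

    int main() {
        print_mmq_usage();
        return 0;
    }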
llama-bench.cpp
@@ -504,7 +504,7 @@ struct test {
     static std::string get_backend() {
         if (cuda) {
-            return "CUDA";
+            return GGML_CUDA_NAME;
         }
         if (opencl) {
             return "OpenCL";
         }
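With this change, llama-bench's backend column should read "ROCm" on hipBLAS builds and "CUDA" otherwise, via the GGML_CUDA_NAME define added to ggml-cuda.h below.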
ggml-cuda.cu
@@ -5025,7 +5025,7 @@ void ggml_init_cublas() {
     CUDA_CHECK(cudaGetDeviceCount(&g_device_count));
     GGML_ASSERT(g_device_count <= GGML_CUDA_MAX_DEVICES);
     int64_t total_vram = 0;
-    fprintf(stderr, "%s: found %d CUDA devices:\n", __func__, g_device_count);
+    fprintf(stderr, "%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, g_device_count);
     for (int id = 0; id < g_device_count; ++id) {
         cudaDeviceProp prop;
         CUDA_CHECK(cudaGetDeviceProperties(&prop, id));
ggml-cuda.h
@@ -2,6 +2,12 @@
 
 #include "ggml.h"
 
+#ifdef GGML_USE_HIPBLAS
+#define GGML_CUDA_NAME "ROCm"
+#else
+#define GGML_CUDA_NAME "CUDA"
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
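The define works in format strings because C and C++ concatenate adjacent string literals at translation time, so "found %d " GGML_CUDA_NAME " devices" collapses into a single literal. A small self-contained sketch of that mechanism (illustrative only; the device count is a placeholder):

    // name_sketch.cpp -- illustrative only; mirrors the GGML_CUDA_NAME usage above.
    #include <cstdio>

    #ifdef GGML_USE_HIPBLAS
    #define GGML_CUDA_NAME "ROCm"
    #else
    #define GGML_CUDA_NAME "CUDA"
    #endif

    int main() {
        int device_count = 1; // placeholder value for the sketch
        // Adjacent literals merge: "%s: found %d " "CUDA" " devices:\n"
        // becomes "%s: found %d CUDA devices:\n" before compilation proper.
        fprintf(stderr, "%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, device_count);
        return 0;
    }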
llama.cpp
@@ -1478,7 +1478,7 @@ static void llama_model_load_internal(
     (void) main_gpu;
     (void) mul_mat_q;
 #if defined(GGML_USE_CUBLAS)
-    LLAMA_LOG_INFO("%s: using CUDA for GPU acceleration\n", __func__);
+    LLAMA_LOG_INFO("%s: using " GGML_CUDA_NAME " for GPU acceleration\n", __func__);
     ggml_cuda_set_main_device(main_gpu);
     ggml_cuda_set_mul_mat_q(mul_mat_q);
 #define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU
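Taken together, a hipBLAS build should log something like

    llama_model_load_internal: using ROCm for GPU acceleration

while a cuBLAS build keeps the familiar "using CUDA" line; only the compile-time name changes, not the code path.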