From d41cef9326cddb895b3b44edd0287602b0de774a Mon Sep 17 00:00:00 2001 From: slaren Date: Mon, 8 Jan 2024 13:42:20 +0100 Subject: [PATCH] minor --- common/common.cpp | 30 +++++++++++------------------- llama.h | 4 ++-- tests/test-backend-ops.cpp | 14 +++++++------- 3 files changed, 20 insertions(+), 28 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 47f9381cf..58155f7f5 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -529,9 +529,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { invalid_param = true; break; } -#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD params.n_gpu_layers = std::stoi(argv[i]); -#else +#ifndef LLAMA_SUPPORTS_GPU_OFFLOAD fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers option will be ignored\n"); fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); #endif @@ -540,9 +539,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { invalid_param = true; break; } -#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD params.n_gpu_layers_draft = std::stoi(argv[i]); -#else +#ifndef LLAMA_SUPPORTS_GPU_OFFLOAD fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers-draft option will be ignored\n"); fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); #endif @@ -551,11 +549,10 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { invalid_param = true; break; } -#ifdef GGML_USE_CUBLAS params.main_gpu = std::stoi(argv[i]); -#else - fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set a main GPU.\n"); -#endif +#ifndef GGML_USE_CUBLAS + fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. 
Setting the main GPU has no effect.\n"); +#endif // GGML_USE_CUBLAS } else if (arg == "--split-mode" || arg == "-sm") { if (++i >= argc) { invalid_param = true; @@ -580,15 +577,16 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { invalid_param = true; break; } -#ifdef GGML_USE_CUBLAS std::string arg_next = argv[i]; // split string by , and / const std::regex regex{R"([,/]+)"}; std::sregex_token_iterator it{arg_next.begin(), arg_next.end(), regex, -1}; std::vector split_arg{it, {}}; - GGML_ASSERT(split_arg.size() <= LLAMA_MAX_DEVICES); - + if (split_arg.size() > LLAMA_MAX_DEVICES) { // exactly LLAMA_MAX_DEVICES values still fit in tensor_split; reject only when there are more + invalid_param = true; + break; + } for (size_t i = 0; i < LLAMA_MAX_DEVICES; ++i) { if (i < split_arg.size()) { params.tensor_split[i] = std::stof(split_arg[i]); @@ -596,14 +594,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { params.tensor_split[i] = 0.0f; } } -#else - fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set a tensor split.\n"); -#endif // GGML_USE_CUBLAS - } else if (arg == "--no-mul-mat-q" || arg == "-nommq") { -#ifdef GGML_USE_CUBLAS - params.mul_mat_q = false; -#else - fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. Disabling mul_mat_q kernels has no effect.\n"); +#ifndef GGML_USE_CUBLAS + fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. 
Setting a tensor split has no effect.\n"); #endif // GGML_USE_CUBLAS } else if (arg == "--no-mmap") { params.use_mmap = false; diff --git a/llama.h b/llama.h index 662c2c3be..605783b42 100644 --- a/llama.h +++ b/llama.h @@ -117,7 +117,7 @@ extern "C" { enum llama_split_mode { LLAMA_SPLIT_NONE = 0, // single GPU - LLAMA_SPLIT_LAYER = 1, // split layers and KV to different GPUs + LLAMA_SPLIT_LAYER = 1, // split layers and KV across GPUs LLAMA_SPLIT_ROW = 2, // split rows across GPUs }; @@ -188,7 +188,7 @@ extern "C" { // for small tensors and intermediate results (LLAMA_SPLIT_ROW) // ignored for LLAMA_SPLIT_LAYER int32_t main_gpu; - // fraction of the model (layers or rows) to offload to each GPU, size: LLAMA_MAX_DEVICES + // proportion of the model (layers or rows) to offload to each GPU, size: LLAMA_MAX_DEVICES const float * tensor_split; // Called with a progress value between 0.0 and 1.0. Pass NULL to disable. diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 1f95258a8..18eb4a881 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -474,17 +474,17 @@ struct test_case { printf("compare failed "); } - if (ud.ok && cmp_ok) { - printf("\033[1;32mOK\033[0m\n"); - } else { - printf("\033[1;31mFAIL\033[0m\n"); - } - ggml_backend_buffer_free(buf); ggml_free(ctx); - return ud.ok; + if (ud.ok && cmp_ok) { + printf("\033[1;32mOK\033[0m\n"); + return true; + } + + printf("\033[1;31mFAIL\033[0m\n"); + return false; } bool eval_perf(ggml_backend_t backend, const char * op_name) {