From 4b7eccc7da88e2e6bb92308ca9d1663947658c2e Mon Sep 17 00:00:00 2001
From: 0cc4m
Date: Wed, 1 Nov 2023 22:22:57 +0100
Subject: [PATCH] Add Vulkan to llama-bench

---
 examples/llama-bench/llama-bench.cpp | 11 ++++++++---
 ggml.c                               | 10 +++++++++-
 ggml.h                               |  1 +
 llama.cpp                            |  2 +-
 4 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 20767d555..1c1ea0d17 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -479,6 +479,7 @@ struct test {
     static const int build_number;
     static const bool cuda;
     static const bool opencl;
+    static const bool vulkan;
     static const bool metal;
     static const bool gpu_blas;
     static const bool blas;
@@ -554,6 +555,9 @@ struct test {
         if (opencl) {
             return "OpenCL";
         }
+        if (vulkan) {
+            return "Vulkan";
+        }
         if (metal) {
             return "Metal";
         }
@@ -569,7 +573,7 @@ struct test {
     static const std::vector<std::string> & get_fields() {
         static const std::vector<std::string> fields = {
             "build_commit", "build_number",
-            "cuda", "opencl", "metal", "gpu_blas", "blas",
+            "cuda", "opencl", "vulkan", "metal", "gpu_blas", "blas",
             "cpu_info", "gpu_info",
             "model_filename", "model_type", "model_size", "model_n_params",
             "n_batch", "n_threads", "f16_kv",
@@ -591,7 +595,7 @@ struct test {
             field == "avg_ns" || field == "stddev_ns") {
             return INT;
         }
-        if (field == "cuda" || field == "opencl" || field == "metal" || field == "gpu_blas" || field == "blas" ||
+        if (field == "cuda" || field == "opencl" || field == "vulkan" || field == "metal" || field == "gpu_blas" || field == "blas" ||
             field == "f16_kv" || field == "mul_mat_q") {
             return BOOL;
         }
@@ -619,7 +623,7 @@ struct test {
         }
         std::vector<std::string> values = {
             build_commit, std::to_string(build_number),
-            std::to_string(cuda), std::to_string(opencl), std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
+            std::to_string(cuda), std::to_string(opencl), std::to_string(vulkan), std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
            cpu_info, gpu_info,
             model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
             std::to_string(n_batch), std::to_string(n_threads), std::to_string(!f32_kv),
@@ -645,6 +649,7 @@ const std::string test::build_commit = BUILD_COMMIT;
 const int         test::build_number = BUILD_NUMBER;
 const bool        test::cuda         = !!ggml_cpu_has_cublas();
 const bool        test::opencl       = !!ggml_cpu_has_clblast();
+const bool        test::vulkan       = !!ggml_cpu_has_vulkan();
 const bool        test::metal        = !!ggml_cpu_has_metal();
 const bool        test::gpu_blas     = !!ggml_cpu_has_gpublas();
 const bool        test::blas         = !!ggml_cpu_has_blas();
diff --git a/ggml.c b/ggml.c
index d7d972ccb..fd5acbe38 100644
--- a/ggml.c
+++ b/ggml.c
@@ -22308,8 +22308,16 @@ int ggml_cpu_has_clblast(void) {
 #endif
 }
 
+int ggml_cpu_has_vulkan(void) {
+#if defined(GGML_USE_VULKAN)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
 int ggml_cpu_has_gpublas(void) {
-    return ggml_cpu_has_cublas() || ggml_cpu_has_clblast();
+    return ggml_cpu_has_cublas() || ggml_cpu_has_clblast() || ggml_cpu_has_vulkan();
 }
 
 int ggml_cpu_has_sse3(void) {
diff --git a/ggml.h b/ggml.h
index 08bff5511..b75727607 100644
--- a/ggml.h
+++ b/ggml.h
@@ -2078,6 +2078,7 @@ extern "C" {
     GGML_API int ggml_cpu_has_blas       (void);
     GGML_API int ggml_cpu_has_cublas     (void);
     GGML_API int ggml_cpu_has_clblast    (void);
+    GGML_API int ggml_cpu_has_vulkan     (void);
     GGML_API int ggml_cpu_has_gpublas    (void);
     GGML_API int ggml_cpu_has_sse3       (void);
     GGML_API int ggml_cpu_has_ssse3      (void);
diff --git a/llama.cpp b/llama.cpp
index 01cc1fb8f..b3fab8a67 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2495,7 +2495,7 @@ static void llm_load_tensors(
 #define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU
 #define LLAMA_BACKEND_OFFLOAD_SPLIT GGML_BACKEND_GPU
 #elif defined(GGML_USE_VULKAN)
-    fprintf(stderr, "%s: using Vulkan for GPU acceleration\n", __func__);
+    LLAMA_LOG_INFO("%s: using Vulkan for GPU acceleration\n", __func__);
 #define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU
 #define LLAMA_BACKEND_OFFLOAD_SPLIT GGML_BACKEND_GPU
 #else
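
Usage note (not part of the patch): a minimal sketch of how the new capability flag is meant to be consumed, assuming ggml was built with GGML_USE_VULKAN defined; in any other build ggml_cpu_has_vulkan() returns 0.

    #include <cstdio>
    #include "ggml.h"

    int main() {
        // Compile-time capability check: returns 1 only when ggml was
        // built with GGML_USE_VULKAN defined, 0 otherwise.
        if (ggml_cpu_has_vulkan()) {
            std::printf("backend: Vulkan\n");
        }
        // Vulkan now also counts towards the aggregate GPU BLAS check,
        // alongside cuBLAS and CLBlast.
        std::printf("gpu_blas: %d\n", ggml_cpu_has_gpublas());
        return 0;
    }

llama-bench resolves its reported backend string the same way, checking cuda, opencl, vulkan, then metal in order, so a Vulkan-only build will report "Vulkan".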