From 9e1bda931599db4cc6ca0deb54a5a304ab96ed21 Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Wed, 20 Mar 2024 19:39:15 +0100 Subject: [PATCH] k_cache: add newly supported types to llama-bench and CUDA supports_op --- examples/llama-bench/llama-bench.cpp | 3 +++ ggml-cuda.cu | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 4cb230804..82413b79d 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -249,6 +249,9 @@ static ggml_type ggml_type_from_name(const std::string & s) { if (s == "q5_1") { return GGML_TYPE_Q5_1; } + if (s == "iq4_nl") { + return GGML_TYPE_IQ4_NL; + } return GGML_TYPE_COUNT; } diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 14d021ac8..a2d4fae91 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -11465,6 +11465,15 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_Q4_1) { return true; } + if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_Q5_0) { + return true; + } + if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_Q5_1) { + return true; + } + if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_IQ4_NL) { + return true; + } if (src0_type == GGML_TYPE_F16 && src1_type == GGML_TYPE_F16) { return true; }