k_cache: add newly supported types to llama-bench and CUDA supports_op

2024-03-20 19:39:15 +01:00 · 2024-03-20 19:39:15 +01:00 · 9e1bda9315
commit 9e1bda9315
parent d8a498dcbe
2 changed files with 12 additions and 0 deletions
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -249,6 +249,9 @@ static ggml_type ggml_type_from_name(const std::string & s) {
    if (s == "q5_1") {
        return GGML_TYPE_Q5_1;
    }
+    if (s == "iq4_nl") {
+        return GGML_TYPE_IQ4_NL;
+    }

    return GGML_TYPE_COUNT;
 }
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -11465,6 +11465,15 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
                if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_Q4_1) {
                    return true;
                }
+                if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_Q5_0) {
+                    return true;
+                }
+                if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_Q5_1) {
+                    return true;
+                }
+                if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_IQ4_NL) {
+                    return true;
+                }
                if (src0_type == GGML_TYPE_F16 && src1_type == GGML_TYPE_F16) {
                    return true;
                }