ggml : add TQ1_0 and TQ2_0 ternary quantization types

2024-07-30 17:55:54 -04:00 · 2024-07-30 17:55:54 -04:00 · 77b8f84ae7
commit 77b8f84ae7
parent 79a278e922
10 changed files with 563 additions and 16 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -166,8 +166,10 @@ extern "C" {
        LLAMA_FTYPE_MOSTLY_Q4_0_4_4      = 33, // except 1d tensors
        LLAMA_FTYPE_MOSTLY_Q4_0_4_8      = 34, // except 1d tensors
        LLAMA_FTYPE_MOSTLY_Q4_0_8_8      = 35, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q2_2          = 36, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q1_3          = 37, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_TQ1_0         = 36, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_TQ2_0         = 37, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q1_3          = 38, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q2_2          = 39, // except 1d tensors

        LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
    };