ggml-quants : 1.625 bpw ternary packing for BitNet 1.58b

2024-06-19 12:21:08 -04:00 · 2024-06-19 12:21:08 -04:00 · bd807499f7
commit bd807499f7
parent ac146628e4
11 changed files with 594 additions and 4 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -158,6 +158,8 @@ extern "C" {
        LLAMA_FTYPE_MOSTLY_IQ4_XS        = 30, // except 1d tensors
        LLAMA_FTYPE_MOSTLY_IQ1_M         = 31, // except 1d tensors
        LLAMA_FTYPE_MOSTLY_BF16          = 32, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q2_2          = 33, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q1_3          = 34, // except 1d tensors

        LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
    };