Update llama_model_quantize_params

2024-04-22 23:38:09 +08:00 · 2024-04-22 23:38:09 +08:00 · 141eb5107f
commit 141eb5107f
parent d6e453eb6c
2 changed files with 2 additions and 1 deletion
--- a/llama.cpp
+++ b/llama.cpp
@@ -14184,6 +14184,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
        /*.quantize_output_tensor      =*/ true,
        /*.only_copy                   =*/ false,
        /*.pure                        =*/ false,
+        /*.keep_split                  =*/ false,
        /*.imatrix                     =*/ nullptr,
        /*.kv_overrides                =*/ nullptr,
    };
--- a/llama.h
+++ b/llama.h
@@ -288,9 +288,9 @@ extern "C" {
        bool quantize_output_tensor;         // quantize output.weight
        bool only_copy;                      // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
        bool pure;                           // quantize all tensors to the default type
+        bool keep_split;                     // quantize to the same number of shards
        void * imatrix;                      // pointer to importance matrix data
        void * kv_overrides;                 // pointer to vector containing overrides
-        bool keep_split;                     // quantize to the same number of shards
    } llama_model_quantize_params;

    // grammar types