quantize : --pure option for disabling k-quant mixtures

2023-10-28 16:32:49 -04:00 · 2023-10-28 16:32:49 -04:00 · 8a86b95e87
commit 8a86b95e87
parent ee37e35dc5
3 changed files with 10 additions and 3 deletions
--- a/llama.h
+++ b/llama.h
@@ -191,6 +191,7 @@ extern "C" {
        bool allow_requantize;       // allow quantizing non-f32/f16 tensors
        bool quantize_output_tensor; // quantize output.weight
        bool only_copy;              // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
+        bool pure;                   // disable k-quant mixtures and quantize all tensors to the same type
    } llama_model_quantize_params;

    // grammar types