llama : fix F16/F32 downcast + improve names (#5980)

2024-03-11 09:56:47 +02:00 · 2024-03-11 09:56:47 +02:00 · ee35600b90
commit ee35600b90
parent be858f6205
2 changed files with 36 additions and 33 deletions
--- a/llama.h
+++ b/llama.h
@@ -278,7 +278,7 @@ extern "C" {
        bool allow_requantize;       // allow quantizing non-f32/f16 tensors
        bool quantize_output_tensor; // quantize output.weight
        bool only_copy;              // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
-        bool pure;                   // disable k-quant mixtures and quantize all tensors to the same type
+        bool pure;                   // quantize all tensors to the default type
        void * imatrix;              // pointer to importance matrix data
    } llama_model_quantize_params;