quantize : --pure option for disabling k-quant mixtures

This commit is contained in:
cebtenzzre 2023-10-28 16:32:49 -04:00
parent ee37e35dc5
commit 8a86b95e87
3 changed files with 10 additions and 3 deletions

View file

@@ -191,6 +191,7 @@ extern "C" {
bool allow_requantize; // allow quantizing non-f32/f16 tensors
bool quantize_output_tensor; // quantize output.weight
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
bool pure; // disable k-quant mixtures and quantize all tensors to the same type
} llama_model_quantize_params;
// grammar types