quantize : --pure option for disabling k-quant mixtures
This commit is contained in:
parent
ee37e35dc5
commit
8a86b95e87
3 changed files with 10 additions and 3 deletions
1
llama.h
1
llama.h
|
@@ -191,6 +191,7 @@ extern "C" {
|
|||
bool allow_requantize; // allow quantizing non-f32/f16 tensors
|
||||
bool quantize_output_tensor; // quantize output.weight
|
||||
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
|
||||
bool pure; // disable k-quant mixtures and quantize all tensors to the same type
|
||||
} llama_model_quantize_params;
|
||||
|
||||
// grammar types
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue