ggml : quantization refactoring (#3833)
* ggml : factor all quantization code in ggml-quants

ggml-ci

* ggml-quants : fix Zig and Swift builds + quantize tool

ggml-ci

* quantize : --pure option for disabling k-quant mixtures

---------

Co-authored-by: cebtenzzre <cebtenzzre@gmail.com>
This commit is contained in:
parent
ff3bad83e2
commit
d69d777c02
11 changed files with 2372 additions and 2385 deletions
1
llama.h
1
llama.h
|
@@ -191,6 +191,7 @@ extern "C" {
|
|||
bool allow_requantize; // allow quantizing non-f32/f16 tensors
|
||||
bool quantize_output_tensor; // quantize output.weight
|
||||
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
|
||||
bool pure; // disable k-quant mixtures and quantize all tensors to the same type
|
||||
} llama_model_quantize_params;
|
||||
|
||||
// grammar types
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue