Allow quantize to only copy tensors, some other improvements (#2931)
* Allow quantize tool to only copy tensors to allow repackaging models. * Slightly better logic when requantizing. * Change help message to go to `stdout`.
This commit is contained in:
parent
0d58936686
commit
5d6f19f16b
3 changed files with 37 additions and 13 deletions
1
llama.h
1
llama.h
|
@ -164,6 +164,7 @@ extern "C" {
|
|||
enum llama_ftype ftype; // quantize to this llama_ftype
|
||||
bool allow_requantize; // allow quantizing non-f32/f16 tensors
|
||||
bool quantize_output_tensor; // quantize output.weight
|
||||
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
|
||||
} llama_model_quantize_params;
|
||||
|
||||
// grammar types
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue