quantize: be able to specify the output tensor type
commit 7883796f71
parent b2075fd6a5
3 changed files with 34 additions and 10 deletions
llama.h
@@ -277,6 +277,7 @@ extern "C" {
     typedef struct llama_model_quantize_params {
         int32_t nthread;                     // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
         enum llama_ftype ftype;              // quantize to this llama_ftype
+        enum ggml_type output_tensor_type;   // output tensor type
         bool allow_requantize;               // allow quantizing non-f32/f16 tensors
         bool quantize_output_tensor;         // quantize output.weight
         bool only_copy;                      // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
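For context, a minimal sketch of how a caller might set the new field through the existing llama_model_quantize() API. The file paths and the Q6_K choice are illustrative assumptions, not part of this commit:

#include "llama.h"

int main(void) {
    llama_backend_init();

    // Start from the library defaults, then override the new field.
    llama_model_quantize_params params = llama_model_quantize_default_params();
    params.ftype              = LLAMA_FTYPE_MOSTLY_Q4_K_M; // overall quantization mix
    params.output_tensor_type = GGML_TYPE_Q6_K;            // assumption: keep the output tensor at Q6_K

    // Hypothetical input/output paths.
    uint32_t rc = llama_model_quantize("model-f16.gguf", "model-q4_k_m.gguf", &params);

    llama_backend_free();
    return rc == 0 ? 0 : 1;
}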