Update llama_model_quantize_params
This commit is contained in:
parent
d6e453eb6c
commit
141eb5107f
2 changed files with 2 additions and 1 deletions
|
@ -14184,6 +14184,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
|
||||||
/*.quantize_output_tensor =*/ true,
|
/*.quantize_output_tensor =*/ true,
|
||||||
/*.only_copy =*/ false,
|
/*.only_copy =*/ false,
|
||||||
/*.pure =*/ false,
|
/*.pure =*/ false,
|
||||||
|
/*.keep_split =*/ false,
|
||||||
/*.imatrix =*/ nullptr,
|
/*.imatrix =*/ nullptr,
|
||||||
/*.kv_overrides =*/ nullptr,
|
/*.kv_overrides =*/ nullptr,
|
||||||
};
|
};
|
||||||
|
|
2
llama.h
2
llama.h
|
@ -288,9 +288,9 @@ extern "C" {
|
||||||
bool quantize_output_tensor; // quantize output.weight
|
bool quantize_output_tensor; // quantize output.weight
|
||||||
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
|
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
|
||||||
bool pure; // quantize all tensors to the default type
|
bool pure; // quantize all tensors to the default type
|
||||||
|
bool keep_split; // quantize to the same number of shards
|
||||||
void * imatrix; // pointer to importance matrix data
|
void * imatrix; // pointer to importance matrix data
|
||||||
void * kv_overrides; // pointer to vector containing overrides
|
void * kv_overrides; // pointer to vector containing overrides
|
||||||
bool keep_split; // quantize to the same number of shards
|
|
||||||
} llama_model_quantize_params;
|
} llama_model_quantize_params;
|
||||||
|
|
||||||
// grammar types
|
// grammar types
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue