llama : fix F16/F32 downcast + improve names (#5980)

Georgi Gerganov 2024-03-11 09:56:47 +02:00
parent be858f6205
commit ee35600b90
2 changed files with 36 additions and 33 deletions


@@ -278,7 +278,7 @@ extern "C" {
         bool allow_requantize;               // allow quantizing non-f32/f16 tensors
         bool quantize_output_tensor;         // quantize output.weight
         bool only_copy;                      // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
-        bool pure;                           // disable k-quant mixtures and quantize all tensors to the same type
+        bool pure;                           // quantize all tensors to the default type
         void * imatrix;                      // pointer to importance matrix data
     } llama_model_quantize_params;
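
For context, `pure` is one of several options in `llama_model_quantize_params`. Below is a minimal usage sketch, assuming the llama.cpp C API around the time of this commit (`llama_model_quantize_default_params()` and `llama_model_quantize()`); the file names and the chosen `ftype` are illustrative only, not taken from the commit.

// Minimal sketch: quantize every tensor to the default (target) type by
// setting pure = true. File names and ftype below are hypothetical.
#include "llama.h"
#include <stdbool.h>
#include <stdio.h>

int main(void) {
    llama_model_quantize_params params = llama_model_quantize_default_params();

    params.ftype = LLAMA_FTYPE_MOSTLY_Q4_K_M; // target quantization type
    params.pure  = true;                      // quantize all tensors to the default type,
                                              // instead of using k-quant mixtures

    // llama_model_quantize() returns 0 on success
    if (llama_model_quantize("model-f16.gguf", "model-q4_k_m.gguf", &params) != 0) {
        fprintf(stderr, "quantization failed\n");
        return 1;
    }
    return 0;
}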