llama.cpp : fix --leave-output-tensor for llama-quantize.
* Tweaked how llama-quantize's --leave-output-tensor parameter affects llama_model_quantize_internal(): it now excludes any tensor named "*output.weight" instead of just "output.weight".
parent 7eee341bee
commit 597bc152b2
1 changed file with 4 additions and 1 deletion
@@ -18512,7 +18512,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         // do not quantize norm tensors
         quantize &= name.find("_norm.weight") == std::string::npos;
 
-        quantize &= params->quantize_output_tensor || name != "output.weight";
+        // While there's an effort to avoid hardcoded tensor names,
+        // --leave-output-tensor should still exclude any tensor named
+        // *output.weight instead of just output.weight.
+        quantize &= params->quantize_output_tensor || (name.find("output.weight") == std::string::npos);
         quantize &= !params->only_copy;
 
         // do not quantize expert gating tensors
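For reference, a minimal standalone sketch (not llama.cpp code; the tensor names below are made up) of what the switch from an exact comparison to a substring search changes. When --leave-output-tensor is passed (i.e. quantize_output_tensor is false), the old check skipped only the tensor literally named output.weight, while the new check skips any name containing it:

// Standalone illustration; the tensor names are hypothetical examples.
#include <iostream>
#include <string>
#include <vector>

int main() {
    std::cout << std::boolalpha;
    const std::vector<std::string> names = {
        "output.weight",          // skipped by both the old and the new check
        "lm.output.weight",       // a "*output.weight" name: skipped only by the new check
        "blk.0.attn_norm.weight", // unrelated tensor: skipped by neither check
    };
    for (const std::string & name : names) {
        const bool old_skip = (name == "output.weight");                       // old: exact match
        const bool new_skip = name.find("output.weight") != std::string::npos; // new: substring match
        std::cout << name << "  old_skip=" << old_skip << "  new_skip=" << new_skip << '\n';
    }
}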