From 597bc152b21ce88fc51507ac0abe840d4f5a1c43 Mon Sep 17 00:00:00 2001
From: drollings
Date: Thu, 10 Oct 2024 16:58:15 -0500
Subject: [PATCH] llama.cpp : fix --leave-output-tensor for llama-quantize.

* Tweaked the --leave-output-tensor parameter of llama-quantize so that
  llama_model_quantize_internal() excludes any tensor named
  "*output.weight" instead of just "output.weight".
---
 src/llama.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index da7afb1ee..3ed1dd7ae 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -18512,7 +18512,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         // do not quantize norm tensors
         quantize &= name.find("_norm.weight") == std::string::npos;
 
-        quantize &= params->quantize_output_tensor || name != "output.weight";
+        // While there's an effort to avoid hardcoded tensor names,
+        // --leave-output-tensor should still exclude any tensor named
+        // *output.weight instead of just output.weight.
+        quantize &= params->quantize_output_tensor || (name.find("output.weight") == std::string::npos);
         quantize &= !params->only_copy;
 
         // do not quantize expert gating tensors
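
For reference, a minimal standalone sketch of the behavioral difference follows. The names "speaker.output.weight" and "token_embd.weight" are illustrative stand-ins, not claims about any specific model's tensor layout. With --leave-output-tensor set (quantize_output_tensor == false), the old check keeps only a tensor named exactly "output.weight" unquantized, while the patched check keeps any tensor whose name contains "output.weight":

#include <cstdio>
#include <string>
#include <vector>

int main() {
    // --leave-output-tensor corresponds to quantize_output_tensor == false
    const bool quantize_output_tensor = false;

    const std::vector<std::string> names = {
        "output.weight",          // left unquantized by both checks
        "speaker.output.weight",  // hypothetical name; left unquantized only by the new check
        "token_embd.weight",      // still quantized by both checks
    };

    for (const std::string & name : names) {
        // old behavior: exact match against "output.weight"
        const bool old_q = quantize_output_tensor || name != "output.weight";
        // new behavior: substring match, i.e. any "*output.weight" tensor
        const bool new_q = quantize_output_tensor || name.find("output.weight") == std::string::npos;
        std::printf("%-24s old: %-8s new: %s\n", name.c_str(),
                    old_q ? "quantize" : "keep", new_q ? "quantize" : "keep");
    }
    return 0;
}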