llama : do not quantize expert gating tensors
parent 6cfb31f9ea
commit d1259b7b35
1 changed file with 3 additions and 0 deletions
@@ -8443,6 +8443,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         quantize &= params->quantize_output_tensor || name != "output.weight";
         quantize &= !params->only_copy;
 
+        // do not quantize expert gating tensors
+        quantize &= name.find("ffn_gate_inp.weight") == std::string::npos;
+
         enum ggml_type new_type;
         void * new_data;
         size_t new_size;
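The added guard relies on std::string::find returning std::string::npos when the substring is absent: quantize stays true only for tensors whose name does not contain "ffn_gate_inp.weight", so the expert-routing tensors of MoE models keep their original precision while all other eligible tensors are still quantized. Below is a minimal standalone sketch of the same name-based filter; should_quantize and the sample tensor names are illustrative assumptions, not llama.cpp's actual API.

#include <iostream>
#include <string>

// Sketch of the commit's name-based filter. should_quantize is a
// hypothetical helper for illustration, not a llama.cpp function.
static bool should_quantize(const std::string & name) {
    bool quantize = true;
    // do not quantize expert gating tensors
    quantize &= name.find("ffn_gate_inp.weight") == std::string::npos;
    return quantize;
}

int main() {
    // Example tensor names (assumed GGUF-style naming, where
    // "blk.N.ffn_gate_inp.weight" is the per-layer expert router).
    const char * names[] = {
        "blk.0.ffn_gate_inp.weight",
        "blk.0.ffn_down.weight",
    };
    for (const char * n : names) {
        std::cout << n
                  << (should_quantize(n) ? " -> quantize" : " -> keep full precision")
                  << "\n";
    }
    return 0;
}

Using a substring match rather than an exact comparison means one check covers the router tensor in every layer, since the layer index only changes the prefix of the name.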