Don't multiply embeddings by embedding_multiplier_scale, as that already happens in llama.cpp.

commit 9a0629d545
parent ef671c693d
Author: Heiner
Date: 2024-05-10 12:40:05 +02:00


@@ -270,13 +270,9 @@ def convert_weight(name, weight, scales, config, dtype=torch.float32, device=None
     else:
         weight = weight * scale
-    if name == "token_embd":
-        weight *= config.embedding_multiplier_scale
-    elif len(weight.shape) >= 2:
+    if name != "token_embd" and len(weight.shape) >= 2:
         # Transpose linear matrix
         weight = weight.transpose(-1, -2)
     if name.endswith("ffn_gate_inp") or name.endswith("_exps"):
         weight = weight[config.experts]  # gather.
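
For illustration, here is a minimal sketch of what the affected branch does after this change. Only the logic shown in the hunk comes from the commit; the simplified signature (a single optional scale instead of the scales/config-driven lookup), the scale handling above the hunk, and the return are assumptions made for the sketch.

import torch

def convert_weight(name, weight, scale, config):
    # Sketch only: the real function also handles dtype/device and a
    # per-tensor scales mapping; here we assume one optional scale.
    if scale is not None:
        weight = weight * scale
    # After this commit, embedding_multiplier_scale is no longer baked into
    # "token_embd" during conversion; llama.cpp applies it at inference time.
    if name != "token_embd" and len(weight.shape) >= 2:
        # Transpose linear matrices (embeddings keep their original layout)
        weight = weight.transpose(-1, -2)
    if name.endswith("ffn_gate_inp") or name.endswith("_exps"):
        # Gather only the experts selected in the config
        weight = weight[config.experts]
    return weight

# Example (hypothetical tensor names): a 2D linear weight is transposed,
# while the token embedding is left untouched.
w = torch.randn(8, 4)
print(convert_weight("blk.0.ffn_up", w, None, config=None).shape)  # torch.Size([4, 8])
print(convert_weight("token_embd", w, None, config=None).shape)    # torch.Size([8, 4])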