Don't multiply embeddings by embedding_multiplier_scale during conversion; llama.cpp already applies that scaling itself.

This commit is contained in:
Heiner 2024-05-10 12:40:05 +02:00
parent ef671c693d
commit 9a0629d545

View file

@@ -270,13 +270,9 @@ def convert_weight(name, weight, scales, config, dtype=torch.float32, device=None):
     else:
         weight = weight * scale
-    if name == "token_embd":
-        weight *= config.embedding_multiplier_scale
-    elif len(weight.shape) >= 2:
+    if name != "token_embd" and len(weight.shape) >= 2:
         # Transpose linear matrix
         weight = weight.transpose(-1, -2)
     if name.endswith("ffn_gate_inp") or name.endswith("_exps"):
         weight = weight[config.experts]  # gather.