Don't multiply embeddings by embedding_multiplier_scale during conversion; llama.cpp already applies that scaling itself.

This commit is contained in:
Heiner 2024-05-10 12:40:05 +02:00
parent ef671c693d
commit 9a0629d545

View file

@@ -270,13 +270,9 @@ def convert_weight(name, weight, scales, config, dtype=torch.float32, device=None):
     else:
         weight = weight * scale
-    if name == "token_embd":
-        weight *= config.embedding_multiplier_scale
-    elif len(weight.shape) >= 2:
+    if name != "token_embd" and len(weight.shape) >= 2:
         # Transpose linear matrix
         weight = weight.transpose(-1, -2)
     if name.endswith("ffn_gate_inp") or name.endswith("_exps"):
         weight = weight[config.experts]  # gather.