From 81f308ad646d4bb295ab70bf089dcaab03eeb15e Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Mon, 8 Apr 2024 15:04:18 +0200
Subject: [PATCH] llama: dbrx: fix experts tensor layout

---
 convert-hf-to-gguf.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 7cdd39a27..94b392be9 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -1523,9 +1523,10 @@ class DbrxModel(Model):
         n_embd = self.hparams["d_model"]
 
         # Specific behavior for experts tensors: reshape to 3D and add suffix .weight
-        exp_tensor_names = {"ffn.experts.mlp.v1": (2, 1, 3), # LLM_TENSOR_FFN_GATE_EXPS(n_embd, n_ff, n_expert)
-                            "ffn.experts.mlp.w2": (1, 2, 3), # LLM_TENSOR_FFN_DOWN_EXPS(n_ff, n_embd, n_expert)
-                            "ffn.experts.mlp.w1": (2, 1, 3)} # LLM_TENSOR_FFN_UP_EXPS (n_embd, n_ff, n_expert)
+        # original implementation expects (n_expert, n_ff, n_embd)
+        exp_tensor_names = {"ffn.experts.mlp.v1": (2, 1, 0), # LLM_TENSOR_FFN_GATE_EXPS(n_embd, n_ff, n_expert)
+                            "ffn.experts.mlp.w2": (1, 2, 0), # LLM_TENSOR_FFN_DOWN_EXPS(n_ff, n_embd, n_expert)
+                            "ffn.experts.mlp.w1": (2, 1, 0)} # LLM_TENSOR_FFN_UP_EXPS (n_embd, n_ff, n_expert)
         experts = False
         for exp_tensor_name in exp_tensor_names.keys():
             if name.find(exp_tensor_name) != -1 and name.find(".weight") == -1:
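
Note: what follows is a minimal sketch, not part of the commit, showing why the third index in each tuple had to change from 3 to 0. It assumes the converter applies each tuple via torch.Tensor.permute after viewing the checkpoint tensor as (n_expert, n_ff, n_embd), which is what the new inline comment states; the dimension sizes below are arbitrary illustration values, not DBRX hyperparameters.

import torch

# arbitrary sizes for illustration only
n_expert, n_ff, n_embd = 4, 16, 8

# the source layout the patch comment describes: (n_expert, n_ff, n_embd)
x = torch.arange(n_expert * n_ff * n_embd).view(n_expert, n_ff, n_embd)

# new tuples: valid axis indices that yield the GGUF layouts named in the
# patch's inline comments
gate_up = x.permute(2, 1, 0)  # -> (n_embd, n_ff, n_expert), for v1/w1
down = x.permute(1, 2, 0)     # -> (n_ff, n_embd, n_expert), for w2
assert gate_up.shape == (n_embd, n_ff, n_expert)
assert down.shape == (n_ff, n_embd, n_expert)

# old tuples: axis index 3 is out of range for a 3-D tensor, so permute
# raises instead of producing the intended layout
try:
    x.permute(2, 1, 3)
except IndexError as e:
    print(f"permute(2, 1, 3) fails: {e}")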