From aeacc57d3b89ca440a25c7fbbffa5a65cfba946f Mon Sep 17 00:00:00 2001
From: Henri Vasserman
Date: Thu, 25 May 2023 12:15:33 +0300
Subject: [PATCH] Fixes for model conversion

Co-authored-by: FNsi <125447286+FNsi@users.noreply.github.com>
---
 convert.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/convert.py b/convert.py
index d06bacef2..d24a63c10 100644
--- a/convert.py
+++ b/convert.py
@@ -149,7 +149,7 @@ class Params:
 
         # TODO: hack for open_llama_3b
         if n_embd == 3200:
-            n_mult = 108
+            n_mult = 216
             n_head = 32
             n_layer = 26
 
@@ -607,7 +607,9 @@ def convert_transformers_to_orig(model: LazyModel) -> LazyModel:
     out["norm.weight"] = model["model.norm.weight"]
     out["output.weight"] = model["lm_head.weight"]
 
-    n_head = model["model.layers.0.self_attn.q_proj.weight"].shape[1] // 128
+    # TODO: hack for open_llama_3b
+    n_embd = model["model.layers.0.self_attn.q_proj.weight"].shape[1]
+    n_head = 32 if n_embd == 3200 else n_embd // 128
     for i in itertools.count():
         if f"model.layers.{i}.self_attn.q_proj.weight" not in model:
             break
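
Note (not part of the patch): the second hunk replaces the blanket `shape[1] // 128` head count with a special case for open_llama_3b, whose 3200-wide embedding is not a multiple of 128. A minimal standalone sketch of that heuristic, assuming a head dimension of 128 for standard LLaMA checkpoints; the helper name guess_n_head and the example shapes below are illustrative only, not names from convert.py.

    def guess_n_head(q_proj_shape: tuple) -> int:
        """Infer the attention head count from q_proj's second dimension."""
        n_embd = q_proj_shape[1]
        # open_llama_3b hack: n_embd = 3200 is not divisible by 128,
        # so the head count is pinned to 32 (head_dim = 100) instead.
        if n_embd == 3200:
            return 32
        # Standard LLaMA checkpoints use 128-dimensional heads.
        return n_embd // 128

    if __name__ == "__main__":
        print(guess_n_head((4096, 4096)))  # e.g. LLaMA-7B shape -> 32
        print(guess_n_head((3200, 3200)))  # open_llama_3b shape -> 32 (special case)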