From 0141e6395cb3b0096c2a92a52089b5d0905fd024 Mon Sep 17 00:00:00 2001
From: Green Sky
Date: Wed, 21 Jun 2023 19:52:40 +0200
Subject: [PATCH] clean up previous hack

---
 convert.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/convert.py b/convert.py
index 068c1dbf0..de6c39c67 100644
--- a/convert.py
+++ b/convert.py
@@ -649,18 +649,11 @@ def convert_transformers_to_orig(model: LazyModel, params: Params) -> LazyModel:
     out["norm.weight"] = model["model.norm.weight"]
     out["output.weight"] = model["lm_head.weight"]
 
-    n_embd = out["tok_embeddings.weight"].shape[1]
-
-    n_head = n_embd // 128 # guessed
-    if "model.layers.0.self_attn.rotary_emb.inv_freq" in model:
-        dim_inv_freq = model["model.layers.0.self_attn.rotary_emb.inv_freq"].shape[0]
-        n_head = n_embd // (dim_inv_freq * 2)
-
     for i in itertools.count():
         if f"model.layers.{i}.self_attn.q_proj.weight" not in model:
             break
-        out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], n_head)
-        out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], n_head)
+        out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head)
+        out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head)
         out[f"layers.{i}.attention.wv.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
         out[f"layers.{i}.attention.wo.weight"] = model[f"model.layers.{i}.self_attn.o_proj.weight"]
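
Note (not part of the patch): the removed "hack" inferred the attention head count from tensor shapes, assuming a head size of 128 unless the RoPE inverse-frequency tensor was present, in which case the head size is twice its length (inv_freq holds one entry per pair of rotary dimensions). The patch replaces this guess with the head count already carried in params. The sketch below only illustrates the removed heuristic; guess_n_head is a hypothetical name introduced here for illustration.

    from typing import Optional

    def guess_n_head(n_embd: int, dim_inv_freq: Optional[int]) -> int:
        """Heuristic removed by this patch: derive n_head from shapes."""
        if dim_inv_freq is not None:
            # head_dim == 2 * len(inv_freq), so n_head = n_embd / head_dim
            return n_embd // (dim_inv_freq * 2)
        return n_embd // 128  # assume a head size of 128 (guessed)

    # Example: LLaMA-7B has n_embd = 4096 and an inv_freq of length 64,
    # so both branches agree: 4096 // (64 * 2) == 4096 // 128 == 32 heads.
    assert guess_n_head(4096, 64) == 32
    assert guess_n_head(4096, None) == 32

Reading n_head from the model's own hyperparameters (params) avoids this shape-based inference entirely, which is why the patch can drop the heuristic rather than fix it.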