From aeacc57d3b89ca440a25c7fbbffa5a65cfba946f Mon Sep 17 00:00:00 2001
From: Henri Vasserman
Date: Thu, 25 May 2023 12:15:33 +0300
Subject: [PATCH] Fixes for model conversion

Co-authored-by: FNsi <125447286+FNsi@users.noreply.github.com>
---
 convert.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/convert.py b/convert.py
index d06bacef2..d24a63c10 100644
--- a/convert.py
+++ b/convert.py
@@ -149,7 +149,7 @@ class Params:
 
         # TODO: hack for open_llama_3b
         if n_embd == 3200:
-            n_mult = 108
+            n_mult = 216
             n_head = 32
             n_layer = 26
 
@@ -607,7 +607,9 @@ def convert_transformers_to_orig(model: LazyModel) -> LazyModel:
     out["norm.weight"] = model["model.norm.weight"]
     out["output.weight"] = model["lm_head.weight"]
 
-    n_head = model["model.layers.0.self_attn.q_proj.weight"].shape[1] // 128
+    # TODO: hack for open_llama_3b
+    n_embd = model["model.layers.0.self_attn.q_proj.weight"].shape[1]
+    n_head = 32 if n_embd == 3200 else n_embd // 128
     for i in itertools.count():
         if f"model.layers.{i}.self_attn.q_proj.weight" not in model:
             break
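
Note (not part of the patch): the second hunk replaces the blanket `shape[1] // 128` head count with a special case for open_llama_3b, whose 3200-wide embedding is not a multiple of 128. A minimal standalone sketch of that heuristic, assuming a head dimension of 128 for standard LLaMA checkpoints; the helper name guess_n_head and the example shapes below are illustrative only, not names from convert.py.

    def guess_n_head(q_proj_shape: tuple) -> int:
        """Infer the attention head count from q_proj's second dimension."""
        n_embd = q_proj_shape[1]
        # open_llama_3b hack: n_embd = 3200 is not divisible by 128,
        # so the head count is pinned to 32 (head_dim = 100) instead.
        if n_embd == 3200:
            return 32
        # Standard LLaMA checkpoints use 128-dimensional heads.
        return n_embd // 128

    if __name__ == "__main__":
        print(guess_n_head((4096, 4096)))  # e.g. LLaMA-7B shape -> 32
        print(guess_n_head((3200, 3200)))  # open_llama_3b shape -> 32 (special case)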