Fixes for model conversion
Co-authored-by: FNsi <125447286+FNsi@users.noreply.github.com>
This commit is contained in:
parent ff99507049
commit aeacc57d3b
1 changed file with 4 additions and 2 deletions
@@ -149,7 +149,7 @@ class Params:
         # TODO: hack for open_llama_3b
         if n_embd == 3200:
-            n_mult = 108
+            n_mult = 216
             n_head = 32
             n_layer = 26
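For reference, a minimal sketch (not part of this commit) of why open_llama_3b needs a special case here; the hyperparameter values are the model's published ones, and the arithmetic is only illustrative:

n_embd, n_head, n_layer = 3200, 32, 26  # open_llama_3b hyperparameters
head_dim = n_embd // n_head             # 100, not the usual 128
assert head_dim == 100
assert n_embd // 128 == 25              # the generic 128-dim-head guess would give 25 heads, not 32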
@@ -607,7 +607,9 @@ def convert_transformers_to_orig(model: LazyModel) -> LazyModel:
     out["norm.weight"] = model["model.norm.weight"]
     out["output.weight"] = model["lm_head.weight"]
 
-    n_head = model["model.layers.0.self_attn.q_proj.weight"].shape[1] // 128
+    # TODO: hack for open_llama_3b
+    n_embd = model["model.layers.0.self_attn.q_proj.weight"].shape[1]
+    n_head = 32 if n_embd == 3200 else n_embd // 128
     for i in itertools.count():
         if f"model.layers.{i}.self_attn.q_proj.weight" not in model:
             break
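Below is a minimal sketch (not part of this commit) of the new head-count derivation, using a hypothetical derive_n_head helper; the tensor shapes are illustrative only:

def derive_n_head(q_proj_shape) -> int:
    # Infer n_embd from the q_proj weight's second dimension, as the diff does.
    n_embd = q_proj_shape[1]
    # open_llama_3b has 100-dim heads (3200 / 32), so the usual
    # n_embd // 128 rule would report 25 heads instead of 32.
    return 32 if n_embd == 3200 else n_embd // 128

assert derive_n_head((3200, 3200)) == 32  # open_llama_3b
assert derive_n_head((5120, 5120)) == 40  # e.g. LLaMA-13B with 128-dim heads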