[Fix]: convert.py support baichuan7B
commit 3aedf97ca0
parent 3a007648f2

1 changed file with 6 additions and 5 deletions
convert.py
@@ -469,7 +469,7 @@ class UnquantizedTensor(Tensor):
     def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor':
         r = self.ndarray.shape[0] // 3
-        return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head))
+        return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head, n_head))

     def part(self, n_part: int) -> 'UnquantizedTensor':
         r = self.ndarray.shape[0] // 3
         return UnquantizedTensor(self.ndarray[r * n_part : r * n_part + r, ...])
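Note on the first hunk: upstream `permute` grew a third `n_head_kv` parameter for grouped-query attention, which is what broke the old two-argument call here. Baichuan-7B uses plain multi-head attention, so `permute_part` now passes `n_head` for both head counts. A minimal standalone sketch of the idea, assuming the numpy-based `permute` from upstream convert.py and using eager arrays instead of the file's lazy tensors:

    import numpy as np

    def permute(weights: np.ndarray, n_head: int, n_head_kv: int) -> np.ndarray:
        # Re-interleave each attention head's rows into the ordering
        # llama.cpp expects for its RoPE implementation.
        if n_head != n_head_kv:
            n_head //= n_head_kv
        return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                       .swapaxes(1, 2)
                       .reshape(weights.shape))

    def permute_part(w_pack: np.ndarray, n_part: int, n_head: int) -> np.ndarray:
        # W_pack stacks the Q, K and V projections along dim 0, so each part
        # is one third of the rows. n_head is passed for both head counts
        # because Baichuan-7B has no separate KV head count.
        r = w_pack.shape[0] // 3
        return permute(w_pack[r * n_part : r * n_part + r, ...], n_head, n_head)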
@@ -952,16 +952,17 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
             #tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
         elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
             print(f"Unpacking and permuting layer {i}")
-            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head)
-            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv)
+            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head)
+            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head)
             tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy        (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
+            del tmp[f"model.layers.{i}.self_attn.W_pack.weight"]
         else:
             break

     out: LazyModel = {}
     for name, lazy_tensor in model.items():
         name_new = name

         if name in tmap:
             name_new = tmap[name]
         elif name.endswith(".weight") and name[:-7] in tmap:
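Note on the second hunk: Baichuan checkpoints store Q, K and V stacked row-wise in a single `W_pack` tensor, so the converter slices out the three thirds (permuting Q and K for RoPE, copying V verbatim), and the added `del` then drops the packed tensor from `tmp` so the unrecognized `W_pack` name never reaches the name-mapping loop below. A toy eager version, reusing the sketch helpers from the previous note (hypothetical shapes, zero weights for illustration only):

    d_model, n_head = 4096, 32
    w_pack = np.zeros((3 * d_model, d_model), dtype=np.float32)  # stand-in for W_pack

    q = permute_part(w_pack, 0, n_head)             # q_proj: third 0, permuted
    k = permute_part(w_pack, 1, n_head)             # k_proj: third 1, permuted
    v = w_pack[2 * d_model : 3 * d_model, ...]      # v_proj: third 2, copied as-is (part_lazy)
    assert q.shape == k.shape == v.shape == (d_model, d_model)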
@@ -1112,7 +1113,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
     parser.add_argument("--ctx",         type=int, help="model training context (default: based on input)")
     parser.add_argument("--concurrency", type=int, help=f"concurrency used for conversion (default: {DEFAULT_CONCURRENCY})", default = DEFAULT_CONCURRENCY)
     args = parser.parse_args(args_in)

     if args.dump_single:
         model_plus = lazy_load_file(args.model)
         do_dump_model(model_plus)
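With the fix in place, a Baichuan-7B checkpoint should convert like any other Hugging Face model directory, e.g. (hypothetical local path):

    python convert.py models/Baichuan-7B/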