From 3aedf97ca07d67d29ba0ec7619acc6759283182e Mon Sep 17 00:00:00 2001 From: jameswu2014 <545426914@qq.com> Date: Tue, 29 Aug 2023 16:37:59 +0800 Subject: [PATCH] [Fix]: convert.py support baichuan7B --- convert.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/convert.py b/convert.py index a15e6ccd2..a5e7e6130 100755 --- a/convert.py +++ b/convert.py @@ -469,7 +469,7 @@ class UnquantizedTensor(Tensor): def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor': r = self.ndarray.shape[0] // 3 - return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head)) + return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head, n_head)) def part(self, n_part: int) -> 'UnquantizedTensor': r = self.ndarray.shape[0] // 3 @@ -952,16 +952,17 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel: #tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"] elif f"model.layers.{i}.self_attn.W_pack.weight" in model: print(f"Unpacking and permuting layer {i}") - tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head) - tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv) + tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head) + tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head) tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2) + del tmp[f"model.layers.{i}.self_attn.W_pack.weight"] else: break out: LazyModel = {} for name, lazy_tensor in model.items(): name_new = name - + if name in tmap: name_new = tmap[name] elif name.endswith(".weight") and name[:-7] in tmap: @@ -1112,7 +1113,7 @@ def main(args_in: Optional[List[str]] = None) -> None: parser.add_argument("--ctx", type=int, help="model training context (default: based on input)") parser.add_argument("--concurrency", type=int, help=f"concurrency used for conversion (default: {DEFAULT_CONCURRENCY})", default = DEFAULT_CONCURRENCY) args = parser.parse_args(args_in) - + if args.dump_single: model_plus = lazy_load_file(args.model) do_dump_model(model_plus)