diff --git a/convert.py b/convert.py
index 39c52524d..94728015f 100755
--- a/convert.py
+++ b/convert.py
@@ -1257,19 +1257,19 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) ->
 
     # merge experts into one tensor
     if params.n_experts > 0:
-        for l in range(params.n_layer):
+        for i_l in range(params.n_layer):
             for w in range(1, 4):
                 experts = []
                 for e in range(params.n_experts):
-                    if f"layers.{l}.feed_forward.experts.{e}.w{w}.weight" in model:
-                        experts.append(model[f"layers.{l}.feed_forward.experts.{e}.w{w}.weight"])
-                        del tmp[f"layers.{l}.feed_forward.experts.{e}.w{w}.weight"]
-                    elif f"model.layers.{l}.block_sparse_moe.experts.{e}.w{w}.weight" in model:
-                        experts.append(model[f"model.layers.{l}.block_sparse_moe.experts.{e}.w{w}.weight"])
-                        del tmp[f"model.layers.{l}.block_sparse_moe.experts.{e}.w{w}.weight"]
+                    if f"layers.{i_l}.feed_forward.experts.{e}.w{w}.weight" in model:
+                        experts.append(model[f"layers.{i_l}.feed_forward.experts.{e}.w{w}.weight"])
+                        del tmp[f"layers.{i_l}.feed_forward.experts.{e}.w{w}.weight"]
+                    elif f"model.layers.{i_l}.block_sparse_moe.experts.{e}.w{w}.weight" in model:
+                        experts.append(model[f"model.layers.{i_l}.block_sparse_moe.experts.{e}.w{w}.weight"])
+                        del tmp[f"model.layers.{i_l}.block_sparse_moe.experts.{e}.w{w}.weight"]
                     else:
-                        raise ValueError(f"Expert tensor not found: layers.{l}.feed_forward.experts.{e}.w{w}.weight")
-                tmp[f"layers.{l}.feed_forward.experts.w{w}.weight"] = pack_experts_lazy(experts)
+                        raise ValueError(f"Expert tensor not found: layers.{i_l}.feed_forward.experts.{e}.w{w}.weight")
+                tmp[f"layers.{i_l}.feed_forward.experts.w{w}.weight"] = pack_experts_lazy(experts)
 
     # HF models permut or pack some of the tensors, so we need to undo that
     for i in itertools.count():
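
For context, the hunk above collects the per-expert feed-forward weights of each layer and merges them into one tensor per projection. Below is a minimal, standalone sketch of that merge using toy numpy arrays; it assumes pack_experts_lazy stacks the expert matrices along a new leading dimension, and the shapes used here are illustrative, not taken from convert.py.

import numpy as np

n_layer = 2
n_experts = 4

# Toy stand-in for the model dict: one small weight matrix per layer/expert/projection.
model = {
    f"layers.{i_l}.feed_forward.experts.{e}.w{w}.weight": np.random.rand(8, 16)
    for i_l in range(n_layer)
    for e in range(n_experts)
    for w in range(1, 4)
}

packed = {}
for i_l in range(n_layer):
    for w in range(1, 4):
        experts = [
            model[f"layers.{i_l}.feed_forward.experts.{e}.w{w}.weight"]
            for e in range(n_experts)
        ]
        # Stack the experts into one (n_experts, rows, cols) tensor, standing in
        # for what pack_experts_lazy is assumed to do lazily in convert.py.
        packed[f"layers.{i_l}.feed_forward.experts.w{w}.weight"] = np.stack(experts)

print(packed["layers.0.feed_forward.experts.w1.weight"].shape)  # (4, 8, 16)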