diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 3db790b2b..98df16a69 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -778,6 +778,7 @@ class BaichuanModel(Model):
         r = weights.shape[0] // 3
         return weights[r * n_part:r * n_part + r, ...]
 
+
 @Model.register("XverseForCausalLM")
 class XverseModel(Model):
     model_arch = gguf.MODEL_ARCH.XVERSE
@@ -882,7 +883,7 @@ class XverseModel(Model):
             data_torch = self._reverse_hf_permute(data_torch, head_count, head_count)
         if name.endswith(("k_proj.weight")):
             data_torch = self._reverse_hf_permute(data_torch, head_count, head_count_kv)
-
+
         data = data_torch.squeeze().numpy()
 
         # map tensor names
@@ -918,7 +919,8 @@ class XverseModel(Model):
             .swapaxes(1, 2)
             .reshape(weights.shape)
         )
-
+
+
 @Model.register("FalconForCausalLM", "RWForCausalLM")
 class FalconModel(Model):
     model_arch = gguf.MODEL_ARCH.FALCON
diff --git a/llama.cpp b/llama.cpp
index a8f675bde..4971fa0ab 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6522,7 +6522,6 @@ struct llm_build_context {
                 cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
                         model.layers[il].wo, NULL,
                         Kcur, Vcur, Qcur, KQ_mask, KQ_pos, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il);
-                cb(cur, "kqv_out", il);
             }
 
             if (il == n_layer - 1) {