From 1c8b3e4c4464baab8507a7279bc1367985b3488f Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Mon, 23 Sep 2024 13:59:54 -0600 Subject: [PATCH] fix: Allow "output" layer in granite moe architecture (convert and cpp) Branch: GraniteMoE Co-Authored-By: git@compilade.net Signed-off-by: Gabe Goodhart --- gguf-py/gguf/constants.py | 1 + src/llama.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index fed7418be..bd83be144 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -1247,6 +1247,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { MODEL_ARCH.GRANITE_MOE: [ MODEL_TENSOR.TOKEN_EMBD, MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, MODEL_TENSOR.ATTN_NORM, MODEL_TENSOR.ATTN_Q, MODEL_TENSOR.ATTN_K, diff --git a/src/llama.cpp b/src/llama.cpp index 65aa6cbeb..f268dd91e 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -1485,6 +1485,7 @@ static const std::map> LLM_TENSOR_NA { { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, { LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_OUTPUT, "output" }, { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },