diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 8fc4c4f56..bf6da9601 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2055,7 +2055,11 @@ class WavTokenizerDecModel(Model):
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
-        self.gguf_writer.add_vocab_size(self.hparams["vocab_size"])
+        self.gguf_writer.add_vocab_size         (self.hparams["vocab_size"])
+        self.gguf_writer.add_features_length    (self.hparams["n_embd_features"])
+        self.gguf_writer.add_posnet_length      (self.hparams["n_embd_posnet"])
+        self.gguf_writer.add_convnext_length    (self.hparams["n_embd_convnext"])
+        self.gguf_writer.add_feed_forward_length(self.hparams["n_ff"])
 
 
 @Model.register("Qwen2MoeForCausalLM")
diff --git a/examples/tts/convert_pt_to_hf.py b/examples/tts/convert_pt_to_hf.py
index 8d68290e2..4938515af 100644
--- a/examples/tts/convert_pt_to_hf.py
+++ b/examples/tts/convert_pt_to_hf.py
@@ -147,6 +147,10 @@ config = {
         "WavTokenizerDec"
     ],
     "hidden_size": 1282,
+    "n_embd_features": 512,
+    "n_embd_posnet": 768,
+    "n_embd_convnext": 768,
+    "n_ff": 2304,
     "vocab_size": 4096,
     "n_head": 1,
     "layer_norm_epsilon": 1e-6,
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index af2a4f4f4..e8a31f114 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -90,6 +90,9 @@ class Keys:
         VOCAB_SIZE                        = "{arch}.vocab_size"
         CONTEXT_LENGTH                    = "{arch}.context_length"
         EMBEDDING_LENGTH                  = "{arch}.embedding_length"
+        FEATURES_LENGTH                   = "{arch}.features_length"
+        POSNET_LENGTH                     = "{arch}.posnet_length"
+        CONVNEXT_LENGTH                   = "{arch}.convnext_length"
         BLOCK_COUNT                       = "{arch}.block_count"
         LEADING_DENSE_BLOCK_COUNT         = "{arch}.leading_dense_block_count"
         FEED_FORWARD_LENGTH               = "{arch}.feed_forward_length"
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 65a64e10d..8266ba99f 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -631,6 +631,15 @@ class GGUFWriter:
     def add_embedding_length(self, length: int) -> None:
         self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
 
+    def add_features_length(self, length: int) -> None:
+        self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)
+
+    def add_posnet_length(self, length: int) -> None:
+        self.add_uint32(Keys.LLM.POSNET_LENGTH.format(arch=self.arch), length)
+
+    def add_convnext_length(self, length: int) -> None:
+        self.add_uint32(Keys.LLM.CONVNEXT_LENGTH.format(arch=self.arch), length)
+
     def add_block_count(self, length: int) -> None:
         self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
 
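Note on the gguf-py side: the three new setters are plain uint32 metadata writers, so the keys round-trip like any existing one. A minimal sketch, assuming the gguf-py version from this patch — the output filename and the GGUFReader verification step are illustrative, not part of the change:

```python
# Sketch only: write the new WavTokenizer keys with gguf-py and read them back.
# Values mirror examples/tts/convert_pt_to_hf.py; the filename is arbitrary.
from gguf import GGUFReader, GGUFWriter

writer = GGUFWriter("wavtokenizer-dec-meta.gguf", "wavtokenizer-dec")
writer.add_vocab_size(4096)
writer.add_features_length(512)       # -> wavtokenizer-dec.features_length
writer.add_posnet_length(768)         # -> wavtokenizer-dec.posnet_length
writer.add_convnext_length(768)       # -> wavtokenizer-dec.convnext_length
writer.add_feed_forward_length(2304)  # -> wavtokenizer-dec.feed_forward_length
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()        # no tensors in this metadata-only sketch
writer.close()

reader = GGUFReader("wavtokenizer-dec-meta.gguf")
for key in ("features_length", "posnet_length", "convnext_length"):
    field = reader.get_field(f"wavtokenizer-dec.{key}")
    print(key, field.parts[field.data[0]][0])  # scalar value of the uint32 field
```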
"%s.convnext_length" }, { LLM_KV_BLOCK_COUNT, "%s.block_count" }, { LLM_KV_LEADING_DENSE_BLOCK_COUNT, "%s.leading_dense_block_count" }, { LLM_KV_FEED_FORWARD_LENGTH, "%s.feed_forward_length" }, @@ -2545,6 +2552,11 @@ struct llama_hparams { uint32_t n_vocab_type = 0; // for BERT-style token types uint32_t n_rel_attn_bkts = 0; + // for WavTokenizer + uint32_t n_embd_features = 0; + uint32_t n_embd_posnet = 0; + uint32_t n_embd_convnext = 0; + std::array n_head_arr; std::array n_head_kv_arr; std::array n_ff_arr; @@ -5684,6 +5696,12 @@ static void llm_load_hparams( ml.get_key(LLM_KV_EXPERT_COUNT, hparams.n_expert, false); ml.get_key(LLM_KV_EXPERT_USED_COUNT, hparams.n_expert_used, false); + if (model.arch == LLM_ARCH_WAVTOKENIZER_DEC) { + ml.get_key(LLM_KV_FEATURES_LENGTH, hparams.n_embd_features); + ml.get_key(LLM_KV_POSNET_LENGTH, hparams.n_embd_posnet); + ml.get_key(LLM_KV_CONVNEXT_LENGTH, hparams.n_embd_convnext); + } + GGML_ASSERT(hparams.n_expert <= LLAMA_MAX_EXPERTS); GGML_ASSERT(hparams.n_expert_used <= hparams.n_expert); if (hparams.n_expert > 0) { @@ -5692,7 +5710,7 @@ static void llm_load_hparams( GGML_ASSERT(hparams.n_expert_used == 0); } - // zero-out the per-layer hparams + // zero-out the array hparams std::fill(hparams.n_head_arr.begin(), hparams.n_head_arr.end(), 0); std::fill(hparams.n_head_kv_arr.begin(), hparams.n_head_kv_arr.end(), 0); std::fill(hparams.n_ff_arr.begin(), hparams.n_ff_arr.end(), 0); @@ -7577,7 +7595,7 @@ static bool weight_buft_supported(const llama_hparams & hparams, ggml_tensor * w } break; case GGML_OP_IM2COL: { - int n_embd = hparams.n_embd; + const int n_embd = hparams.n_embd; ggml_tensor * b = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, n_embd, w->ne[1], 1, 1); op_tensor = ggml_im2col(ctx, w, b, 1, 0, 0, 0, 1, 0, false, GGML_TYPE_F16); } break; @@ -9547,104 +9565,107 @@ static bool llm_load_tensors( } break; case LLM_ARCH_WAVTOKENIZER_DEC: { - model.tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {512, n_vocab}, 0); + const int64_t n_embd_features = hparams.n_embd_features; + const int64_t n_embd_posnet = hparams.n_embd_posnet; + const int64_t n_embd_convnext = hparams.n_embd_convnext; - model.tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {768}, 0); - model.tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {768}, 0); + model.tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd_features, n_vocab}, 0); - model.conv_1d = create_tensor(tn(LLM_TENSOR_CONV1D, "weight"), {7, 512, 768}, 0); - model.conv_1d_b = create_tensor(tn(LLM_TENSOR_CONV1D, "bias"), {768}, 0); + model.tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd_posnet}, 0); + model.tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {n_embd_posnet}, 0); - model.posnet_0_norm1 = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "weight", 0), {768}, 0); - model.posnet_0_norm1_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "bias", 0), {768}, 0); + model.conv_1d = create_tensor(tn(LLM_TENSOR_CONV1D, "weight"), {7, n_embd_features, n_embd_posnet}, 0); + model.conv_1d_b = create_tensor(tn(LLM_TENSOR_CONV1D, "bias"), {n_embd_posnet}, 0); - model.posnet_0_conv1 = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "weight", 0), {3, 768, 768}, 0); - model.posnet_0_conv1_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "bias", 0), {768}, 0); + model.posnet_0_norm1 = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "weight", 0), {n_embd_posnet}, 0); + model.posnet_0_norm1_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "bias", 0), 
 
-                    model.posnet_0_norm2   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "weight", 0), {768}, 0);
-                    model.posnet_0_norm2_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "bias",   0), {768}, 0);
+                    model.posnet_0_conv1   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "weight", 0), {3, n_embd_posnet, n_embd_posnet}, 0);
+                    model.posnet_0_conv1_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "bias",   0), {n_embd_posnet}, 0);
 
-                    model.posnet_0_conv2   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "weight", 0), {3, 768, 768}, 0);
-                    model.posnet_0_conv2_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "bias",   0), {768}, 0);
+                    model.posnet_0_norm2   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "weight", 0), {n_embd_posnet}, 0);
+                    model.posnet_0_norm2_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "bias",   0), {n_embd_posnet}, 0);
 
-                    model.posnet_1_norm1   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "weight", 1), {768}, 0);
-                    model.posnet_1_norm1_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "bias",   1), {768}, 0);
+                    model.posnet_0_conv2   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "weight", 0), {3, n_embd_posnet, n_embd_posnet}, 0);
+                    model.posnet_0_conv2_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "bias",   0), {n_embd_posnet}, 0);
 
-                    model.posnet_1_conv1   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "weight", 1), {3, 768, 768}, 0);
-                    model.posnet_1_conv1_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "bias",   1), {768}, 0);
+                    model.posnet_1_norm1   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "weight", 1), {n_embd_posnet}, 0);
+                    model.posnet_1_norm1_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "bias",   1), {n_embd_posnet}, 0);
 
-                    model.posnet_1_norm2   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "weight", 1), {768}, 0);
-                    model.posnet_1_norm2_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "bias",   1), {768}, 0);
+                    model.posnet_1_conv1   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "weight", 1), {3, n_embd_posnet, n_embd_posnet}, 0);
+                    model.posnet_1_conv1_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "bias",   1), {n_embd_posnet}, 0);
 
-                    model.posnet_1_conv2   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "weight", 1), {3, 768, 768}, 0);
-                    model.posnet_1_conv2_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "bias",   1), {768}, 0);
+                    model.posnet_1_norm2   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "weight", 1), {n_embd_posnet}, 0);
+                    model.posnet_1_norm2_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "bias",   1), {n_embd_posnet}, 0);
 
-                    model.posnet_2_attn_norm   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_NORM, "weight", 2), {768}, 0);
-                    model.posnet_2_attn_norm_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_NORM, "bias",   2), {768}, 0);
+                    model.posnet_1_conv2   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "weight", 1), {3, n_embd_posnet, n_embd_posnet}, 0);
+                    model.posnet_1_conv2_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "bias",   1), {n_embd_posnet}, 0);
 
-                    model.posnet_2_attn_q   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_Q, "weight", 2), {1, 768, 768}, 0);
-                    model.posnet_2_attn_q_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_Q, "bias",   2), {768}, 0);
+                    model.posnet_2_attn_norm   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_NORM, "weight", 2), {n_embd_posnet}, 0);
+                    model.posnet_2_attn_norm_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_NORM, "bias",   2), {n_embd_posnet}, 0);
 
-                    model.posnet_2_attn_k   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_K, "weight", 2), {1, 768, 768}, 0);
-                    model.posnet_2_attn_k_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_K, "bias",   2), {768}, 0);
+                    model.posnet_2_attn_q   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_Q, "weight", 2), {1, n_embd_posnet, n_embd_posnet}, 0);
+                    model.posnet_2_attn_q_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_Q, "bias",   2), {n_embd_posnet}, 0);
 
-                    model.posnet_2_attn_v   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_V, "weight", 2), {1, 768, 768}, 0);
-                    model.posnet_2_attn_v_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_V, "bias",   2), {768}, 0);
+                    model.posnet_2_attn_k   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_K, "weight", 2), {1, n_embd_posnet, n_embd_posnet}, 0);
+                    model.posnet_2_attn_k_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_K, "bias",   2), {n_embd_posnet}, 0);
 
-                    model.posnet_2_attn_o   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_OUT, "weight", 2), {1, 768, 768}, 0);
-                    model.posnet_2_attn_o_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_OUT, "bias",   2), {768}, 0);
+                    model.posnet_2_attn_v   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_V, "weight", 2), {1, n_embd_posnet, n_embd_posnet}, 0);
+                    model.posnet_2_attn_v_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_V, "bias",   2), {n_embd_posnet}, 0);
 
-                    model.posnet_3_norm1   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "weight", 3), {768}, 0);
-                    model.posnet_3_norm1_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "bias",   3), {768}, 0);
+                    model.posnet_2_attn_o   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_OUT, "weight", 2), {1, n_embd_posnet, n_embd_posnet}, 0);
+                    model.posnet_2_attn_o_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_OUT, "bias",   2), {n_embd_posnet}, 0);
 
-                    model.posnet_3_conv1   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "weight", 3), {3, 768, 768}, 0);
-                    model.posnet_3_conv1_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "bias",   3), {768}, 0);
+                    model.posnet_3_norm1   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "weight", 3), {n_embd_posnet}, 0);
+                    model.posnet_3_norm1_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "bias",   3), {n_embd_posnet}, 0);
 
-                    model.posnet_3_norm2   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "weight", 3), {768}, 0);
-                    model.posnet_3_norm2_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "bias",   3), {768}, 0);
+                    model.posnet_3_conv1   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "weight", 3), {3, n_embd_posnet, n_embd_posnet}, 0);
+                    model.posnet_3_conv1_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "bias",   3), {n_embd_posnet}, 0);
 
-                    model.posnet_3_conv2   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "weight", 3), {3, 768, 768}, 0);
-                    model.posnet_3_conv2_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "bias",   3), {768}, 0);
+                    model.posnet_3_norm2   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "weight", 3), {n_embd_posnet}, 0);
+                    model.posnet_3_norm2_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "bias",   3), {n_embd_posnet}, 0);
 
-                    model.posnet_4_norm1   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "weight", 4), {768}, 0);
-                    model.posnet_4_norm1_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "bias",   4), {768}, 0);
+                    model.posnet_3_conv2   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "weight", 3), {3, n_embd_posnet, n_embd_posnet}, 0);
+                    model.posnet_3_conv2_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "bias",   3), {n_embd_posnet}, 0);
 
-                    model.posnet_4_conv1   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "weight", 4), {3, 768, 768}, 0);
-                    model.posnet_4_conv1_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "bias",   4), {768}, 0);
+                    model.posnet_4_norm1   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "weight", 4), {n_embd_posnet}, 0);
+                    model.posnet_4_norm1_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "bias",   4), {n_embd_posnet}, 0);
 
-                    model.posnet_4_norm2   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "weight", 4), {768}, 0);
-                    model.posnet_4_norm2_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "bias",   4), {768}, 0);
+                    model.posnet_4_conv1   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "weight", 4), {3, n_embd_posnet, n_embd_posnet}, 0);
+                    model.posnet_4_conv1_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "bias",   4), {n_embd_posnet}, 0);
 
-                    model.posnet_4_conv2   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "weight", 4), {3, 768, 768}, 0);
-                    model.posnet_4_conv2_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "bias",   4), {768}, 0);
+                    model.posnet_4_norm2   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "weight", 4), {n_embd_posnet}, 0);
+                    model.posnet_4_norm2_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "bias",   4), {n_embd_posnet}, 0);
 
-                    model.posnet_5_norm   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_NORM, "weight", 5), {768}, 0);
-                    model.posnet_5_norm_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_NORM, "bias",   5), {768}, 0);
+                    model.posnet_4_conv2   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "weight", 4), {3, n_embd_posnet, n_embd_posnet}, 0);
+                    model.posnet_4_conv2_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "bias",   4), {n_embd_posnet}, 0);
+
+                    model.posnet_5_norm   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_NORM, "weight", 5), {n_embd_posnet}, 0);
+                    model.posnet_5_norm_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_NORM, "bias",   5), {n_embd_posnet}, 0);
 
                     for (int i = 0; i < n_layer; ++i) {
                         auto & layer = model.layers[i];
 
-                        layer.convnext_dw   = create_tensor(tn(LLM_TENSOR_CONV_NEXT_DW, "weight", i), {7, 1, 768}, 0);
-                        layer.convnext_dw_b = create_tensor(tn(LLM_TENSOR_CONV_NEXT_DW, "bias",   i), {768}, 0);
+                        layer.convnext_dw   = create_tensor(tn(LLM_TENSOR_CONV_NEXT_DW, "weight", i), {7, 1, n_embd_convnext}, 0);
+                        layer.convnext_dw_b = create_tensor(tn(LLM_TENSOR_CONV_NEXT_DW, "bias",   i), {n_embd_convnext}, 0);
 
-                        layer.convnext_norm   = create_tensor(tn(LLM_TENSOR_CONV_NEXT_NORM, "weight", i), {768}, 0);
-                        layer.convnext_norm_b = create_tensor(tn(LLM_TENSOR_CONV_NEXT_NORM, "bias",   i), {768}, 0);
+                        layer.convnext_norm   = create_tensor(tn(LLM_TENSOR_CONV_NEXT_NORM, "weight", i), {n_embd_convnext}, 0);
+                        layer.convnext_norm_b = create_tensor(tn(LLM_TENSOR_CONV_NEXT_NORM, "bias",   i), {n_embd_convnext}, 0);
 
-                        // TODO: n_ff
-                        layer.convnext_pw1   = create_tensor(tn(LLM_TENSOR_CONV_NEXT_PW1, "weight", i), {768, 2304}, 0);
-                        layer.convnext_pw1_b = create_tensor(tn(LLM_TENSOR_CONV_NEXT_PW1, "bias",   i), {2304}, 0);
+                        layer.convnext_pw1   = create_tensor(tn(LLM_TENSOR_CONV_NEXT_PW1, "weight", i), {n_embd_convnext, n_ff}, 0);
+                        layer.convnext_pw1_b = create_tensor(tn(LLM_TENSOR_CONV_NEXT_PW1, "bias",   i), {n_ff}, 0);
 
-                        layer.convnext_pw2   = create_tensor(tn(LLM_TENSOR_CONV_NEXT_PW2, "weight", i), {2304, 768}, 0);
-                        layer.convnext_pw2_b = create_tensor(tn(LLM_TENSOR_CONV_NEXT_PW2, "bias",   i), {768}, 0);
+                        layer.convnext_pw2   = create_tensor(tn(LLM_TENSOR_CONV_NEXT_PW2, "weight", i), {n_ff, n_embd_convnext}, 0);
+                        layer.convnext_pw2_b = create_tensor(tn(LLM_TENSOR_CONV_NEXT_PW2, "bias",   i), {n_embd_convnext}, 0);
 
-                        layer.convnext_gamma = create_tensor(tn(LLM_TENSOR_CONV_NEXT_GAMMA, "weight", i), {768}, 0);
+                        layer.convnext_gamma = create_tensor(tn(LLM_TENSOR_CONV_NEXT_GAMMA, "weight", i), {n_embd_convnext}, 0);
                     }
 
                     // output
-                    model.output_norm   = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {768}, 0);
-                    model.output_norm_b = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {768}, 0);
+                    model.output_norm   = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd_convnext}, 0);
+                    model.output_norm_b = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd_convnext}, 0);
 
-                    model.output   = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {768, n_embd}, 0);
+                    model.output   = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd_convnext, n_embd}, 0);
                     model.output_b = create_tensor(tn(LLM_TENSOR_OUTPUT, "bias"),   {n_embd}, 0);
                 } break;
             default:
@@ -17317,7 +17338,7 @@ struct llm_build_context {
 
         struct ggml_tensor * kq = ggml_mul_mat(ctx0, k, q);
 
-        kq = ggml_soft_max_ext(ctx0, kq, nullptr, 1.0f/sqrtf(float(768)), 0.0f);
+        kq = ggml_soft_max_ext(ctx0, kq, nullptr, 1.0f/sqrtf(float(model.hparams.n_embd_posnet)), 0.0f);
 
         cur = ggml_mul_mat(ctx0, kq, v);
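The net effect of the src/llama.cpp hunks is that every hardcoded 512/768/2304 becomes a function of the new hparams. A rough sketch of the resulting shape constraints, with values taken from the convert_pt_to_hf.py defaults above (the names on the left are abbreviations, not the actual GGUF tensor names):

```python
import math

# Illustrative only: how the WavTokenizer decoder tensor shapes follow from
# the new hparams (defaults from examples/tts/convert_pt_to_hf.py).
n_embd_features = 512   # token embedding width, conv1d input channels
n_embd_posnet   = 768   # posnet block width, conv1d output channels
n_embd_convnext = 768   # convnext block width
n_ff            = 2304  # convnext pointwise FFN width
n_embd, n_vocab = 1282, 4096

shapes = {
    "token_embd":   (n_embd_features, n_vocab),
    "conv1d":       (7, n_embd_features, n_embd_posnet),
    "posnet conv":  (3, n_embd_posnet, n_embd_posnet),
    "posnet attn":  (1, n_embd_posnet, n_embd_posnet),
    "convnext dw":  (7, 1, n_embd_convnext),
    "convnext pw1": (n_embd_convnext, n_ff),
    "convnext pw2": (n_ff, n_embd_convnext),
    "output":       (n_embd_convnext, n_embd),
}
for name, shape in shapes.items():
    print(f"{name:13s} {shape}")

# The posnet attention softmax scale is likewise derived from the hparam
# instead of the literal 768:
print("attn scale:", 1.0 / math.sqrt(n_embd_posnet))
```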