tts : remove hardcoded constants

ggml-ci
Georgi Gerganov 2024-12-16 15:07:38 +02:00
parent e70f140c04
commit c096bbd8dd
5 changed files with 110 additions and 66 deletions


@@ -2055,7 +2055,11 @@ class WavTokenizerDecModel(Model):
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
-        self.gguf_writer.add_vocab_size(self.hparams["vocab_size"])
+        self.gguf_writer.add_vocab_size         (self.hparams["vocab_size"])
+        self.gguf_writer.add_features_length    (self.hparams["n_embd_features"])
+        self.gguf_writer.add_posnet_length      (self.hparams["n_embd_posnet"])
+        self.gguf_writer.add_convnext_length    (self.hparams["n_embd_convnext"])
+        self.gguf_writer.add_feed_forward_length(self.hparams["n_ff"])


 @Model.register("Qwen2MoeForCausalLM")
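For reference, the effect of these writer calls can be checked from the reader side. A minimal sketch, assuming gguf-py is installed, that the architecture string is "wavtokenizer-dec", and that "model.gguf" is a hypothetical output of this converter:

    # sketch: dump the WavTokenizer metadata written by set_gguf_parameters()
    from gguf import GGUFReader

    reader = GGUFReader("model.gguf")  # hypothetical path
    for key in ("wavtokenizer-dec.vocab_size",
                "wavtokenizer-dec.features_length",
                "wavtokenizer-dec.posnet_length",
                "wavtokenizer-dec.convnext_length",
                "wavtokenizer-dec.feed_forward_length"):
        field = reader.get_field(key)
        if field is not None:
            print(key, "=", field.parts[-1][0])  # scalar value lives in the last part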


@@ -147,6 +147,10 @@ config = {
         "WavTokenizerDec"
     ],
     "hidden_size": 1282,
+    "n_embd_features": 512,
+    "n_embd_posnet": 768,
+    "n_embd_convnext": 768,
+    "n_ff": 2304,
     "vocab_size": 4096,
     "n_head": 1,
     "layer_norm_epsilon": 1e-6,


@@ -90,6 +90,9 @@ class Keys:
         VOCAB_SIZE                = "{arch}.vocab_size"
         CONTEXT_LENGTH            = "{arch}.context_length"
         EMBEDDING_LENGTH          = "{arch}.embedding_length"
+        FEATURES_LENGTH           = "{arch}.features_length"
+        POSNET_LENGTH             = "{arch}.posnet_length"
+        CONVNEXT_LENGTH           = "{arch}.convnext_length"
         BLOCK_COUNT               = "{arch}.block_count"
         LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
         FEED_FORWARD_LENGTH       = "{arch}.feed_forward_length"
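These class attributes are "{arch}" templates that GGUFWriter expands with the model's architecture string. Assuming the arch string is "wavtokenizer-dec", the new keys come out as:

    # illustrative: expanding the key templates added above
    for t in ("{arch}.features_length", "{arch}.posnet_length", "{arch}.convnext_length"):
        print(t.format(arch="wavtokenizer-dec"))
    # wavtokenizer-dec.features_length
    # wavtokenizer-dec.posnet_length
    # wavtokenizer-dec.convnext_length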


@@ -631,6 +631,18 @@ class GGUFWriter:
     def add_embedding_length(self, length: int) -> None:
         self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)

+    def add_features_length(self, length: int) -> None:
+        self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)
+
+    def add_posnet_length(self, length: int) -> None:
+        self.add_uint32(Keys.LLM.POSNET_LENGTH.format(arch=self.arch), length)
+
+    def add_convnext_length(self, length: int) -> None:
+        self.add_uint32(Keys.LLM.CONVNEXT_LENGTH.format(arch=self.arch), length)
+
     def add_block_count(self, length: int) -> None:
         self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
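A minimal usage sketch of the new setters; the arch string and output filename are assumptions for illustration, and the values match the converter defaults above:

    # sketch: write the new WavTokenizer lengths with GGUFWriter
    from gguf import GGUFWriter

    writer = GGUFWriter("wavtokenizer.gguf", "wavtokenizer-dec")  # assumed arch/path
    writer.add_features_length(512)  # -> wavtokenizer-dec.features_length
    writer.add_posnet_length(768)    # -> wavtokenizer-dec.posnet_length
    writer.add_convnext_length(768)  # -> wavtokenizer-dec.convnext_length
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.close()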


@@ -77,6 +77,7 @@
 #endif

 // bump if necessary
+#define LLAMA_MAX_EMBD      8
 #define LLAMA_MAX_LAYERS  512
 #define LLAMA_MAX_EXPERTS 160 // DeepSeekV2
@@ -275,6 +276,9 @@ enum llm_kv {
     LLM_KV_VOCAB_SIZE,
     LLM_KV_CONTEXT_LENGTH,
     LLM_KV_EMBEDDING_LENGTH,
+    LLM_KV_FEATURES_LENGTH,
+    LLM_KV_POSNET_LENGTH,
+    LLM_KV_CONVNEXT_LENGTH,
     LLM_KV_BLOCK_COUNT,
     LLM_KV_LEADING_DENSE_BLOCK_COUNT,
     LLM_KV_FEED_FORWARD_LENGTH,
@@ -392,6 +396,9 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_VOCAB_SIZE,                "%s.vocab_size"        },
     { LLM_KV_CONTEXT_LENGTH,            "%s.context_length"    },
     { LLM_KV_EMBEDDING_LENGTH,          "%s.embedding_length"  },
+    { LLM_KV_FEATURES_LENGTH,           "%s.features_length"   },
+    { LLM_KV_POSNET_LENGTH,             "%s.posnet_length"     },
+    { LLM_KV_CONVNEXT_LENGTH,           "%s.convnext_length"   },
     { LLM_KV_BLOCK_COUNT,               "%s.block_count"       },
     { LLM_KV_LEADING_DENSE_BLOCK_COUNT, "%s.leading_dense_block_count" },
     { LLM_KV_FEED_FORWARD_LENGTH,       "%s.feed_forward_length" },
@@ -2545,6 +2552,11 @@ struct llama_hparams {
     uint32_t n_vocab_type    = 0; // for BERT-style token types
     uint32_t n_rel_attn_bkts = 0;

+    // for WavTokenizer
+    uint32_t n_embd_features = 0;
+    uint32_t n_embd_posnet   = 0;
+    uint32_t n_embd_convnext = 0;
+
     std::array<uint32_t, LLAMA_MAX_LAYERS> n_head_arr;
     std::array<uint32_t, LLAMA_MAX_LAYERS> n_head_kv_arr;
     std::array<uint32_t, LLAMA_MAX_LAYERS> n_ff_arr;
@@ -5684,6 +5696,12 @@ static void llm_load_hparams(
     ml.get_key(LLM_KV_EXPERT_COUNT,      hparams.n_expert,      false);
     ml.get_key(LLM_KV_EXPERT_USED_COUNT, hparams.n_expert_used, false);

+    if (model.arch == LLM_ARCH_WAVTOKENIZER_DEC) {
+        ml.get_key(LLM_KV_FEATURES_LENGTH, hparams.n_embd_features);
+        ml.get_key(LLM_KV_POSNET_LENGTH,   hparams.n_embd_posnet);
+        ml.get_key(LLM_KV_CONVNEXT_LENGTH, hparams.n_embd_convnext);
+    }
+
     GGML_ASSERT(hparams.n_expert <= LLAMA_MAX_EXPERTS);
     GGML_ASSERT(hparams.n_expert_used <= hparams.n_expert);
     if (hparams.n_expert > 0) {
@@ -5692,7 +5710,7 @@ static void llm_load_hparams(
         GGML_ASSERT(hparams.n_expert_used == 0);
     }

-    // zero-out the per-layer hparams
+    // zero-out the array hparams
     std::fill(hparams.n_head_arr.begin(),    hparams.n_head_arr.end(),    0);
     std::fill(hparams.n_head_kv_arr.begin(), hparams.n_head_kv_arr.end(), 0);
     std::fill(hparams.n_ff_arr.begin(),      hparams.n_ff_arr.end(),      0);
@@ -7577,7 +7595,7 @@ static bool weight_buft_supported(const llama_hparams & hparams, ggml_tensor * w
            } break;
        case GGML_OP_IM2COL:
            {
-               int n_embd = hparams.n_embd;
+               const int n_embd = hparams.n_embd;
                ggml_tensor * b = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, n_embd, w->ne[1], 1, 1);
                op_tensor = ggml_im2col(ctx, w, b, 1, 0, 0, 0, 1, 0, false, GGML_TYPE_F16);
            } break;
@@ -9547,104 +9565,107 @@ static bool llm_load_tensors(
                } break;
            case LLM_ARCH_WAVTOKENIZER_DEC:
                {
-                   model.tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {512, n_vocab}, 0);
+                   const int64_t n_embd_features = hparams.n_embd_features;
+                   const int64_t n_embd_posnet   = hparams.n_embd_posnet;
+                   const int64_t n_embd_convnext = hparams.n_embd_convnext;
+
+                   model.tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd_features, n_vocab}, 0);

-                   model.tok_norm   = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {768}, 0);
-                   model.tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"),   {768}, 0);
+                   model.tok_norm   = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd_posnet}, 0);
+                   model.tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"),   {n_embd_posnet}, 0);

-                   model.conv_1d   = create_tensor(tn(LLM_TENSOR_CONV1D, "weight"), {7, 512, 768}, 0);
-                   model.conv_1d_b = create_tensor(tn(LLM_TENSOR_CONV1D, "bias"),   {768}, 0);
+                   model.conv_1d   = create_tensor(tn(LLM_TENSOR_CONV1D, "weight"), {7, n_embd_features, n_embd_posnet}, 0);
+                   model.conv_1d_b = create_tensor(tn(LLM_TENSOR_CONV1D, "bias"),   {n_embd_posnet}, 0);

-                   model.posnet_0_norm1   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "weight", 0), {768}, 0);
-                   model.posnet_0_norm1_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "bias",   0), {768}, 0);
+                   model.posnet_0_norm1   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "weight", 0), {n_embd_posnet}, 0);
+                   model.posnet_0_norm1_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "bias",   0), {n_embd_posnet}, 0);

-                   model.posnet_0_conv1   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "weight", 0), {3, 768, 768}, 0);
-                   model.posnet_0_conv1_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "bias",   0), {768}, 0);
+                   model.posnet_0_conv1   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "weight", 0), {3, n_embd_posnet, n_embd_posnet}, 0);
+                   model.posnet_0_conv1_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "bias",   0), {n_embd_posnet}, 0);

-                   model.posnet_0_norm2   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "weight", 0), {768}, 0);
-                   model.posnet_0_norm2_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "bias",   0), {768}, 0);
+                   model.posnet_0_norm2   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "weight", 0), {n_embd_posnet}, 0);
+                   model.posnet_0_norm2_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "bias",   0), {n_embd_posnet}, 0);

-                   model.posnet_0_conv2   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "weight", 0), {3, 768, 768}, 0);
-                   model.posnet_0_conv2_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "bias",   0), {768}, 0);
+                   model.posnet_0_conv2   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "weight", 0), {3, n_embd_posnet, n_embd_posnet}, 0);
+                   model.posnet_0_conv2_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "bias",   0), {n_embd_posnet}, 0);

-                   model.posnet_1_norm1   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "weight", 1), {768}, 0);
-                   model.posnet_1_norm1_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "bias",   1), {768}, 0);
+                   model.posnet_1_norm1   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "weight", 1), {n_embd_posnet}, 0);
+                   model.posnet_1_norm1_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "bias",   1), {n_embd_posnet}, 0);

-                   model.posnet_1_conv1   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "weight", 1), {3, 768, 768}, 0);
-                   model.posnet_1_conv1_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "bias",   1), {768}, 0);
+                   model.posnet_1_conv1   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "weight", 1), {3, n_embd_posnet, n_embd_posnet}, 0);
+                   model.posnet_1_conv1_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "bias",   1), {n_embd_posnet}, 0);

-                   model.posnet_1_norm2   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "weight", 1), {768}, 0);
-                   model.posnet_1_norm2_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "bias",   1), {768}, 0);
+                   model.posnet_1_norm2   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "weight", 1), {n_embd_posnet}, 0);
+                   model.posnet_1_norm2_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "bias",   1), {n_embd_posnet}, 0);

-                   model.posnet_1_conv2   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "weight", 1), {3, 768, 768}, 0);
-                   model.posnet_1_conv2_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "bias",   1), {768}, 0);
+                   model.posnet_1_conv2   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "weight", 1), {3, n_embd_posnet, n_embd_posnet}, 0);
+                   model.posnet_1_conv2_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "bias",   1), {n_embd_posnet}, 0);

-                   model.posnet_2_attn_norm   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_NORM, "weight", 2), {768}, 0);
-                   model.posnet_2_attn_norm_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_NORM, "bias",   2), {768}, 0);
+                   model.posnet_2_attn_norm   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_NORM, "weight", 2), {n_embd_posnet}, 0);
+                   model.posnet_2_attn_norm_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_NORM, "bias",   2), {n_embd_posnet}, 0);

-                   model.posnet_2_attn_q   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_Q, "weight", 2), {1, 768, 768}, 0);
-                   model.posnet_2_attn_q_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_Q, "bias",   2), {768}, 0);
+                   model.posnet_2_attn_q   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_Q, "weight", 2), {1, n_embd_posnet, n_embd_posnet}, 0);
+                   model.posnet_2_attn_q_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_Q, "bias",   2), {n_embd_posnet}, 0);

-                   model.posnet_2_attn_k   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_K, "weight", 2), {1, 768, 768}, 0);
-                   model.posnet_2_attn_k_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_K, "bias",   2), {768}, 0);
+                   model.posnet_2_attn_k   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_K, "weight", 2), {1, n_embd_posnet, n_embd_posnet}, 0);
+                   model.posnet_2_attn_k_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_K, "bias",   2), {n_embd_posnet}, 0);

-                   model.posnet_2_attn_v   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_V, "weight", 2), {1, 768, 768}, 0);
-                   model.posnet_2_attn_v_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_V, "bias",   2), {768}, 0);
+                   model.posnet_2_attn_v   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_V, "weight", 2), {1, n_embd_posnet, n_embd_posnet}, 0);
+                   model.posnet_2_attn_v_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_V, "bias",   2), {n_embd_posnet}, 0);

-                   model.posnet_2_attn_o   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_OUT, "weight", 2), {1, 768, 768}, 0);
-                   model.posnet_2_attn_o_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_OUT, "bias",   2), {768}, 0);
+                   model.posnet_2_attn_o   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_OUT, "weight", 2), {1, n_embd_posnet, n_embd_posnet}, 0);
+                   model.posnet_2_attn_o_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_OUT, "bias",   2), {n_embd_posnet}, 0);

-                   model.posnet_3_norm1   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "weight", 3), {768}, 0);
-                   model.posnet_3_norm1_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "bias",   3), {768}, 0);
+                   model.posnet_3_norm1   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "weight", 3), {n_embd_posnet}, 0);
+                   model.posnet_3_norm1_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "bias",   3), {n_embd_posnet}, 0);

-                   model.posnet_3_conv1   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "weight", 3), {3, 768, 768}, 0);
-                   model.posnet_3_conv1_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "bias",   3), {768}, 0);
+                   model.posnet_3_conv1   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "weight", 3), {3, n_embd_posnet, n_embd_posnet}, 0);
+                   model.posnet_3_conv1_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "bias",   3), {n_embd_posnet}, 0);

-                   model.posnet_3_norm2   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "weight", 3), {768}, 0);
-                   model.posnet_3_norm2_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "bias",   3), {768}, 0);
+                   model.posnet_3_norm2   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "weight", 3), {n_embd_posnet}, 0);
+                   model.posnet_3_norm2_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "bias",   3), {n_embd_posnet}, 0);

-                   model.posnet_3_conv2   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "weight", 3), {3, 768, 768}, 0);
-                   model.posnet_3_conv2_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "bias",   3), {768}, 0);
+                   model.posnet_3_conv2   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "weight", 3), {3, n_embd_posnet, n_embd_posnet}, 0);
+                   model.posnet_3_conv2_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "bias",   3), {n_embd_posnet}, 0);

-                   model.posnet_4_norm1   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "weight", 4), {768}, 0);
-                   model.posnet_4_norm1_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "bias",   4), {768}, 0);
+                   model.posnet_4_norm1   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "weight", 4), {n_embd_posnet}, 0);
+                   model.posnet_4_norm1_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM1, "bias",   4), {n_embd_posnet}, 0);

-                   model.posnet_4_conv1   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "weight", 4), {3, 768, 768}, 0);
-                   model.posnet_4_conv1_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "bias",   4), {768}, 0);
+                   model.posnet_4_conv1   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "weight", 4), {3, n_embd_posnet, n_embd_posnet}, 0);
+                   model.posnet_4_conv1_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV1, "bias",   4), {n_embd_posnet}, 0);

-                   model.posnet_4_norm2   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "weight", 4), {768}, 0);
-                   model.posnet_4_norm2_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "bias",   4), {768}, 0);
+                   model.posnet_4_norm2   = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "weight", 4), {n_embd_posnet}, 0);
+                   model.posnet_4_norm2_b = create_tensor(tn(LLM_TENSOR_POS_NET_NORM2, "bias",   4), {n_embd_posnet}, 0);

-                   model.posnet_4_conv2   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "weight", 4), {3, 768, 768}, 0);
-                   model.posnet_4_conv2_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "bias",   4), {768}, 0);
+                   model.posnet_4_conv2   = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "weight", 4), {3, n_embd_posnet, n_embd_posnet}, 0);
+                   model.posnet_4_conv2_b = create_tensor(tn(LLM_TENSOR_POS_NET_CONV2, "bias",   4), {n_embd_posnet}, 0);

-                   model.posnet_5_norm   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_NORM, "weight", 5), {768}, 0);
-                   model.posnet_5_norm_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_NORM, "bias",   5), {768}, 0);
+                   model.posnet_5_norm   = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_NORM, "weight", 5), {n_embd_posnet}, 0);
+                   model.posnet_5_norm_b = create_tensor(tn(LLM_TENSOR_POS_NET_ATTN_NORM, "bias",   5), {n_embd_posnet}, 0);

                    for (int i = 0; i < n_layer; ++i) {
                        auto & layer = model.layers[i];

-                       layer.convnext_dw     = create_tensor(tn(LLM_TENSOR_CONV_NEXT_DW,    "weight", i), {7, 1, 768}, 0);
-                       layer.convnext_dw_b   = create_tensor(tn(LLM_TENSOR_CONV_NEXT_DW,    "bias",   i), {768}, 0);
+                       layer.convnext_dw     = create_tensor(tn(LLM_TENSOR_CONV_NEXT_DW,    "weight", i), {7, 1, n_embd_convnext}, 0);
+                       layer.convnext_dw_b   = create_tensor(tn(LLM_TENSOR_CONV_NEXT_DW,    "bias",   i), {n_embd_convnext}, 0);

-                       layer.convnext_norm   = create_tensor(tn(LLM_TENSOR_CONV_NEXT_NORM,  "weight", i), {768}, 0);
-                       layer.convnext_norm_b = create_tensor(tn(LLM_TENSOR_CONV_NEXT_NORM,  "bias",   i), {768}, 0);
+                       layer.convnext_norm   = create_tensor(tn(LLM_TENSOR_CONV_NEXT_NORM,  "weight", i), {n_embd_convnext}, 0);
+                       layer.convnext_norm_b = create_tensor(tn(LLM_TENSOR_CONV_NEXT_NORM,  "bias",   i), {n_embd_convnext}, 0);

-                       // TODO: n_ff
-                       layer.convnext_pw1    = create_tensor(tn(LLM_TENSOR_CONV_NEXT_PW1,   "weight", i), {768, 2304}, 0);
-                       layer.convnext_pw1_b  = create_tensor(tn(LLM_TENSOR_CONV_NEXT_PW1,   "bias",   i), {2304}, 0);
+                       layer.convnext_pw1    = create_tensor(tn(LLM_TENSOR_CONV_NEXT_PW1,   "weight", i), {n_embd_convnext, n_ff}, 0);
+                       layer.convnext_pw1_b  = create_tensor(tn(LLM_TENSOR_CONV_NEXT_PW1,   "bias",   i), {n_ff}, 0);

-                       layer.convnext_pw2    = create_tensor(tn(LLM_TENSOR_CONV_NEXT_PW2,   "weight", i), {2304, 768}, 0);
-                       layer.convnext_pw2_b  = create_tensor(tn(LLM_TENSOR_CONV_NEXT_PW2,   "bias",   i), {768}, 0);
+                       layer.convnext_pw2    = create_tensor(tn(LLM_TENSOR_CONV_NEXT_PW2,   "weight", i), {n_ff, n_embd_convnext}, 0);
+                       layer.convnext_pw2_b  = create_tensor(tn(LLM_TENSOR_CONV_NEXT_PW2,   "bias",   i), {n_embd_convnext}, 0);

-                       layer.convnext_gamma  = create_tensor(tn(LLM_TENSOR_CONV_NEXT_GAMMA, "weight", i), {768}, 0);
+                       layer.convnext_gamma  = create_tensor(tn(LLM_TENSOR_CONV_NEXT_GAMMA, "weight", i), {n_embd_convnext}, 0);
                    }

                    // output
-                   model.output_norm   = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {768}, 0);
-                   model.output_norm_b = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {768}, 0);
+                   model.output_norm   = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd_convnext}, 0);
+                   model.output_norm_b = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd_convnext}, 0);

-                   model.output   = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {768, n_embd}, 0);
+                   model.output   = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd_convnext, n_embd}, 0);
                    model.output_b = create_tensor(tn(LLM_TENSOR_OUTPUT, "bias"),   {n_embd}, 0);
                } break;
            default:
@@ -17317,7 +17338,7 @@ struct llm_build_context {
                struct ggml_tensor * kq = ggml_mul_mat(ctx0, k, q);

-               kq = ggml_soft_max_ext(ctx0, kq, nullptr, 1.0f/sqrtf(float(768)), 0.0f);
+               kq = ggml_soft_max_ext(ctx0, kq, nullptr, 1.0f/sqrtf(float(model.hparams.n_embd_posnet)), 0.0f);

                cur = ggml_mul_mat(ctx0, kq, v);
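The only behavioral detail here is where the softmax scale comes from: the PosNet attention keeps the standard 1/sqrt(d) scaling, but d is now read from the model header instead of being fixed at 768. A quick numeric check, with the value assumed from the converter defaults above:

    import math

    n_embd_posnet = 768  # default written by the converter config above
    print(1.0 / math.sqrt(n_embd_posnet))  # 0.036084... identical to the old 1/sqrt(768)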