q/k ln and pos_embd only if required
This commit is contained in:
parent
b69e6c0a0f
commit
d9e48194e4
3 changed files with 8 additions and 7 deletions
|
@ -117,6 +117,7 @@ Typically finetunes of the base models below are supported as well.
|
|||
- [x] [Mamba](https://github.com/state-spaces/mamba)
|
||||
- [x] [Xverse](https://huggingface.co/models?search=xverse)
|
||||
- [x] [Command-R](https://huggingface.co/CohereForAI/c4ai-command-r-v01)
|
||||
- [x] [SEA-LION](https://huggingface.co/models?search=sea-lion)
|
||||
|
||||
**Multimodal models:**
|
||||
|
||||
|
|
|
@ -289,13 +289,13 @@ class TensorNameMap:
|
|||
MODEL_TENSOR.ATTN_Q_NORM: (
|
||||
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
||||
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
||||
"transformer.blocks.{bid}.attn.q_ln",
|
||||
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_K_NORM: (
|
||||
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
|
||||
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
||||
"transformer.blocks.{bid}.attn.k_ln",
|
||||
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ROPE_FREQS: (
|
||||
|
|
10
llama.cpp
10
llama.cpp
|
@ -4746,7 +4746,7 @@ static bool llm_load_tensors(
|
|||
case LLM_ARCH_MPT:
|
||||
{
|
||||
model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
|
||||
model.pos_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, hparams.n_ctx_train});
|
||||
model.pos_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, hparams.n_ctx_train}, false);
|
||||
|
||||
// output
|
||||
{
|
||||
|
@ -4785,11 +4785,11 @@ static bool llm_load_tensors(
|
|||
layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
|
||||
layer.ffn_up_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}, false);
|
||||
|
||||
layer.attn_q_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), {n_embd});
|
||||
layer.attn_q_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_Q_NORM, "bias", i), {n_embd});
|
||||
layer.attn_q_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), {n_embd}, false);
|
||||
layer.attn_q_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_Q_NORM, "bias", i), {n_embd}, false);
|
||||
|
||||
layer.attn_k_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {n_embd});
|
||||
layer.attn_k_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_K_NORM, "bias", i), {n_embd});
|
||||
layer.attn_k_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {n_embd}, false);
|
||||
layer.attn_k_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_K_NORM, "bias", i), {n_embd}, false);
|
||||
|
||||
// AWQ ScaleActivation layer
|
||||
layer.ffn_act = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_ACT, "scales", i), {n_ff}, false);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue