llama : support running Mamba-Codestral-7B-v0.1
This commit is contained in:
parent
dceff23fae
commit
2bfe9de6d3
2 changed files with 5 additions and 1 deletion
|
@ -2843,6 +2843,10 @@ class Mamba2Model(Model):
|
|||
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
||||
del bid # unused
|
||||
|
||||
if name.startswith("model.backbone") or name.startswith("model.lm_head"):
|
||||
# map Mamba-Codestral-7B-v0.1 tensor names to the names used by Mamba-2
|
||||
name = name.removeprefix("model.")
|
||||
|
||||
if name.endswith(".dt_bias"):
|
||||
name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
|
||||
|
||||
|
|
|
@ -9383,7 +9383,7 @@ static struct ggml_tensor * llm_build_mamba2(
|
|||
// grouped RMS norm
|
||||
y = ggml_reshape_4d(ctx, y, d_inner / n_group, n_group, n_seq_tokens, n_seqs);
|
||||
y = llm_build_norm(ctx, y, hparams,
|
||||
model.layers[il].ssm_norm, NULL,
|
||||
ggml_reshape_2d(ctx, model.layers[il].ssm_norm, d_inner / n_group, n_group), NULL,
|
||||
LLM_NORM_RMS, cb, il);
|
||||
y = ggml_reshape_3d(ctx, y, d_inner, n_seq_tokens, n_seqs);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue