Merge branch 'master' into r1-toolcall

This commit is contained in:
Olivier Chafik 2025-02-05 01:10:45 +00:00 committed by GitHub
commit 1f1f06aa26
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 126 additions and 54 deletions

View file

@ -4610,7 +4610,8 @@ struct llm_build_context {
ggml_row_size(kv_pe_compresseed->type, kv_lora_rank));
cb(k_pe, "k_pe", il);
kv_compressed = ggml_cont(ctx0, kv_compressed); // TODO: the CUDA backend does not support non-contiguous norm
// TODO: the CUDA backend used to not support non-cont. (RMS) norm, investigate removing ggml_cont
kv_compressed = ggml_cont(ctx0, kv_compressed);
kv_compressed = llm_build_norm(ctx0, kv_compressed, hparams,
model.layers[il].attn_kv_a_norm, NULL,
LLM_NORM_RMS, cb, il);
@ -6464,7 +6465,8 @@ struct llm_build_context {
ggml_row_size(kv_pe_compresseed->type, kv_lora_rank));
cb(k_pe, "k_pe", il);
kv_compressed = ggml_cont(ctx0, kv_compressed); // TODO: the CUDA backend does not support non-contiguous norm
// TODO: the CUDA backend used to not support non-cont. (RMS) norm, investigate removing ggml_cont
kv_compressed = ggml_cont(ctx0, kv_compressed);
kv_compressed = llm_build_norm(ctx0, kv_compressed, hparams,
model.layers[il].attn_kv_a_norm, NULL,
LLM_NORM_RMS, cb, il);