Fix incorrect offloading and norm_rms_eps value
This commit is contained in:
parent
aa7fbef78c
commit
06c278895f
1 changed file with 6 additions and 7 deletions
13
llama.cpp
13
llama.cpp
|
@@ -3028,7 +3028,7 @@ struct llm_build_ctx {
|
||||||
, n_embd_gqa (hparams.n_embd_gqa())
|
, n_embd_gqa (hparams.n_embd_gqa())
|
||||||
, freq_base (cparams.rope_freq_base)
|
, freq_base (cparams.rope_freq_base)
|
||||||
, freq_scale (cparams.rope_freq_scale)
|
, freq_scale (cparams.rope_freq_scale)
|
||||||
, norm_rms_eps (hparams.f_norm_eps)
|
, norm_rms_eps (hparams.f_norm_rms_eps)
|
||||||
|
|
||||||
, n_gpu_layers (model.n_gpu_layers)
|
, n_gpu_layers (model.n_gpu_layers)
|
||||||
|
|
||||||
|
@@ -3413,7 +3413,7 @@ static struct ggml_cgraph * llm_build_llama(
|
||||||
for (int il = 0; il < bctx.n_layer; ++il) {
|
for (int il = 0; il < bctx.n_layer; ++il) {
|
||||||
ggml_format_name(inpL, "layer_inp_%d", il);
|
ggml_format_name(inpL, "layer_inp_%d", il);
|
||||||
|
|
||||||
offload_func_t offload_func = llama_nop;
|
bctx.offload_func = llama_nop;
|
||||||
|
|
||||||
#ifdef GGML_USE_CUBLAS
|
#ifdef GGML_USE_CUBLAS
|
||||||
if (il >= i_gpu_start) {
|
if (il >= i_gpu_start) {
|
||||||
|
@@ -3425,25 +3425,24 @@ static struct ggml_cgraph * llm_build_llama(
|
||||||
|
|
||||||
// norm
|
// norm
|
||||||
cur = ggml_rms_norm(bctx.ctx0, inpL, bctx.norm_rms_eps);
|
cur = ggml_rms_norm(bctx.ctx0, inpL, bctx.norm_rms_eps);
|
||||||
offload_func(cur);
|
bctx.offload_func(cur);
|
||||||
ggml_set_name(cur, "rms_norm_0");
|
ggml_set_name(cur, "rms_norm_0");
|
||||||
|
|
||||||
bctx.offload_func = offload_func;
|
|
||||||
cur = bctx.build_attn_block(il, cur);
|
cur = bctx.build_attn_block(il, cur);
|
||||||
|
|
||||||
struct ggml_tensor * inpFF = ggml_add(bctx.ctx0, cur, inpSA);
|
struct ggml_tensor * inpFF = ggml_add(bctx.ctx0, cur, inpSA);
|
||||||
offload_func(inpFF);
|
bctx.offload_func(inpFF);
|
||||||
ggml_set_name(inpFF, "inpFF");
|
ggml_set_name(inpFF, "inpFF");
|
||||||
|
|
||||||
// norm
|
// norm
|
||||||
cur = ggml_rms_norm(bctx.ctx0, inpFF, bctx.norm_rms_eps);
|
cur = ggml_rms_norm(bctx.ctx0, inpFF, bctx.norm_rms_eps);
|
||||||
offload_func(cur);
|
bctx.offload_func(cur);
|
||||||
ggml_set_name(cur, "rms_norm_1");
|
ggml_set_name(cur, "rms_norm_1");
|
||||||
|
|
||||||
cur = bctx.build_ffn_block(il, cur);
|
cur = bctx.build_ffn_block(il, cur);
|
||||||
|
|
||||||
cur = ggml_add(bctx.ctx0, cur, inpFF);
|
cur = ggml_add(bctx.ctx0, cur, inpFF);
|
||||||
offload_func(cur);
|
bctx.offload_func(cur);
|
||||||
ggml_set_name(cur, "inpFF_+_result_w2");
|
ggml_set_name(cur, "inpFF_+_result_w2");
|
||||||
|
|
||||||
// input for next layer
|
// input for next layer
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue