This commit is contained in:
okada 2023-12-24 18:57:53 +09:00
parent 602cc71b06
commit 307481f28d

View file

@ -3678,13 +3678,6 @@ static bool llm_load_tensors(
model.output_norm = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, backend_norm); model.output_norm = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, backend_norm);
model.output = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, backend_output); model.output = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, backend_output);
if (backend_norm == GGML_BACKEND_GPU) {
vram_weights += ggml_nbytes(model.output_norm);
}
if (backend_output == GGML_BACKEND_GPU_SPLIT) {
vram_weights += ggml_nbytes(model.output);
}
} }
const uint32_t n_ff = hparams.n_ff; const uint32_t n_ff = hparams.n_ff;
@ -3708,14 +3701,7 @@ static bool llm_load_tensors(
layer.ffn_gate = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, backend_split); layer.ffn_gate = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, backend_split);
layer.ffn_down = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, backend_split); layer.ffn_down = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, backend_split);
layer.ffn_up = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, backend_split); layer.ffn_up = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, backend_split);
if (backend == GGML_BACKEND_GPU) {
vram_weights +=
ggml_nbytes(layer.attn_norm) + ggml_nbytes(layer.wq) + ggml_nbytes(layer.wk) +
ggml_nbytes(layer.wv) + ggml_nbytes(layer.wo) +
ggml_nbytes(layer.ffn_gate) + ggml_nbytes(layer.ffn_up) + ggml_nbytes(layer.ffn_down);
}
} }
} break; } break;
default: default:
@ -5706,9 +5692,9 @@ struct llm_build_context {
llm_build_kv_store(ctx0, hparams, kv_self, gf, Kcur, Vcur, n_ctx, n_tokens, kv_head, cb, il); llm_build_kv_store(ctx0, hparams, kv_self, gf, Kcur, Vcur, n_ctx, n_tokens, kv_head, cb, il);
cur = llm_build_kqv(ctx0, hparams, kv_self, cur = llm_build_kqv(ctx0, model, hparams, kv_self,
model.layers[il].wo, NULL, model.layers[il].wo, NULL,
Qcur, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, 0.0f, cb, il); Qcur, KQ_mask, n_ctx, n_tokens, n_kv, -1.0f, 1.0f/sqrtf(float(n_embd_head)), cb, il);
cb(cur, "kqv_out", il); cb(cur, "kqv_out", il);
} }
struct ggml_tensor * sa_out = cur; struct ggml_tensor * sa_out = cur;