llama : minor
This commit is contained in:
parent
abdc8ea34a
commit
9a9e6cde66
1 changed file with 2 additions and 11 deletions
13
llama.cpp
13
llama.cpp
|
@ -4307,7 +4307,7 @@ static bool llm_load_tensors(
|
||||||
// output
|
// output
|
||||||
{
|
{
|
||||||
model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
|
model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
|
||||||
model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, false);
|
model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, false);
|
||||||
// if output is NULL, init from the input tok embed
|
// if output is NULL, init from the input tok embed
|
||||||
if (model.output == NULL) {
|
if (model.output == NULL) {
|
||||||
model.output = ml.create_tensor(ctx_output, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
|
model.output = ml.create_tensor(ctx_output, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
|
||||||
|
@ -4333,8 +4333,7 @@ static bool llm_load_tensors(
|
||||||
|
|
||||||
layer.ffn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd});
|
layer.ffn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd});
|
||||||
|
|
||||||
layer.ffn_gate_inp = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd}, false);
|
layer.ffn_gate_inp = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd});
|
||||||
|
|
||||||
|
|
||||||
GGML_ASSERT(hparams.n_expert > 0);
|
GGML_ASSERT(hparams.n_expert > 0);
|
||||||
GGML_ASSERT(hparams.n_expert_used > 0);
|
GGML_ASSERT(hparams.n_expert_used > 0);
|
||||||
|
@ -6335,7 +6334,6 @@ struct llm_build_context {
|
||||||
|
|
||||||
inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);
|
inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);
|
||||||
|
|
||||||
|
|
||||||
// multiply by embedding_multiplier_scale of 78.38367176906169
|
// multiply by embedding_multiplier_scale of 78.38367176906169
|
||||||
inpL = ggml_scale(ctx0, inpL, 78.38367176906169f);
|
inpL = ggml_scale(ctx0, inpL, 78.38367176906169f);
|
||||||
|
|
||||||
|
@ -6346,7 +6344,6 @@ struct llm_build_context {
|
||||||
struct ggml_tensor * KQ_mask = build_inp_KQ_mask();
|
struct ggml_tensor * KQ_mask = build_inp_KQ_mask();
|
||||||
|
|
||||||
for (int il = 0; il < n_layer; ++il) {
|
for (int il = 0; il < n_layer; ++il) {
|
||||||
//for (int il = 0; il < 1; ++il) {
|
|
||||||
struct ggml_tensor * inpSA = inpL;
|
struct ggml_tensor * inpSA = inpL;
|
||||||
|
|
||||||
// norm
|
// norm
|
||||||
|
@ -6452,9 +6449,6 @@ struct llm_build_context {
|
||||||
ggml_tensor * cur_gate = ggml_mul_mat_id(ctx0, model.layers[il].ffn_gate_exp, n_expert, selected_experts, i, cur);
|
ggml_tensor * cur_gate = ggml_mul_mat_id(ctx0, model.layers[il].ffn_gate_exp, n_expert, selected_experts, i, cur);
|
||||||
cb(cur_gate, "ffn_moe_gate", il);
|
cb(cur_gate, "ffn_moe_gate", il);
|
||||||
|
|
||||||
//cur_gate = ggml_silu(ctx0, cur_gate);
|
|
||||||
//cb(cur_gate, "ffn_moe_silu", il);
|
|
||||||
|
|
||||||
//GeLU
|
//GeLU
|
||||||
cur_gate = ggml_gelu(ctx0, cur_gate);
|
cur_gate = ggml_gelu(ctx0, cur_gate);
|
||||||
cb(cur_gate, "ffn_moe_gelu", il);
|
cb(cur_gate, "ffn_moe_gelu", il);
|
||||||
|
@ -6479,7 +6473,6 @@ struct llm_build_context {
|
||||||
|
|
||||||
cur = moe_out;
|
cur = moe_out;
|
||||||
|
|
||||||
|
|
||||||
// Grok
|
// Grok
|
||||||
// if layer_out_norm is present then apply it before adding the input
|
// if layer_out_norm is present then apply it before adding the input
|
||||||
// Idea: maybe ffn_out_norm is a better name
|
// Idea: maybe ffn_out_norm is a better name
|
||||||
|
@ -6514,7 +6507,6 @@ struct llm_build_context {
|
||||||
// lm_head
|
// lm_head
|
||||||
cur = ggml_mul_mat(ctx0, model.output, cur);
|
cur = ggml_mul_mat(ctx0, model.output, cur);
|
||||||
|
|
||||||
|
|
||||||
// Grok
|
// Grok
|
||||||
// multiply logits by output_multiplier_scale of 0.5773502691896257
|
// multiply logits by output_multiplier_scale of 0.5773502691896257
|
||||||
|
|
||||||
|
@ -6527,7 +6519,6 @@ struct llm_build_context {
|
||||||
return gf;
|
return gf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
struct ggml_cgraph * build_starcoder() {
|
struct ggml_cgraph * build_starcoder() {
|
||||||
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
|
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue