llama : minor / style

This commit is contained in:
Georgi Gerganov 2024-04-24 09:39:22 +03:00
parent 1bf93ced81
commit 32661ac8b4
No known key found for this signature in database
GPG key ID: 449E073F9DC10735

View file

@ -9028,27 +9028,25 @@ struct llm_build_context {
return gf;
}
struct ggml_cgraph* build_phi3() {
struct ggml_cgraph* gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
struct ggml_cgraph * build_phi3() {
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
struct ggml_tensor* cur;
struct ggml_tensor* inpL;
struct ggml_tensor * cur;
struct ggml_tensor * inpL;
inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);
// inp_pos - contains the positions
struct ggml_tensor* inp_pos = build_inp_pos();
struct ggml_tensor * inp_pos = build_inp_pos();
// KQ_mask (mask for 1 head, it will be broadcasted to all heads)
struct ggml_tensor* KQ_mask = build_inp_KQ_mask();
struct ggml_tensor * KQ_mask = build_inp_KQ_mask();
for (int il = 0; il < n_layer; ++il) {
auto residual = inpL;
// self-attention
@ -9059,9 +9057,9 @@ struct llm_build_context {
LLM_NORM_RMS, cb, il);
cb(attn_norm_output, "attn_norm", il);
struct ggml_tensor* Qcur = nullptr;
struct ggml_tensor* Kcur = nullptr;
struct ggml_tensor* Vcur = nullptr;
struct ggml_tensor * Qcur = nullptr;
struct ggml_tensor * Kcur = nullptr;
struct ggml_tensor * Vcur = nullptr;
if (model.layers[il].wqkv) {
cur = ggml_mul_mat(ctx0, model.layers[il].wqkv, attn_norm_output);
@ -9120,6 +9118,8 @@ struct llm_build_context {
cb(cur, "ffn_norm", il);
// FF
// special-case: the up and gate tensors are merged into a single tensor
// TODO: support this in llm_build_ffn
{
struct ggml_tensor* up = ggml_mul_mat(ctx0, model.layers[il].ffn_up, cur);
cb(up, "ffn_up", il);
@ -9152,9 +9152,6 @@ struct llm_build_context {
cur = ggml_mul_mat(ctx0, model.output, cur);
cb(cur, "result_output", -1);
//cur = ggml_add(ctx0, cur, NULL);
//cb(cur, "result_output", -1);
ggml_build_forward_expand(gf, cur);
return gf;