Move struct definitions from llama.cpp to llama.h

Signed-off-by: Thiago Padilha <thiago@padilha.cc>
2023-03-18 11:52:55 -03:00 · 2023-03-18 11:52:55 -03:00 · 6864a1f8e2
commit 6864a1f8e2
parent 95c6748913
2 changed files with 60 additions and 50 deletions
--- a/llama.cpp
+++ b/llama.cpp
@ -1,3 +1,4 @@
 #include "llama.h"
 #include "ggml.h"
 #include "utils.h"
@ -39,56 +40,6 @@ static const std::map<int, int> LLAMA_N_PARTS = {
    { 8192, 8 },
 };
 // Model hyperparameters. The in-class defaults below are the LLaMA 7B values,
 // so a default-constructed llama_hparams describes that configuration.
 struct llama_hparams {
    int32_t n_vocab{32000};
    int32_t n_ctx{512};      // this is provided as user input?
    int32_t n_embd{4096};
    int32_t n_mult{256};
    int32_t n_head{32};
    int32_t n_layer{32};
    int32_t n_rot{64};
    int32_t f16{1};
 };
 // Tensor handles belonging to a single layer, grouped by sub-block.
 // Every handle defaults to nullptr so that a default-initialized layer never
 // carries indeterminate pointers before the tensors are assigned.
 struct llama_layer {
    // normalization
    struct ggml_tensor * attention_norm = nullptr;
    // attention
    struct ggml_tensor * wq = nullptr;
    struct ggml_tensor * wk = nullptr;
    struct ggml_tensor * wv = nullptr;
    struct ggml_tensor * wo = nullptr;
    // normalization
    struct ggml_tensor * ffn_norm = nullptr;
    // ff
    struct ggml_tensor * w1 = nullptr;
    struct ggml_tensor * w2 = nullptr;
    struct ggml_tensor * w3 = nullptr;
 };
 // Top-level model state: hyperparameters, tensor handles, per-layer handles
 // and the ggml context. Raw pointer members default to nullptr so that a
 // default-initialized model holds no indeterminate pointers.
 struct llama_model {
    llama_hparams hparams;
    struct ggml_tensor * tok_embeddings = nullptr;
    struct ggml_tensor * norm = nullptr;
    struct ggml_tensor * output = nullptr;
    // one entry per layer
    std::vector<llama_layer> layers;
    // key + value memory
    struct ggml_tensor * memory_k = nullptr;
    struct ggml_tensor * memory_v = nullptr;
    // ggml context handle
    // NOTE(review): presumably the context the tensors above live in - confirm
    struct ggml_context * ctx = nullptr;
    // string-keyed lookup of tensor handles
    std::map<std::string, struct ggml_tensor *> tensors;
 };
 // load the model's weights from a file
 bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx, ggml_type memory_type = GGML_TYPE_F32) {
    fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());
--- a/llama.h
+++ b/llama.h
@ -0,0 +1,59 @@
 #pragma once
 #include <vector>
 #include <map>
 #include <cstdio>
 #include <string>
 #include "ggml.h"
 // Model hyperparameters. The in-class defaults below are the LLaMA 7B values,
 // so a default-constructed llama_hparams describes that configuration.
 struct llama_hparams {
    int32_t n_vocab{32000};
    int32_t n_ctx{512};      // this is provided as user input?
    int32_t n_embd{4096};
    int32_t n_mult{256};
    int32_t n_head{32};
    int32_t n_layer{32};
    int32_t n_rot{64};
    int32_t f16{1};
 };
 // Tensor handles belonging to a single layer, grouped by sub-block.
 // Every handle defaults to nullptr so that a default-initialized layer never
 // carries indeterminate pointers before the tensors are assigned.
 struct llama_layer {
    // normalization
    struct ggml_tensor * attention_norm = nullptr;
    // attention
    struct ggml_tensor * wq = nullptr;
    struct ggml_tensor * wk = nullptr;
    struct ggml_tensor * wv = nullptr;
    struct ggml_tensor * wo = nullptr;
    // normalization
    struct ggml_tensor * ffn_norm = nullptr;
    // ff
    struct ggml_tensor * w1 = nullptr;
    struct ggml_tensor * w2 = nullptr;
    struct ggml_tensor * w3 = nullptr;
 };
 // Top-level model state: hyperparameters, tensor handles, per-layer handles
 // and the ggml context. Raw pointer members default to nullptr so that a
 // default-initialized model holds no indeterminate pointers.
 struct llama_model {
    llama_hparams hparams;
    struct ggml_tensor * tok_embeddings = nullptr;
    struct ggml_tensor * norm = nullptr;
    struct ggml_tensor * output = nullptr;
    // one entry per layer
    std::vector<llama_layer> layers;
    // key + value memory
    struct ggml_tensor * memory_k = nullptr;
    struct ggml_tensor * memory_v = nullptr;
    // ggml context handle
    // NOTE(review): presumably the context the tensors above live in - confirm
    struct ggml_context * ctx = nullptr;
    // string-keyed lookup of tensor handles
    std::map<std::string, struct ggml_tensor *> tensors;
 };