Move struct definitions in llama.cpp to llama.h
Signed-off-by: Thiago Padilha <thiago@padilha.cc>
This commit is contained in:
parent
51d003e885
commit
82e70dbfe0
2 changed files with 60 additions and 50 deletions
51
llama.cpp
51
llama.cpp
|
@ -1,3 +1,4 @@
|
|||
#include "llama.h"
|
||||
#include "ggml.h"
|
||||
|
||||
#include "utils.h"
|
||||
|
@ -35,56 +36,6 @@ static const std::map<int, int> LLAMA_N_PARTS = {
|
|||
{ 8192, 8 },
|
||||
};
|
||||
|
||||
// Model hyperparameters.
// The in-class initializers are the defaults for LLaMA 7B.
struct llama_hparams {
    int32_t n_vocab{32000};
    int32_t n_ctx{512};    // NOTE(review): original author was unsure whether this is provided as user input - confirm
    int32_t n_embd{4096};
    int32_t n_mult{256};
    int32_t n_head{32};
    int32_t n_layer{32};
    int32_t n_rot{64};
    int32_t f16{1};
};
|
||||
|
||||
// Tensor handles belonging to a single layer (llama_model keeps a
// std::vector of these).
//
// Every pointer defaults to nullptr so that a default-constructed
// llama_layer never holds an indeterminate pointer value; a member that has
// not been assigned yet can be detected by comparing against nullptr.
struct llama_layer {
    // normalization
    struct ggml_tensor * attention_norm = nullptr;

    // attention
    struct ggml_tensor * wq = nullptr;
    struct ggml_tensor * wk = nullptr;
    struct ggml_tensor * wv = nullptr;
    struct ggml_tensor * wo = nullptr;

    // normalization
    struct ggml_tensor * ffn_norm = nullptr;

    // ff
    struct ggml_tensor * w1 = nullptr;
    struct ggml_tensor * w2 = nullptr;
    struct ggml_tensor * w3 = nullptr;
};
|
||||
|
||||
struct llama_model {
|
||||
llama_hparams hparams;
|
||||
|
||||
struct ggml_tensor * tok_embeddings;
|
||||
|
||||
struct ggml_tensor * norm;
|
||||
struct ggml_tensor * output;
|
||||
|
||||
std::vector<llama_layer> layers;
|
||||
|
||||
// key + value memory
|
||||
struct ggml_tensor * memory_k;
|
||||
struct ggml_tensor * memory_v;
|
||||
|
||||
//
|
||||
struct ggml_context * ctx;
|
||||
std::map<std::string, struct ggml_tensor *> tensors;
|
||||
};
|
||||
|
||||
// load the model's weights from a file
|
||||
bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) {
|
||||
fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());
|
||||
|
|
59
llama.h
Normal file
59
llama.h
Normal file
|
@ -0,0 +1,59 @@
|
|||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
|
||||
#include "ggml.h"
|
||||
|
||||
|
||||
// Model hyperparameters.
// The in-class initializers are the defaults for LLaMA 7B.
struct llama_hparams {
    int32_t n_vocab{32000};
    int32_t n_ctx{512};    // NOTE(review): original author was unsure whether this is provided as user input - confirm
    int32_t n_embd{4096};
    int32_t n_mult{256};
    int32_t n_head{32};
    int32_t n_layer{32};
    int32_t n_rot{64};
    int32_t f16{1};
};
|
||||
|
||||
// Tensor handles belonging to a single layer (llama_model keeps a
// std::vector of these).
//
// Every pointer defaults to nullptr so that a default-constructed
// llama_layer never holds an indeterminate pointer value; a member that has
// not been assigned yet can be detected by comparing against nullptr.
struct llama_layer {
    // normalization
    struct ggml_tensor * attention_norm = nullptr;

    // attention
    struct ggml_tensor * wq = nullptr;
    struct ggml_tensor * wk = nullptr;
    struct ggml_tensor * wv = nullptr;
    struct ggml_tensor * wo = nullptr;

    // normalization
    struct ggml_tensor * ffn_norm = nullptr;

    // ff
    struct ggml_tensor * w1 = nullptr;
    struct ggml_tensor * w2 = nullptr;
    struct ggml_tensor * w3 = nullptr;
};
|
||||
|
||||
struct llama_model {
|
||||
llama_hparams hparams;
|
||||
|
||||
struct ggml_tensor * tok_embeddings;
|
||||
|
||||
struct ggml_tensor * norm;
|
||||
struct ggml_tensor * output;
|
||||
|
||||
std::vector<llama_layer> layers;
|
||||
|
||||
// key + value memory
|
||||
struct ggml_tensor * memory_k;
|
||||
struct ggml_tensor * memory_v;
|
||||
|
||||
//
|
||||
struct ggml_context * ctx;
|
||||
std::map<std::string, struct ggml_tensor *> tensors;
|
||||
};
|
Loading…
Add table
Add a link
Reference in a new issue