From 82e70dbfe0f2a6d72a4b10b96708e1e0f447358b Mon Sep 17 00:00:00 2001 From: Thiago Padilha Date: Sat, 18 Mar 2023 11:52:55 -0300 Subject: [PATCH] Move struct definitions in llama.cpp to llama.h Signed-off-by: Thiago Padilha --- llama.cpp | 51 +---------------------------------------------- llama.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 50 deletions(-) create mode 100644 llama.h diff --git a/llama.cpp b/llama.cpp index c88405b82..2450e1c3d 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1,3 +1,4 @@ +#include "llama.h" #include "ggml.h" #include "utils.h" @@ -35,56 +36,6 @@ static const std::map LLAMA_N_PARTS = { { 8192, 8 }, }; -// default hparams (LLaMA 7B) -struct llama_hparams { - int32_t n_vocab = 32000; - int32_t n_ctx = 512; // this is provided as user input? - int32_t n_embd = 4096; - int32_t n_mult = 256; - int32_t n_head = 32; - int32_t n_layer = 32; - int32_t n_rot = 64; - int32_t f16 = 1; -}; - -struct llama_layer { - // normalization - struct ggml_tensor * attention_norm; - - // attention - struct ggml_tensor * wq; - struct ggml_tensor * wk; - struct ggml_tensor * wv; - struct ggml_tensor * wo; - - // normalization - struct ggml_tensor * ffn_norm; - - // ff - struct ggml_tensor * w1; - struct ggml_tensor * w2; - struct ggml_tensor * w3; -}; - -struct llama_model { - llama_hparams hparams; - - struct ggml_tensor * tok_embeddings; - - struct ggml_tensor * norm; - struct ggml_tensor * output; - - std::vector layers; - - // key + value memory - struct ggml_tensor * memory_k; - struct ggml_tensor * memory_v; - - // - struct ggml_context * ctx; - std::map tensors; -}; - // load the model's weights from a file bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) { fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str()); diff --git a/llama.h b/llama.h new file mode 100644 index 000000000..84f4db408 --- /dev/null +++ b/llama.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include +#include +#include + +#include "ggml.h" + + +// default hparams (LLaMA 7B) +struct llama_hparams { + int32_t n_vocab = 32000; + int32_t n_ctx = 512; // this is provided as user input? + int32_t n_embd = 4096; + int32_t n_mult = 256; + int32_t n_head = 32; + int32_t n_layer = 32; + int32_t n_rot = 64; + int32_t f16 = 1; +}; + +struct llama_layer { + // normalization + struct ggml_tensor * attention_norm; + + // attention + struct ggml_tensor * wq; + struct ggml_tensor * wk; + struct ggml_tensor * wv; + struct ggml_tensor * wo; + + // normalization + struct ggml_tensor * ffn_norm; + + // ff + struct ggml_tensor * w1; + struct ggml_tensor * w2; + struct ggml_tensor * w3; +}; + +struct llama_model { + llama_hparams hparams; + + struct ggml_tensor * tok_embeddings; + + struct ggml_tensor * norm; + struct ggml_tensor * output; + + std::vector layers; + + // key + value memory + struct ggml_tensor * memory_k; + struct ggml_tensor * memory_v; + + // + struct ggml_context * ctx; + std::map tensors; +};