Move struct definitions in llama.cpp to llama.h

Signed-off-by: Thiago Padilha <thiago@padilha.cc>
2023-03-18 11:52:55 -03:00 · 2023-03-18 11:52:55 -03:00 · 82e70dbfe0
commit 82e70dbfe0
parent 51d003e885
2 changed files with 60 additions and 50 deletions
--- a/llama.cpp
+++ b/llama.cpp
@ -1,3 +1,4 @@
+#include "llama.h"
 #include "ggml.h"

 #include "utils.h"
@ -35,56 +36,6 @@ static const std::map<int, int> LLAMA_N_PARTS = {
    { 8192, 8 },
 };

-// default hparams (LLaMA 7B)
-struct llama_hparams {
-    int32_t n_vocab = 32000;
-    int32_t n_ctx   = 512;   // this is provided as user input?
-    int32_t n_embd  = 4096;
-    int32_t n_mult  = 256;
-    int32_t n_head  = 32;
-    int32_t n_layer = 32;
-    int32_t n_rot   = 64;
-    int32_t f16     = 1;
-};
-
-struct llama_layer {
-    // normalization
-    struct ggml_tensor * attention_norm;
-
-    // attention
-    struct ggml_tensor * wq;
-    struct ggml_tensor * wk;
-    struct ggml_tensor * wv;
-    struct ggml_tensor * wo;
-
-    // normalization
-    struct ggml_tensor * ffn_norm;
-
-    // ff
-    struct ggml_tensor * w1;
-    struct ggml_tensor * w2;
-    struct ggml_tensor * w3;
-};
-
-struct llama_model {
-    llama_hparams hparams;
-
-    struct ggml_tensor * tok_embeddings;
-
-    struct ggml_tensor * norm;
-    struct ggml_tensor * output;
-
-    std::vector<llama_layer> layers;
-
-    // key + value memory
-    struct ggml_tensor * memory_k;
-    struct ggml_tensor * memory_v;
-
-    //
-    struct ggml_context * ctx;
-    std::map<std::string, struct ggml_tensor *> tensors;
-};
-
 // load the model's weights from a file
 bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) {
    fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());
--- a/llama.h
+++ b/llama.h
@ -0,0 +1,59 @@
+#pragma once
+
+#include <vector>
+#include <map>
+#include <cstdio>
+#include <string>
+
+#include "ggml.h"
+
+
+// default hparams (LLaMA 7B)
+struct llama_hparams {
+    int32_t n_vocab = 32000;
+    int32_t n_ctx   = 512;   // this is provided as user input?
+    int32_t n_embd  = 4096;
+    int32_t n_mult  = 256;
+    int32_t n_head  = 32;
+    int32_t n_layer = 32;
+    int32_t n_rot   = 64;
+    int32_t f16     = 1;
+};
+
+struct llama_layer {
+    // normalization
+    struct ggml_tensor * attention_norm;
+
+    // attention
+    struct ggml_tensor * wq;
+    struct ggml_tensor * wk;
+    struct ggml_tensor * wv;
+    struct ggml_tensor * wo;
+
+    // normalization
+    struct ggml_tensor * ffn_norm;
+
+    // ff
+    struct ggml_tensor * w1;
+    struct ggml_tensor * w2;
+    struct ggml_tensor * w3;
+};
+
+struct llama_model {
+    llama_hparams hparams;
+
+    struct ggml_tensor * tok_embeddings;
+
+    struct ggml_tensor * norm;
+    struct ggml_tensor * output;
+
+    std::vector<llama_layer> layers;
+
+    // key + value memory
+    struct ggml_tensor * memory_k;
+    struct ggml_tensor * memory_v;
+
+    //
+    struct ggml_context * ctx;
+    std::map<std::string, struct ggml_tensor *> tensors;
+};