From 82e70dbfe0f2a6d72a4b10b96708e1e0f447358b Mon Sep 17 00:00:00 2001
From: Thiago Padilha <thiago@padilha.cc>
Date: Sat, 18 Mar 2023 11:52:55 -0300
Subject: [PATCH] Move struct definitions in llama.cpp to llama.h

Signed-off-by: Thiago Padilha <thiago@padilha.cc>
---
 llama.cpp | 51 +----------------------------------------------
 llama.h   | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 50 deletions(-)
 create mode 100644 llama.h
diff --git a/llama.cpp b/llama.cpp
index c88405b82..2450e1c3d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1,3 +1,4 @@
+#include "llama.h"
 #include "ggml.h"
 
 #include "utils.h"
@@ -35,56 +36,6 @@ static const std::map<int, int> LLAMA_N_PARTS = {
     { 8192, 8 },
 };
 
-// default hparams (LLaMA 7B)
-struct llama_hparams {
-    int32_t n_vocab = 32000;
-    int32_t n_ctx   = 512;   // this is provided as user input?
-    int32_t n_embd  = 4096;
-    int32_t n_mult  = 256;
-    int32_t n_head  = 32;
-    int32_t n_layer = 32;
-    int32_t n_rot   = 64;
-    int32_t f16     = 1;
-};
-
-struct llama_layer {
-    // normalization
-    struct ggml_tensor * attention_norm;
-
-    // attention
-    struct ggml_tensor * wq;
-    struct ggml_tensor * wk;
-    struct ggml_tensor * wv;
-    struct ggml_tensor * wo;
-
-    // normalization
-    struct ggml_tensor * ffn_norm;
-
-    // ff
-    struct ggml_tensor * w1;
-    struct ggml_tensor * w2;
-    struct ggml_tensor * w3;
-};
-
-struct llama_model {
-    llama_hparams hparams;
-
-    struct ggml_tensor * tok_embeddings;
-
-    struct ggml_tensor * norm;
-    struct ggml_tensor * output;
-
-    std::vector<llama_layer> layers;
-
-    // key + value memory
-    struct ggml_tensor * memory_k;
-    struct ggml_tensor * memory_v;
-
-    //
-    struct ggml_context * ctx;
-    std::map<std::string, struct ggml_tensor *> tensors;
-};
-
 // load the model's weights from a file
 bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) {
     fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());
diff --git a/llama.h b/llama.h
new file mode 100644
index 000000000..84f4db408
--- /dev/null
+++ b/llama.h
@@ -0,0 +1,59 @@
+#pragma once
+
+#include <vector>
+#include <map>
+#include <cstdio>
+#include <string>
+
+#include "ggml.h"
+
+
+// Model hyperparameters; each field defaults to its LLaMA 7B value.
+struct llama_hparams {
+    int32_t n_vocab{32000};
+    int32_t n_ctx{512};  // open question kept from the author: is this provided as user input? (llama_model_load takes its own n_ctx)
+    int32_t n_embd{4096};
+    int32_t n_mult{256};
+    int32_t n_head{32};
+    int32_t n_layer{32};
+    int32_t n_rot{64};
+    int32_t f16{1};
+};
+
+struct llama_layer { // tensor handles of a single layer; every pointer is nullptr until assigned
+    // normalization (the norm tensor named after the attention group)
+    struct ggml_tensor * attention_norm = nullptr;
+
+    // attention
+    struct ggml_tensor * wq = nullptr;
+    struct ggml_tensor * wk = nullptr;
+    struct ggml_tensor * wv = nullptr;
+    struct ggml_tensor * wo = nullptr;
+
+    // normalization (the norm tensor named after the ff group)
+    struct ggml_tensor * ffn_norm = nullptr;
+
+    // ff
+    struct ggml_tensor * w1 = nullptr;
+    struct ggml_tensor * w2 = nullptr;
+    struct ggml_tensor * w3 = nullptr;
+};
+
+struct llama_model { // hyperparameters plus tensor handles of one model; every pointer is nullptr until assigned
+    llama_hparams hparams;
+
+    struct ggml_tensor * tok_embeddings = nullptr;
+
+    struct ggml_tensor * norm   = nullptr;
+    struct ggml_tensor * output = nullptr;
+
+    std::vector<llama_layer> layers;
+
+    // key + value memory
+    struct ggml_tensor * memory_k = nullptr;
+    struct ggml_tensor * memory_v = nullptr;
+
+    // ggml context handle, and a string-keyed lookup of tensor pointers (NOTE(review): keys presumably tensor names - confirm)
+    struct ggml_context * ctx = nullptr;
+    std::map<std::string, struct ggml_tensor *> tensors;
+};