From fe28a7b9d8d0c90d70778d0a2df92094aec2faa6 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Tue, 23 Jul 2024 08:38:50 +0300
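Subject: [PATCH] llama : clean-up

- move the conditional POSIX includes (<unistd.h>, <sys/mman.h>, <fcntl.h>,
  <sys/resource.h>) and the LLAMA_MAX_NODES / LLAMA_MAX_LAYERS /
  LLAMA_MAX_EXPERTS defines from src/llama-impl.h into src/llama.cpp
- remove llama_get_sampling() and the llama_model overload of
  llama_get_vocab() (both the declarations and the definitions)
- add a TODO in src/unicode.h to prefix all symbols with "llama_"
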
---
 src/llama-impl.h     | 18 ------------------
 src/llama-sampling.h |  2 --
 src/llama-vocab.h    |  1 -
 src/llama.cpp        | 26 ++++++++++++++++++--------
 src/unicode.h        |  2 ++
 5 files changed, 20 insertions(+), 29 deletions(-)
diff --git a/src/llama-impl.h b/src/llama-impl.h
index 458049a97..dcc8c1c15 100644
--- a/src/llama-impl.h
+++ b/src/llama-impl.h
@@ -3,24 +3,6 @@
 #define LLAMA_API_INTERNAL
 #include "llama.h"
 
-#ifdef __has_include
-    #if __has_include(<unistd.h>)
-        #include <unistd.h>
-        #if defined(_POSIX_MAPPED_FILES)
-            #include <sys/mman.h>
-            #include <fcntl.h>
-        #endif
-        #if defined(_POSIX_MEMLOCK_RANGE)
-            #include <sys/resource.h>
-        #endif
-    #endif
-#endif
-
-// bump if necessary
-#define LLAMA_MAX_NODES   8192
-#define LLAMA_MAX_LAYERS  512
-#define LLAMA_MAX_EXPERTS 160  // DeepSeekV2
-
 #ifdef __GNUC__
 #ifdef __MINGW32__
 #define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
diff --git a/src/llama-sampling.h b/src/llama-sampling.h
index 65b7ceed7..f7f8e3ef7 100644
--- a/src/llama-sampling.h
+++ b/src/llama-sampling.h
@@ -18,8 +18,6 @@ struct llama_sampling {
     }
 };
 
-struct llama_sampling * llama_get_sampling(struct llama_context * ctx);
-
 //
 // internal API
 //
diff --git a/src/llama-vocab.h b/src/llama-vocab.h
index 44c0efced..30b565d55 100644
--- a/src/llama-vocab.h
+++ b/src/llama-vocab.h
@@ -62,7 +62,6 @@ struct llama_vocab {
 };
 
 const struct llama_vocab * llama_get_vocab(const struct llama_context * ctx);
-const struct llama_vocab * llama_get_vocab(const struct llama_model   * model);
 
 //
 // internal API
diff --git a/src/llama.cpp b/src/llama.cpp
index 8cb5babb2..40c5e8e8d 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -36,6 +36,19 @@
 // TODO: replace with ggml API call
 #define QK_K 256
 
+#ifdef __has_include
+    #if __has_include(<unistd.h>)
+        #include <unistd.h>
+        #if defined(_POSIX_MAPPED_FILES)
+            #include <sys/mman.h>
+            #include <fcntl.h>
+        #endif
+        #if defined(_POSIX_MEMLOCK_RANGE)
+            #include <sys/resource.h>
+        #endif
+    #endif
+#endif
+
 #if defined(_WIN32)
     #define WIN32_LEAN_AND_MEAN
     #ifndef NOMINMAX
@@ -87,6 +100,11 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
+// bump if necessary
+#define LLAMA_MAX_NODES   8192
+#define LLAMA_MAX_LAYERS  512
+#define LLAMA_MAX_EXPERTS 160  // DeepSeekV2
+
 //
 // helpers
 //
@@ -16794,14 +16812,6 @@ const struct llama_vocab * llama_get_vocab(const struct llama_context * ctx) {
     return &ctx->model.vocab;
 }
 
-const struct llama_vocab * llama_get_vocab(const struct llama_model * model) {
-    return &model->vocab;
-}
-
-struct llama_sampling * llama_get_sampling(struct llama_context * ctx) {
-    return &ctx->sampling;
-}
-
 struct llama_grammar * llama_get_grammar(struct llama_context * ctx) {
     return &ctx->grammar;
 }
diff --git a/src/unicode.h b/src/unicode.h
index 52609ea37..008532a24 100644
--- a/src/unicode.h
+++ b/src/unicode.h
@@ -4,6 +4,8 @@
 #include <string>
 #include <vector>
 
+// TODO: prefix all symbols with "llama_"
+
 struct codepoint_flags {
     enum {
         UNDEFINED       = 0x0001,