From e68580f9937ad2fd4a8c3e35940ca04670de7df4 Mon Sep 17 00:00:00 2001
From: goerch <jhr.walter@t-online.de>
Date: Tue, 25 Jul 2023 17:49:24 +0200
Subject: [PATCH] Remove llama.cpp.h

---
 examples/common.h                          |  2 +-
 examples/quantize-stats/quantize-stats.cpp |  1 +
 llama.cpp                                  | 37 +++++++++++++++++-
 llama.cpp.h                                | 45 ----------------------
 llama.h                                    | 20 ++++++++--
 tests/test-tokenizer-0.cpp                 |  2 +-
 tests/test-tokenizer-1.cpp                 |  2 +-
 7 files changed, 57 insertions(+), 52 deletions(-)
 delete mode 100644 llama.cpp.h
diff --git a/examples/common.h b/examples/common.h
index a9dd6933c..2fd5d38a8 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -2,8 +2,8 @@
 
 #pragma once
 
+#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
-#include "llama.cpp.h"
 
 #include <string>
 #include <vector>
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index 6aa06ec8f..a330b20df 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -1,6 +1,7 @@
 #include "ggml.h"
 #include "build-info.h"
 
+#define LLAMA_API_CPP // TODO: eliminate me
 #define LLAMA_API_INTERNAL
 #include "llama.h"
 
diff --git a/llama.cpp b/llama.cpp
index a1a85e396..f49bca4cf 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3694,7 +3694,6 @@ int llama_tokenize_bpe(
     return res.size();
 }
 
-
 int llama_n_vocab_from_model(const struct llama_model * model) {
     return model->vocab.id_to_token.size();
 }
@@ -3781,6 +3780,24 @@ int llama_token_to_str(const struct llama_context * ctx, llama_token token, char
     return llama_token_to_str_with_model(&ctx->model, token, str, length);
 }
 
+// C++ convenience overload of the C-style llama_token_to_str(): returns the token text as a
+// std::string. Writes through &result[0] because std::string::data() is const before C++17.
+std::string llama_token_to_str(
+        const struct llama_context * ctx,
+                       llama_token   token) {
+    std::string result(8, '\0'); // initial 8-byte guess, grown below when it is too small
+    int length = llama_token_to_str(ctx, token, &result[0], result.length());
+    if (length < 0) { // a negative result is treated as the negated buffer size required
+        result.resize(-length);
+        int check = llama_token_to_str(ctx, token, &result[0], result.length());
+        assert(check == -length);
+        GGML_UNUSED(check); // `check` is otherwise unused when assert() compiles out
+    } else {
+        result.resize(length);
+    }
+    return result;
+}
+
 int llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token, char * str, int length) {
     if (0 <= token && token < llama_n_vocab_from_model(&ctx->model)) {
         std::string result = ctx->model.vocab.id_to_token[token].tok;
@@ -3793,6 +3810,24 @@ int llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token,
     return 0;
 }
 
+// C++ convenience overload of the C-style llama_token_to_str_bpe(): returns the token text as
+// a std::string. Writes through &result[0] because std::string::data() is const before C++17.
+std::string llama_token_to_str_bpe(
+        const struct llama_context * ctx,
+                       llama_token   token) {
+    std::string result(8, '\0'); // initial 8-byte guess, grown below when it is too small
+    int length = llama_token_to_str_bpe(ctx, token, &result[0], result.length());
+    if (length < 0) { // a negative result is treated as the negated buffer size required
+        result.resize(-length);
+        int check = llama_token_to_str_bpe(ctx, token, &result[0], result.length());
+        assert(check == -length);
+        GGML_UNUSED(check); // `check` is otherwise unused when assert() compiles out
+    } else {
+        result.resize(length);
+    }
+    return result;
+}
+
 llama_token llama_token_bos() {
     return 1;
 }
diff --git a/llama.cpp.h b/llama.cpp.h
deleted file mode 100644
index 0354c5892..000000000
--- a/llama.cpp.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef LLAMA_CPP_H
-#define LLAMA_CPP_H
-
-#include "llama.h"
-
-#include <cassert>
-#include <string>
-
-static std::string llama_token_to_str(
-        const struct llama_context * ctx,
-                       llama_token   token) {
-    std::string result;
-    int length = 8;
-    result.resize(length);
-    length = llama_token_to_str(ctx, token, (char *)result.data(), result.length());
-    if (length < 0) {
-        result.resize(-length);
-        int check = llama_token_to_str(ctx, token, (char *)result.data(), result.length());
-        assert(check == -length);
-        GGML_UNUSED(check);
-    } else {
-        result.resize(length);
-    }
-    return result;
-}
-
-static std::string llama_token_to_str_bpe(
-    const struct llama_context * ctx,
-                   llama_token   token) {
-    std::string result;
-    int length = 8;
-    result.resize(length);
-    length = llama_token_to_str_bpe(ctx, token, (char*)result.data(), result.length());
-    if (length < 0) {
-        result.resize(-length);
-        int check = llama_token_to_str_bpe(ctx, token, (char*)result.data(), result.length());
-        assert(check == -length);
-        GGML_UNUSED(check);
-    } else {
-        result.resize(length);
-    }
-    return result;
-}
-
-#endif
diff --git a/llama.h b/llama.h
index 54a5e246c..c5852a3b4 100644
--- a/llama.h
+++ b/llama.h
@@ -415,15 +415,29 @@ extern "C" {
 }
 #endif
 
-// Internal API to be implemented by llama.cpp and used by tests/benchmarks only
-#ifdef LLAMA_API_INTERNAL
+// C++ API, will be moving to common.h soon (TM)
+#ifdef LLAMA_API_CPP
 
 #include <vector>
 #include <string>
+
+// Returns the token's text as a std::string. Defined in llama.cpp, so it must not be `static`.
+std::string llama_token_to_str(
+        const struct llama_context * ctx, llama_token token);
+
+// Returns the BPE token's text as a std::string. Defined in llama.cpp, so it must not be `static`.
+std::string llama_token_to_str_bpe(
+        const struct llama_context * ctx, llama_token token);
+
+// Internal API to be implemented by llama.cpp and used by tests/benchmarks only
+#ifdef LLAMA_API_INTERNAL
+
 struct ggml_tensor;
 
 const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
 
-#endif
+#endif // LLAMA_API_INTERNAL
+
+#endif // LLAMA_API_CPP
 
 #endif // LLAMA_H
diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp
index 9d7b5b348..f8642996a 100644
--- a/tests/test-tokenizer-0.cpp
+++ b/tests/test-tokenizer-0.cpp
@@ -1,5 +1,5 @@
+#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
-#include "llama.cpp.h"
 
 #include <cstdio>
 #include <string>
diff --git a/tests/test-tokenizer-1.cpp b/tests/test-tokenizer-1.cpp
index b7db2c877..cde7a203b 100644
--- a/tests/test-tokenizer-1.cpp
+++ b/tests/test-tokenizer-1.cpp
@@ -1,5 +1,5 @@
+#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
-#include "llama.cpp.h"
 
 #include <cassert>
 #include <cstdio>