Remove llama.cpp.h

2023-07-25 17:49:24 +02:00 · 2023-07-25 17:49:24 +02:00 · e68580f993
commit e68580f993
parent 8253a534eb
7 changed files with 57 additions and 52 deletions
--- a/examples/common.h
+++ b/examples/common.h
@ -2,8 +2,8 @@

 #pragma once

+#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
-#include "llama.cpp.h"

 #include <string>
 #include <vector>
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@ -1,6 +1,7 @@
 #include "ggml.h"
 #include "build-info.h"

+#define LLAMA_API_CPP // TODO: eliminate me
 #define LLAMA_API_INTERNAL
 #include "llama.h"

--- a/llama.cpp
+++ b/llama.cpp
@ -3694,7 +3694,6 @@ int llama_tokenize_bpe(
    return res.size();
 }

-
 int llama_n_vocab_from_model(const struct llama_model * model) {
    return model->vocab.id_to_token.size();
 }
@ -3781,6 +3780,24 @@ int llama_token_to_str(const struct llama_context * ctx, llama_token token, char
    return llama_token_to_str_with_model(&ctx->model, token, str, length);
 }

+// C++ convenience overload: returns the text of `token` as a std::string.
+//
+// Wraps the buffer-based llama_token_to_str(ctx, token, buf, len). This wrapper
+// treats a non-negative return value as the number of bytes written and a
+// negative return value as the negated buffer size that is required.
+std::string llama_token_to_str(
+        const struct llama_context * ctx,
+                       llama_token   token) {
+    // first attempt with a small fixed-size buffer
+    std::string result(8, '\0');
+    // &result[0] yields a mutable pointer on every C++11+ library, so no cast is needed
+    const int length = llama_token_to_str(ctx, token, &result[0], static_cast<int>(result.size()));
+    if (length < 0) {
+        // buffer was too small: grow to the reported size and retry once
+        result.resize(-length);
+        const int check = llama_token_to_str(ctx, token, &result[0], static_cast<int>(result.size()));
+        assert(check == -length);
+        GGML_UNUSED(check); // presumably keeps `check` referenced when assert() compiles away
+    } else {
+        // trim the buffer down to the bytes actually written
+        result.resize(length);
+    }
+    return result;
+}
+
 int llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token, char * str, int length) {
    if (0 <= token && token < llama_n_vocab_from_model(&ctx->model)) {
        std::string result = ctx->model.vocab.id_to_token[token].tok;
@ -3793,6 +3810,24 @@ int llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token,
    return 0;
 }

+// C++ convenience overload: returns the BPE text of `token` as a std::string.
+//
+// Wraps the buffer-based llama_token_to_str_bpe(ctx, token, buf, len). This
+// wrapper treats a non-negative return value as the number of bytes written and
+// a negative return value as the negated buffer size that is required.
+std::string llama_token_to_str_bpe(
+    const struct llama_context * ctx,
+                   llama_token   token) {
+    // first attempt with a small fixed-size buffer
+    std::string result(8, '\0');
+    // &result[0] yields a mutable pointer on every C++11+ library, so no cast is needed
+    const int length = llama_token_to_str_bpe(ctx, token, &result[0], static_cast<int>(result.size()));
+    if (length < 0) {
+        // buffer was too small: grow to the reported size and retry once
+        result.resize(-length);
+        const int check = llama_token_to_str_bpe(ctx, token, &result[0], static_cast<int>(result.size()));
+        assert(check == -length);
+        GGML_UNUSED(check); // presumably keeps `check` referenced when assert() compiles away
+    } else {
+        // trim the buffer down to the bytes actually written
+        result.resize(length);
+    }
+    return result;
+}
+
 llama_token llama_token_bos() {
    return 1;
 }
--- a/llama.cpp.h
+++ b/llama.cpp.h
@ -1,45 +0,0 @@
-#ifndef LLAMA_CPP_H
-#define LLAMA_CPP_H
-
-#include "llama.h"
-
-#include <cassert>
-#include <string>
-
-static std::string llama_token_to_str(
-        const struct llama_context * ctx,
-                       llama_token   token) {
-    std::string result;
-    int length = 8;
-    result.resize(length);
-    length = llama_token_to_str(ctx, token, (char *)result.data(), result.length());
-    if (length < 0) {
-        result.resize(-length);
-        int check = llama_token_to_str(ctx, token, (char *)result.data(), result.length());
-        assert(check == -length);
-        GGML_UNUSED(check);
-    } else {
-        result.resize(length);
-    }
-    return result;
-}
-
-static std::string llama_token_to_str_bpe(
-    const struct llama_context * ctx,
-                   llama_token   token) {
-    std::string result;
-    int length = 8;
-    result.resize(length);
-    length = llama_token_to_str_bpe(ctx, token, (char*)result.data(), result.length());
-    if (length < 0) {
-        result.resize(-length);
-        int check = llama_token_to_str_bpe(ctx, token, (char*)result.data(), result.length());
-        assert(check == -length);
-        GGML_UNUSED(check);
-    } else {
-        result.resize(length);
-    }
-    return result;
-}
-
-#endif
--- a/llama.h
+++ b/llama.h
@ -415,15 +415,29 @@ extern "C" {
 }
 #endif

-// Internal API to be implemented by llama.cpp and used by tests/benchmarks only
-#ifdef LLAMA_API_INTERNAL
+// C++ API, will be moving to common.h soon (TM)
+#ifdef LLAMA_API_CPP

 #include <vector>
 #include <string>
+
+// Returns the text of `token` as a std::string; defined in llama.cpp.
+// Not `static`: the definition lives in another translation unit, so the
+// declaration needs external linkage to be usable from includers.
+std::string llama_token_to_str(
+        const struct llama_context * ctx,
+                       llama_token   token);
+
+// Returns the BPE text of `token` as a std::string; defined in llama.cpp.
+// Not `static`: the definition lives in another translation unit, so the
+// declaration needs external linkage to be usable from includers.
+std::string llama_token_to_str_bpe(
+    const struct llama_context * ctx,
+                   llama_token   token);
+
+// Internal API to be implemented by llama.cpp and used by tests/benchmarks only
+#ifdef LLAMA_API_INTERNAL
+
 struct ggml_tensor;

 const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);

-#endif
+#endif // LLAMA_API_INTERNAL
+
+#endif // LLAMA_API_CPP

 #endif // LLAMA_H
--- a/tests/test-tokenizer-0.cpp
+++ b/tests/test-tokenizer-0.cpp
@ -1,5 +1,5 @@
+#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
-#include "llama.cpp.h"

 #include <cstdio>
 #include <string>
--- a/tests/test-tokenizer-1.cpp
+++ b/tests/test-tokenizer-1.cpp
@ -1,5 +1,5 @@
+#define LLAMA_API_CPP // TODO: eliminate me
 #include "llama.h"
-#include "llama.cpp.h"

 #include <cassert>
 #include <cstdio>