feat: revert changes to default behavior of llama_token_to_piece; provide an overloaded declaration that takes a "bool special" param to toggle showing control tokens

2024-04-23 22:50:22 -05:00 · 2024-04-23 22:50:22 -05:00 · 206c974eb6
commit 206c974eb6
parent 572960a045
2 changed files with 25 additions and 3 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2328,10 +2328,25 @@ std::vector<llama_token> llama_tokenize(

 std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
    std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), false);
+    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
    if (n_tokens < 0) {
        result.resize(-n_tokens);
-        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), false);
+        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+
+    return std::string(result.data(), result.size());
+}
+
+// overload of the function above that lets the caller specify whether special (control) tokens are shown
+std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
        GGML_ASSERT(check == -n_tokens);
    } else {
        result.resize(n_tokens);
--- a/common/common.h
+++ b/common/common.h
@@ -241,7 +241,14 @@ std::vector<llama_token> llama_tokenize(
 // should work similar to Python's `tokenizer.id_to_piece`
 std::string llama_token_to_piece(
        const struct llama_context * ctx,
-                       llama_token   token);
+                       llama_token   token
+);
+
+std::string llama_token_to_piece(
+        const struct llama_context * ctx,
+                       llama_token   token,
+                       bool          special
+);

 // TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function
 //       that takes into account the tokenizer type and decides how to handle the leading space