Merge 8f83ca592d into edd1ab7bc3

2024-01-01 00:52:33 -08:00 · 2024-01-01 00:52:33 -08:00 · 64b9e5f39d
commit 64b9e5f39d
parent edd1ab7bc3 8f83ca592d
4 changed files with 16 additions and 9 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1182,12 +1182,12 @@ std::vector<llama_token> llama_tokenize(
    return result;
 }

-std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
+std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
    std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
+    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
    if (n_tokens < 0) {
        result.resize(-n_tokens);
-        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
+        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
        GGML_ASSERT(check == -n_tokens);
    } else {
        result.resize(n_tokens);
--- a/common/common.h
+++ b/common/common.h
@@ -193,9 +193,10 @@ std::vector<llama_token> llama_tokenize(

 // tokenizes a token into a piece
 // should work similar to Python's `tokenizer.id_to_piece`
+// special = true includes control/eos pieces; by default they are omitted
 std::string llama_token_to_piece(
        const struct llama_context * ctx,
-                       llama_token   token);
+                       llama_token   token, bool special = false);

 // TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function
 //       that takes into account the tokenizer type and decides how to handle the leading space
--- a/llama.cpp
+++ b/llama.cpp
@@ -1194,10 +1194,10 @@ static void ggml_offload_nop(struct ggml_tensor * tensor) {

 static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
    std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
+    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), false);
    if (n_tokens < 0) {
        result.resize(-n_tokens);
-        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
+        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), false);
        GGML_ASSERT(check == -n_tokens);
    }
    else {
@@ -10680,10 +10680,15 @@ static std::string llama_decode_text(const std::string & text) {

 // does not write null-terminator to buf
 int llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int length) {
+    return llama_token_to_piece(model, token, buf, length, false);
+}
+
+// does not write null-terminator to buf
+int llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int length, bool print_all_types = false) {
    if (0 <= token && token < llama_n_vocab(model)) {
        switch (llama_vocab_get_type(model->vocab)) {
        case LLAMA_VOCAB_TYPE_SPM: {
-            if (llama_is_normal_token(model->vocab, token)) {
+            if (print_all_types || llama_is_normal_token(model->vocab, token)) {
                std::string result = model->vocab.id_to_token[token].text;
                llama_unescape_whitespace(result);
                if (length < (int) result.length()) {
@@ -10713,7 +10718,7 @@ int llama_token_to_piece(const struct llama_model * model, llama_token token, ch
            break;
        }
        case LLAMA_VOCAB_TYPE_BPE: {
-            if (llama_is_normal_token(model->vocab, token)) {
+            if (print_all_types || llama_is_normal_token(model->vocab, token)) {
                std::string result = model->vocab.id_to_token[token].text;
                result = llama_decode_text(result);
                if (length < (int) result.length()) {
--- a/llama.h
+++ b/llama.h
@@ -652,7 +652,8 @@ extern "C" {
              const struct llama_model * model,
                           llama_token   token,
                                  char * buf,
-                                  int    length);
+                                  int    length,
+                                  bool   print_all_types);

    //
    // Grammar