Token to piece (#1)

* Update common.h * Update common.cpp * Update llama.h * Update llama.cpp * Update llama.h * Update common.cpp * Update llama.cpp
2023-11-17 02:11:00 +01:00 · 2023-11-17 02:11:00 +01:00 · aa094ace8e
commit aa094ace8e
parent 8da46278e1
4 changed files with 15 additions and 9 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1024,12 +1024,12 @@ std::vector<llama_token> llama_tokenize(
    return result;
 }

-std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
+std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
    std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
+    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(),special);
    if (n_tokens < 0) {
        result.resize(-n_tokens);
-        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
+        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(),special);
        GGML_ASSERT(check == -n_tokens);
    } else {
        result.resize(n_tokens);
--- a/common/common.h
+++ b/common/common.h
@@ -180,9 +180,10 @@ std::vector<llama_token> llama_tokenize(

 // tokenizes a token into a piece
 // should work similar to Python's `tokenizer.id_to_piece`
+// special = true also renders control/EOS token pieces; by default (false) they are omitted
 std::string llama_token_to_piece(
        const struct llama_context * ctx,
-                       llama_token   token);
+                       llama_token   token, bool special = false);

 // TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function
 //       that takes into account the tokenizer type and decides how to handle the leading space
--- a/llama.cpp
+++ b/llama.cpp
@@ -1042,10 +1042,10 @@ static void ggml_offload_nop(struct ggml_tensor * tensor) {

 static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
    std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
+    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(),false);
    if (n_tokens < 0) {
        result.resize(-n_tokens);
-        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
+        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(),false);
        GGML_ASSERT(check == -n_tokens);
    }
    else {
@@ -9338,10 +9338,14 @@ static std::string llama_decode_text(const std::string & text) {

 // does not write null-terminator to buf
 int llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int length) {
+    return llama_token_to_piece(model, token, buf, length, false);
+}   
+// does not write null-terminator to buf; print_all_types = true handles every token like a normal token (SPM and BPE)
+int llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int length, bool print_all_types=false) {
    if (0 <= token && token < llama_n_vocab(model)) {
        switch (llama_vocab_get_type(model->vocab)) {
        case LLAMA_VOCAB_TYPE_SPM: {
-            if (llama_is_normal_token(model->vocab, token)) {
+            if (print_all_types || llama_is_normal_token(model->vocab, token)) {
                std::string result = model->vocab.id_to_token[token].text;
                llama_unescape_whitespace(result);
                if (length < (int) result.length()) {
@@ -9371,7 +9375,7 @@ int llama_token_to_piece(const struct llama_model * model, llama_token token, ch
            break;
        }
        case LLAMA_VOCAB_TYPE_BPE: {
-            if (llama_is_normal_token(model->vocab, token)) {
+            if (print_all_types || llama_is_normal_token(model->vocab, token)) {
                std::string result = model->vocab.id_to_token[token].text;
                result = llama_decode_text(result);
                if (length < (int) result.length()) {
--- a/llama.h
+++ b/llama.h
@@ -550,7 +550,8 @@ extern "C" {
              const struct llama_model * model,
                           llama_token   token,
                                  char * buf,
-                                  int    length);
+                                  int    length,
+                                  bool   print_all_types);

    //
    // Grammar