Add llama_detokenize()

jaime-m-p 2024-06-20 17:51:16 +02:00
parent 2075a66a96
commit eea8dfab6b
2 changed files with 39 additions and 0 deletions

26 llama.cpp

@@ -18509,6 +18509,32 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token
    return 0;
}

int32_t llama_detokenize(
        const struct llama_model * model,
        const llama_token * tokens,
        int32_t n_tokens,
        char * text,
        int32_t text_len_max,
        bool special) {
    int32_t avail = text_len_max;
    int32_t total = 0;

    for (int32_t i = 0; i < n_tokens; ++i) {
        GGML_ASSERT(avail >= 0);
        int32_t n_chars = llama_token_to_piece(model, tokens[i], text, avail, special);
        if (n_chars < 0) {
            avail = 0;
            total -= n_chars;
        } else if (n_chars > 0) {
            avail -= n_chars;
            text  += n_chars;
            total += n_chars;
        }
    }

    return total <= text_len_max ? total : -total;
}
// trim whitespace from the beginning and end of a string
static std::string trim(const std::string & str) {
    size_t start = 0;
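
Not part of the commit: a minimal caller sketch for the new API, assuming a loaded llama_model *. It leans on the documented contract that a negative return value is the total number of bytes the call would have needed, so the buffer can be resized and the call retried:

// Hypothetical helper, not from the commit: detokenize into a std::string,
// growing the buffer when llama_detokenize() reports (as a negative value)
// how many bytes it would have needed.
#include <cassert>
#include <string>
#include <vector>
#include "llama.h"

static std::string detokenize_to_string(
        const struct llama_model * model,
        const std::vector<llama_token> & tokens,
        bool special) {
    std::string text(tokens.size() * 4, '\0'); // rough initial guess
    int32_t n = llama_detokenize(model, tokens.data(), (int32_t) tokens.size(),
                                 &text[0], (int32_t) text.size(), special);
    if (n < 0) {
        text.resize(-n); // -n is the exact size required; rerun from scratch
        n = llama_detokenize(model, tokens.data(), (int32_t) tokens.size(),
                             &text[0], (int32_t) text.size(), special);
        assert(n >= 0);  // the second call must fit
    }
    text.resize(n);
    return text;
}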

13 llama.h

@@ -897,6 +897,19 @@ extern "C" {
        int32_t length,
        bool special);

    /// @details Convert the provided tokens into text.
    /// @param text The char pointer must be large enough to hold the resulting text.
    /// @return Returns the number of chars/bytes on success, no more than text_len_max.
    /// @return Returns a negative number on failure - the number of chars/bytes that would have been returned.
    /// @param special If true, special tokens are rendered in the output.
    LLAMA_API int32_t llama_detokenize(
        const struct llama_model * model,
        const llama_token * tokens,
        int32_t n_tokens,
        char * text,
        int32_t text_len_max,
        bool special);

    /// Apply chat template. Inspired by hf apply_chat_template() on python.
    /// Both "model" and "custom_template" are optional, but at least one is required. "custom_template" has higher precedence than "model"
    /// NOTE: This function does not use a jinja parser. It only support a pre-defined list of template. See more: https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template
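
For illustration, also not part of the commit: llama_detokenize() is the inverse of llama_tokenize(), so a round trip looks roughly like the sketch below. The llama_tokenize() signature is assumed to be the one from this same API version, buffer sizes are deliberately generous, and real code should handle the negative "needed size" return of both calls:

// Hypothetical round trip: text -> tokens -> text. Assumes a loaded
// llama_model * and the llama_tokenize() signature of this API version.
#include <cstring>
#include <string>
#include <vector>
#include "llama.h"

static std::string round_trip(const struct llama_model * model) {
    const char * prompt = "Hello world";

    std::vector<llama_token> tokens(64); // generous; a negative n_tok would mean too small
    int32_t n_tok = llama_tokenize(model, prompt, (int32_t) strlen(prompt),
                                   tokens.data(), (int32_t) tokens.size(),
                                   /*add_special=*/true, /*parse_special=*/false);

    std::vector<char> buf(256);
    int32_t n_chars = llama_detokenize(model, tokens.data(), n_tok,
                                       buf.data(), (int32_t) buf.size(),
                                       /*special=*/false);
    // n_chars < 0 would mean buf was too small; -n_chars is the required size.
    return std::string(buf.data(), n_chars > 0 ? n_chars : 0);
}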