llama : add option to render special/control tokens (#6807)
* make : fix common dep on llama.h
* llama : add option to render special tokens
* readme : add API change notice

ggml-ci

* swift : fix build
parent b9cc76d87e
commit 40f74e4d73
7 changed files with 25 additions and 20 deletions
llama.h (4 changed lines)
@@ -828,11 +828,13 @@ extern "C" {
     // Uses the vocabulary in the provided context.
     // Does not write null terminator to the buffer.
     // User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
+    // @param special If true, special tokens are rendered in the output.
     LLAMA_API int32_t llama_token_to_piece(
         const struct llama_model * model,
                        llama_token   token,
                               char * buf,
-                             int32_t   length);
+                             int32_t   length,
+                                bool   special);
 
     /// Apply chat template. Inspired by hf apply_chat_template() on python.
     /// Both "model" and "custom_template" are optional, but at least one is required. "custom_template" has higher precedence than "model"
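For callers migrating to the new signature, a minimal usage sketch follows. The helper name token_to_piece and the retry logic are illustrative only (not part of this commit); it assumes the existing llama.cpp convention that a negative return value means the buffer was too small and its absolute value is the required size.

#include <string>
#include <vector>
#include "llama.h"

// Hypothetical helper: render a single token as text.
// Pass render_special = true to include special/control tokens in the output.
static std::string token_to_piece(const struct llama_model * model,
                                  llama_token token,
                                  bool render_special) {
    std::vector<char> buf(8, 0);
    int32_t n = llama_token_to_piece(model, token, buf.data(), (int32_t) buf.size(), render_special);
    if (n < 0) {
        // Buffer too small: -n is the required size (assumed convention), so retry once.
        buf.resize(-n);
        n = llama_token_to_piece(model, token, buf.data(), (int32_t) buf.size(), render_special);
    }
    return n >= 0 ? std::string(buf.data(), n) : std::string();
}

Passing false for the new parameter preserves the previous behavior, where special tokens are not rendered into the output text.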