From 08c5e350146a53bb7b6ff8f84a84e8b341c19f40 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Fri, 26 Apr 2024 02:01:58 -0700 Subject: [PATCH] main : don't print special tokens with --grammar The CLI was recently changed to print special control tokens, such as the stop-message token. These tokens shouldn't be printed if the --grammar flag was passed, unless the grammar specifies them, because printing them breaks shell-scriptability. --- examples/main/main.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index a74d4d9c7..3fb9d3eaf 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -520,6 +520,7 @@ int main(int argc, char ** argv) { } struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams); + bool should_show_special_tokens = sparams.grammar.empty(); while ((n_remain != 0 && !is_antiprompt) || params.interactive) { // predict @@ -733,7 +734,8 @@ int main(int argc, char ** argv) { // display text if (input_echo && display) { for (auto id : embd) { - const std::string token_str = llama_token_to_piece(ctx, id); + const std::string token_str = + llama_token_to_piece(ctx, id, should_show_special_tokens); printf("%s", token_str.c_str()); if (embd.size() > 1) { @@ -899,7 +901,7 @@ int main(int argc, char ** argv) { for (size_t i = original_size; i < embd_inp.size(); ++i) { const llama_token token = embd_inp[i]; output_tokens.push_back(token); - output_ss << llama_token_to_piece(ctx, token); + output_ss << llama_token_to_piece(ctx, token, should_show_special_tokens); } n_remain -= line_inp.size();