diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index a74d4d9c7..3fb9d3eaf 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -520,6 +520,7 @@ int main(int argc, char ** argv) {
     }
 
     struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams);
+    bool should_show_special_tokens = sparams.grammar.empty();
 
     while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
         // predict
@@ -733,7 +734,8 @@ int main(int argc, char ** argv) {
         // display text
         if (input_echo && display) {
             for (auto id : embd) {
-                const std::string token_str = llama_token_to_piece(ctx, id);
+                const std::string token_str =
+                    llama_token_to_piece(ctx, id, should_show_special_tokens);
                 printf("%s", token_str.c_str());
 
                 if (embd.size() > 1) {
@@ -899,7 +901,7 @@ int main(int argc, char ** argv) {
                 for (size_t i = original_size; i < embd_inp.size(); ++i) {
                     const llama_token token = embd_inp[i];
                     output_tokens.push_back(token);
-                    output_ss << llama_token_to_piece(ctx, token);
+                    output_ss << llama_token_to_piece(ctx, token, should_show_special_tokens);
                 }
 
                 n_remain -= line_inp.size();
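
For context, a minimal self-contained sketch of the behavior the patch wires in: special tokens are rendered as text only when no grammar is active. Only the three-argument call shape of llama_token_to_piece and the should_show_special_tokens = grammar.empty() decision come from the diff; the stub types, the special-token ID range, and the toy piece strings below are hypothetical stand-ins so the sketch compiles without the llama.cpp headers.

#include <cstdio>
#include <string>
#include <vector>

// Hypothetical stand-ins for llama.cpp types (illustration only).
struct llama_context {};
typedef int llama_token;

// Stub mirroring the patched three-argument overload: when `special` is
// false, special tokens are suppressed instead of being rendered as text.
static std::string llama_token_to_piece(llama_context * /*ctx*/, llama_token id, bool special) {
    const bool is_special = id >= 32000;                  // made-up special-token ID range
    if (is_special) {
        return special ? "<|eot|>" : "";                  // hide special tokens when disabled
    }
    return std::string(1, static_cast<char>('a' + id % 26)); // toy piece for normal tokens
}

int main() {
    llama_context ctx;

    const std::string grammar = "root ::= [a-z]+";        // non-empty => a grammar is active
    // Mirrors the patch: show special tokens only when no grammar is set.
    const bool should_show_special_tokens = grammar.empty();

    const std::vector<llama_token> embd = { 7, 32000, 4 };
    for (auto id : embd) {
        const std::string token_str = llama_token_to_piece(&ctx, id, should_show_special_tokens);
        printf("%s", token_str.c_str());                  // prints "he": the special token is hidden
    }
    printf("\n");
    return 0;
}

With an empty grammar string, the same loop would print "h<|eot|>e", matching the patch's intent that grammar-constrained runs never emit special-token text into the output stream.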