main : don't print special tokens with --grammar

The CLI was recently changed to print special control tokens, such as the </s> stop token. These tokens shouldn't be printed when the --grammar flag is passed, unless the grammar itself specifies them, because printing them breaks shell-scriptability.
2024-04-26 02:01:58 -07:00 · 2024-04-26 02:01:58 -07:00 · 08c5e35014
commit 08c5e35014
parent 83b72cb086
1 changed file with 4 additions and 2 deletions
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -520,6 +520,7 @@ int main(int argc, char ** argv) {
    }
    struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams);
+    bool should_show_special_tokens = sparams.grammar.empty();
    while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
        // predict
@@ -733,7 +734,8 @@ int main(int argc, char ** argv) {
        // display text
        if (input_echo && display) {
            for (auto id : embd) {
-                const std::string token_str = llama_token_to_piece(ctx, id);
+                const std::string token_str =
+                        llama_token_to_piece(ctx, id, should_show_special_tokens);
                printf("%s", token_str.c_str());
                if (embd.size() > 1) {
@@ -899,7 +901,7 @@ int main(int argc, char ** argv) {
                    for (size_t i = original_size; i < embd_inp.size(); ++i) {
                        const llama_token token = embd_inp[i];
                        output_tokens.push_back(token);
-                        output_ss << llama_token_to_piece(ctx, token);
+                        output_ss << llama_token_to_piece(ctx, token, should_show_special_tokens);
                    }
                    n_remain -= line_inp.size();