main : don't print special tokens with --grammar

The CLI was recently changed to print special control tokens, such as
the </s> stop token. These tokens shouldn't be printed if the --grammar
flag was passed, unless the grammar specifies them, because printing
them breaks shell-scriptability.
This commit is contained in:
Justine Tunney 2024-04-26 02:01:58 -07:00
parent 83b72cb086
commit 08c5e35014
No known key found for this signature in database
GPG key ID: 52965314629936D4

View file

@ -520,6 +520,7 @@ int main(int argc, char ** argv) {
} }
struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams); struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams);
bool should_show_special_tokens = sparams.grammar.empty();
while ((n_remain != 0 && !is_antiprompt) || params.interactive) { while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
// predict // predict
@ -733,7 +734,8 @@ int main(int argc, char ** argv) {
// display text // display text
if (input_echo && display) { if (input_echo && display) {
for (auto id : embd) { for (auto id : embd) {
const std::string token_str = llama_token_to_piece(ctx, id); const std::string token_str =
llama_token_to_piece(ctx, id, should_show_special_tokens);
printf("%s", token_str.c_str()); printf("%s", token_str.c_str());
if (embd.size() > 1) { if (embd.size() > 1) {
@ -899,7 +901,7 @@ int main(int argc, char ** argv) {
for (size_t i = original_size; i < embd_inp.size(); ++i) { for (size_t i = original_size; i < embd_inp.size(); ++i) {
const llama_token token = embd_inp[i]; const llama_token token = embd_inp[i];
output_tokens.push_back(token); output_tokens.push_back(token);
output_ss << llama_token_to_piece(ctx, token); output_ss << llama_token_to_piece(ctx, token, should_show_special_tokens);
} }
n_remain -= line_inp.size(); n_remain -= line_inp.size();