diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 98f17683f..d1ea343dd 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -3823,7 +3823,9 @@ int main(int argc, char ** argv) { std::vector tasks; try { - std::vector tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, data.at("prompt"), true, true); + const auto & prompt = data.at("prompt"); + LOG_DBG("Prompt: %s\n", prompt.is_string() ? prompt.get().c_str() : prompt.dump(2).c_str()); + std::vector tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true); tasks.reserve(tokenized_prompts.size()); for (size_t i = 0; i < tokenized_prompts.size(); i++) { server_task task = server_task(type); diff --git a/examples/server/tests/unit/test_chat_completion.py b/examples/server/tests/unit/test_chat_completion.py index 80cd90eef..fba3ea81d 100644 --- a/examples/server/tests/unit/test_chat_completion.py +++ b/examples/server/tests/unit/test_chat_completion.py @@ -15,7 +15,7 @@ def create_server(): [ (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", False, None), (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", True, None), - (None, "Book", "What is the best book", 8, "^ blue|I want to play with", 23, 8, "length", True, "This is not a chat template, it is"), + (None, "Book", "What is the best book", 8, "I want to play with", 23, 8, "length", True, "This is not a chat template, it is"), ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length", False, None), ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length", True, None), ] diff --git a/src/llama-grammar.h b/src/llama-grammar.h index 4ebde1452..252d54d4c 100644 --- a/src/llama-grammar.h +++ b/src/llama-grammar.h @@ -118,10 +118,10 @@ struct llama_grammar { // lazy grammars wait for trigger words or tokens before constraining the sampling. // we still ahve trigger_tokens for non-lazy grammars to force printing of special trigger tokens. // (useful e.g. for tool_choice=required) - bool lazy; - bool awaiting_trigger; // Initialized to true for lazy grammars only - std::string trigger_buffer; // Output buffered by lazy grammar. Will be cleared once trigger is found. - std::vector trigger_tokens; // Tokens that trigger a lazy grammar, or tokens to force printing of (even if special). + bool lazy = false; + bool awaiting_trigger = false; // Initialized to true for lazy grammars only + std::string trigger_buffer; // Output buffered by lazy grammar. Will be cleared once trigger is found. + std::vector trigger_tokens; // Tokens that trigger a lazy grammar, or tokens to force printing of (even if special). std::vector trigger_words; };