diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 6c7fcd176..d47725653 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1724,7 +1724,7 @@ struct llama_server_context
            const bool has_prompt = slot.prompt.is_array() || (slot.prompt.is_string() && !slot.prompt.get&lt;std::string&gt;().empty()) || !slot.images.empty();

            // empty prompt passed -> release the slot and send empty response
-           if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt)
+           if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt && !slot.infill)
            {
                slot.release();
                slot.print_timings();