diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index cbf36ad67..c096ebf57 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1510,7 +1510,7 @@ struct llama_server_context
             const bool has_prompt = slot.prompt.is_array() || (slot.prompt.is_string() && !slot.prompt.get<std::string>().empty()) || !slot.images.empty();
 
             // empty prompt passed -> release the slot and send empty response
-            if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt)
+            if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt && !slot.infill)
            {
                slot.release();
                slot.print_timings();
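
For context: this guard exists to short-circuit requests that arrive with no prompt at all. Infill requests, however, legitimately send an empty `prompt` and supply the text as `input_prefix`/`input_suffix`, which the server assembles into the actual prompt later, so without the extra `!slot.infill` check such requests were released prematurely. Below is a minimal, self-contained sketch of the corrected condition; the `Slot` struct and `should_release_empty()` helper are hypothetical stand-ins, not the server's actual types, and only the boolean expression mirrors the diff above.

```cpp
// Minimal sketch modelling the guard changed in this patch.
// `Slot` and `should_release_empty` are illustrative only.
#include <cassert>
#include <string>

enum slot_state   { IDLE, PROCESSING };
enum slot_command { NONE, LOAD_PROMPT };

struct Slot {
    slot_state   state   = IDLE;
    slot_command command = NONE;
    bool         infill  = false;  // set for /infill requests
    std::string  prompt;           // may be empty for infill: built from prefix/suffix later
};

// Release only plain completion requests that truly carry no prompt.
// Infill requests are kept alive even with an empty `prompt` field,
// because their prompt is assembled from input_prefix/input_suffix.
static bool should_release_empty(const Slot & slot, bool has_prompt) {
    return slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt && !slot.infill;
}

int main() {
    Slot completion;
    completion.command = LOAD_PROMPT;   // plain completion request with an empty prompt

    Slot infill_req;
    infill_req.command = LOAD_PROMPT;
    infill_req.infill  = true;          // /infill request, prompt comes from prefix/suffix

    assert( should_release_empty(completion, /*has_prompt=*/false));  // released: nothing to do
    assert(!should_release_empty(infill_req, /*has_prompt=*/false));  // kept alive by the new check
    return 0;
}
```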