diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index d2b6bd335..908802aa8 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2258,7 +2258,12 @@ struct server_context {
                 completion_token_output result;
                 const llama_token id = llama_sampling_sample(slot.ctx_sampling, ctx, NULL, slot.i_batch - i);
 
                 if (id == -1) {
-                    continue; // keep going, don't crash, already logged
+                    send_error(slot, "can't get completions out of an embeddings model");
+                    slot.cache_tokens.clear();
+                    slot.reset();
+                    slot.release();
+                    slot.i_batch = -1;
+                    continue; // continue loop of slots
                 }
 
                 llama_sampling_accept(slot.ctx_sampling, ctx, id, true);