From 8be06dc74596e4c5599a370c298fc0c429ff0f8d Mon Sep 17 00:00:00 2001
From: Justine Tunney
Date: Wed, 22 May 2024 01:11:38 -0700
Subject: [PATCH] Update examples/server/server.cpp

Co-authored-by: compilade
---
 examples/server/server.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index d2b6bd335..908802aa8 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2258,7 +2258,12 @@ struct server_context {
                 completion_token_output result;
                 const llama_token id = llama_sampling_sample(slot.ctx_sampling, ctx, NULL, slot.i_batch - i);
                 if (id == -1) {
-                    continue; // keep going, don't crash, already logged
+                    send_error(slot, "can't get completions out of an embeddings model");
+                    slot.cache_tokens.clear();
+                    slot.reset();
+                    slot.release();
+                    slot.i_batch = -1;
+                    continue; // continue loop of slots
                 }
 
                 llama_sampling_accept(slot.ctx_sampling, ctx, id, true);