diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 8eca14b86..f69a417eb 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2344,7 +2344,10 @@ struct server_context { common_batch_add(slot.batch_spec, draft[i], slot.n_past + 1 + i, { slot.id }, true); } - llama_decode(ctx, slot.batch_spec); + int ret = llama_decode(ctx, slot.batch_spec); + if (ret != 0) { + continue; + } // the accepted tokens from the speculation const auto ids = common_sampler_sample_and_accept_n(slot.smpl, ctx, draft);