From 0410e03cedfc80ac57de8b4ce5d1f4729b5863ee Mon Sep 17 00:00:00 2001
From: slaren
Date: Wed, 22 Jan 2025 17:24:49 +0100
Subject: [PATCH] server : fix draft context not being released

---
 examples/server/server.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 412908aa8..4cfb3c9bb 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1772,6 +1772,9 @@ struct server_context {
             // force F16 KV cache for the draft model for extra performance
             cparams_dft.type_k = GGML_TYPE_F16;
             cparams_dft.type_v = GGML_TYPE_F16;
+
+            // the context is not needed - we will create one for each slot
+            llama_init_dft.context.reset();
         }
 
         chat_templates = common_chat_templates_from_model(model, params_base.chat_template);