diff --git a/examples/server/server.cpp b/examples/server/server.cpp index f9d20fee5..8684771e2 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -692,6 +692,7 @@ struct server_context { auto params_dft = params_base; + params_dft.devices = params_base.speculative.devices; params_dft.model = params_base.speculative.model; params_dft.n_ctx = params_base.speculative.n_ctx; params_dft.n_gpu_layers = params_base.speculative.n_gpu_layers; diff --git a/examples/speculative-simple/speculative-simple.cpp b/examples/speculative-simple/speculative-simple.cpp index 1bc7f428c..7bf9056bf 100644 --- a/examples/speculative-simple/speculative-simple.cpp +++ b/examples/speculative-simple/speculative-simple.cpp @@ -46,6 +46,7 @@ int main(int argc, char ** argv) { ctx_tgt = llama_init_tgt.context; // load the draft model + params.devices = params.speculative.devices; params.model = params.speculative.model; params.n_ctx = params.speculative.n_ctx; params.n_batch = params.speculative.n_ctx > 0 ? params.speculative.n_ctx : params.n_batch;