llama : accept a list of devices to use to offload a model (#10497)

* llama : accept a list of devices to use to offload a model

* accept `--dev none` to completely disable offloading

* fix device list with dynamically loaded (dl) backends

* rename env parameter to LLAMA_ARG_DEVICE for consistency
This commit is contained in:
Diego Devesa 2024-11-25 19:30:06 +01:00 committed by GitHub
parent 1f922254f0
commit 10bce0450f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 104 additions and 27 deletions

View file

@ -692,6 +692,7 @@ struct server_context {
auto params_dft = params_base;
params_dft.devices = params_base.speculative.devices;
params_dft.model = params_base.speculative.model;
params_dft.n_ctx = params_base.speculative.n_ctx;
params_dft.n_gpu_layers = params_base.speculative.n_gpu_layers;