llama : accept a list of devices to use to offload a model (#10497)
* llama : accept a list of devices to use to offload a model
* accept `--dev none` to completely disable offloading
* fix dev list with dl backends
* rename env parameter to LLAMA_ARG_DEVICE for consistency
This commit is contained in:
parent
1f922254f0
commit
10bce0450f
9 changed files with 104 additions and 27 deletions
|
@@ -272,6 +272,9 @@ extern "C" {
|
|||
};
|
||||
|
||||
struct llama_model_params {
|
||||
// NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
|
||||
ggml_backend_dev_t * devices;
|
||||
|
||||
int32_t n_gpu_layers; // number of layers to store in VRAM
|
||||
enum llama_split_mode split_mode; // how to split the model across multiple GPUs
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue