llama : accept a list of devices to use to offload a model (#10497)

* llama : accept a list of devices to use to offload a model

* accept `--dev none` to completely disable offloading

* fix dev list with dl backends

* rename env parameter to LLAMA_ARG_DEVICE for consistency
This commit is contained in:
Diego Devesa 2024-11-25 19:30:06 +01:00 committed by GitHub
parent 1f922254f0
commit 10bce0450f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 104 additions and 27 deletions

View file

@@ -377,9 +377,6 @@ void common_init() {
#endif
LOG_INF("build: %d (%s) with %s for %s%s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET, build_type);
// load dynamic backends
ggml_backend_load_all();
}
std::string common_params_get_system_info(const common_params & params) {
@@ -982,9 +979,12 @@ void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_l
}
}
struct llama_model_params common_model_params_to_llama(const common_params & params) {
struct llama_model_params common_model_params_to_llama(common_params & params) {
auto mparams = llama_model_default_params();
if (!params.devices.empty()) {
mparams.devices = params.devices.data();
}
if (params.n_gpu_layers != -1) {
mparams.n_gpu_layers = params.n_gpu_layers;
}