llama : add llama_model_load_from_splits (#11255)

* llama : add `llama_model_load_from_splits`

* update
This commit is contained in:
Xuan Son Nguyen 2025-01-16 13:54:08 +01:00 committed by GitHub
parent c67cc9837d
commit 681149ced2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 116 additions and 24 deletions

View file

@@ -526,7 +526,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
kv_overrides = v->data();
}
-llama_model_loader ml(fname_inp, use_mmap, /*check_tensors*/ true, kv_overrides);
+std::vector<std::string> splits = {};
+llama_model_loader ml(fname_inp, splits, use_mmap, /*check_tensors*/ true, kv_overrides);
ml.init_mappings(false); // no prefetching
llama_model model(llama_model_default_params());