llama : add option to override tensor buffers

This commit is contained in:
slaren 2025-01-24 20:56:09 +01:00
parent 9fbadaef4f
commit f07c2ec505
9 changed files with 87 additions and 8 deletions

View file

@@ -445,7 +445,8 @@ llama_model_loader::llama_model_loader(
std::vector<std::string> & splits,
bool use_mmap,
bool check_tensors,
const struct llama_model_kv_override * param_overrides_p) {
const llama_model_kv_override * param_overrides_p,
const llama_model_tensor_buft_override * param_tensor_buft_overrides_p) {
int trace = 0;
if (getenv("LLAMA_TRACE")) {
trace = atoi(getenv("LLAMA_TRACE"));
@@ -457,6 +458,8 @@ llama_model_loader::llama_model_loader(
}
}
tensor_buft_overrides = param_tensor_buft_overrides_p;
// Load the main GGUF
struct ggml_context * ctx = NULL;
struct gguf_init_params params = {