llama : add option to override tensor buffers

2025-01-24 20:56:09 +01:00 · 2025-01-24 20:56:09 +01:00 · f07c2ec505
commit f07c2ec505
parent 9fbadaef4f
9 changed files with 87 additions and 8 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -275,10 +275,18 @@ extern "C" {
        };
    };

+    struct llama_model_tensor_buft_override { // one override entry: pairs a tensor pattern with a buffer type
+        const char * pattern; // pattern that selects the tensors this override applies to (NOTE(review): pattern syntax is not specified here - confirm against the matcher)
+        ggml_backend_buffer_type_t buft; // buffer type to use for tensors that match the pattern
+    };
+
    struct llama_model_params {
        // NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
        ggml_backend_dev_t * devices;

+        // NULL-terminated list of buffer types to use for tensors that match a pattern
+        const struct llama_model_tensor_buft_override * tensor_buft_overrides;
+
        int32_t n_gpu_layers; // number of layers to store in VRAM
        enum llama_split_mode split_mode; // how to split the model across multiple GPUs