ggml : add NUMA-aware buffer type that allocates pages according to the first-touch policy

llama : use NUMA-aware buffer type for KV cache
This commit is contained in:
Stanisław Szymczyk 2025-02-01 17:40:45 +01:00
parent 5bbc7362cb
commit eb3041a202
3 changed files with 94 additions and 1 deletions

View file

@ -348,6 +348,7 @@ extern "C" {
// CPU buffer types are always available
// Returns a backend buffer for the given pointer and size.
// NOTE(review): presumably wraps caller-provided memory without copying or taking ownership - confirm against the implementation.
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
// Returns the CPU buffer type.
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
// Returns the NUMA-aware buffer type, which allocates pages according to the first-touch policy.
// NOTE(review): behavior on platforms or systems without NUMA support is not visible here - confirm before relying on it.
GGML_API ggml_backend_buffer_type_t ggml_backend_numa_buffer_type(void);
#ifdef __cplusplus
}