ggml : add NUMA-aware buffer type that allocates pages according to the first-touch policy

llama : use NUMA-aware buffer type for KV cache
2025-02-01 17:40:45 +01:00 · 2025-02-01 17:40:45 +01:00 · eb3041a202
commit eb3041a202
parent 5bbc7362cb
3 changed files with 94 additions and 1 deletions
--- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h
@@ -348,6 +348,7 @@ extern "C" {
    // CPU buffer types are always available
    GGML_API ggml_backend_buffer_t      ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
    GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
+    GGML_API ggml_backend_buffer_type_t ggml_backend_numa_buffer_type(void);

 #ifdef  __cplusplus
 }