ggml : add NUMA-aware buffer type that allocates pages according to the first-touch policy
llama : use NUMA-aware buffer type for KV cache
This commit is contained in:
parent
5bbc7362cb
commit
eb3041a202
3 changed files with 94 additions and 1 deletions
|
@@ -348,6 +348,7 @@ extern "C" {
|
|||
// CPU buffer types are always available
|
||||
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
|
||||
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
|
||||
GGML_API ggml_backend_buffer_type_t ggml_backend_numa_buffer_type(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue