enable CPU HBM (#2603)
* add cpu hbm support
* add memalign 0 byte check
* Update ggml.c
* Update llama.cpp
* ggml : allow ggml_init with 0 size
* retrigger ci
* fix code style

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
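For context: GGML_USE_CPU_HBM routes llama.cpp's host allocations through the
hbwmalloc API of the memkind library, so buffers can be placed in
high-bandwidth memory (e.g. MCDRAM on Knights Landing, HBM on Xeon Max)
instead of ordinary DRAM. A minimal sketch of the allocate/free pairing the
diff below relies on; hbw_malloc and hbw_free are the real memkind calls, the
buffer size is arbitrary:

    #include <hbwmalloc.h>   // ships with memkind; link with -lmemkind

    int main() {
        // request 1 MiB from high-bandwidth memory; under memkind's default
        // preferred policy this falls back to regular DRAM when no HBM
        // node is present
        void * buf = hbw_malloc(1024 * 1024);
        if (buf == NULL) {
            return 1;
        }
        // ... use buf like any other host allocation ...
        hbw_free(buf);       // must pair with hbw_malloc, not free()
        return 0;
    }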
parent 6336d834ec
commit 7f412dab9c

3 changed files with 38 additions and 2 deletions

llama.cpp: 12 changed lines (11 additions, 1 deletion)
@@ -126,6 +126,9 @@ void replace_all(std::string & s, const std::string & search, const std::string
     }
     s = std::move(result);
 }
+#ifdef GGML_USE_CPU_HBM
+#include <hbwmalloc.h>
+#endif
 
 static void zeros(std::ofstream & file, size_t n) {
     char zero = 0;
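hbwmalloc.h is shipped by the memkind library, so builds defining
GGML_USE_CPU_HBM must also link against it (-lmemkind); the build-system
change is among the three changed files but is not shown in this view. A
hedged sketch of probing HBM availability at startup with the real memkind
call hbw_check_available(); the diff itself performs no such probe, and
warn_if_no_hbm is a hypothetical helper:

    #include <hbwmalloc.h>
    #include <cstdio>

    // hypothetical helper, not part of this commit
    static void warn_if_no_hbm(void) {
        if (hbw_check_available() != 0) {  // returns 0 when HBM nodes exist
            fprintf(stderr, "warning: no high-bandwidth memory detected; "
                            "hbw_malloc will follow its fallback policy\n");
        }
    }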
@@ -450,6 +453,9 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
 #elif GGML_USE_METAL
 #  define llama_host_malloc(n)  ggml_metal_host_malloc(n)
 #  define llama_host_free(data) ggml_metal_host_free(data)
+#elif GGML_USE_CPU_HBM
+#  define llama_host_malloc(n)  hbw_malloc(n)
+#  define llama_host_free(data) if (data != NULL) hbw_free(data)
 #else
 #  define llama_host_malloc(n)  malloc(n)
 #  define llama_host_free(data) free(data)
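The new #elif branch slots into the existing compile-time dispatch, so call
sites stay backend-agnostic. A hedged illustration of how a call site expands
under this flag (alloc_host is illustrative, not from llama.cpp):

    // With GGML_USE_CPU_HBM defined, the preprocessor rewrites
    //   data = llama_host_malloc(n);  ->  data = hbw_malloc(n);
    //   llama_host_free(data);        ->  if (data != NULL) hbw_free(data);
    static void * alloc_host(size_t n) {
        void * data = llama_host_malloc(n);   // hbw_malloc(n) in an HBM build
        return data;                          // callers check for NULL
    }

Note that llama_host_free expands to a bare if statement, so it is only safe
as a standalone statement; the explicit NULL check is presumably there
because hbw_free, unlike free, is not guaranteed to accept a NULL pointer.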
@@ -1489,7 +1495,11 @@ struct llama_model_loader {
             // allocate temp buffer if not using mmap
             if (!use_mmap && cur->data == NULL) {
                 GGML_ASSERT(cur->backend != GGML_BACKEND_CPU);
-                cur->data = malloc(ggml_nbytes(cur));
+#ifdef GGML_USE_CPU_HBM
+                cur->data = (uint8_t*)hbw_malloc(ggml_nbytes(cur));
+#else
+                cur->data = (uint8_t*)malloc(ggml_nbytes(cur));
+#endif
             }
 
             load_data_for(cur);
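Two commit-message bullets, "add memalign 0 byte check" and "ggml : allow
ggml_init with 0 size", land in ggml.c, which this view truncates. A hedged
sketch of what a zero-size guard in an aligned allocator can look like;
hbw_posix_memalign() is the real memkind counterpart of posix_memalign(),
while the function name and alignment below are illustrative:

    #include <stdlib.h>
    #ifdef GGML_USE_CPU_HBM
    #include <hbwmalloc.h>
    #endif

    // hypothetical stand-in for ggml's aligned allocator, shown only to
    // illustrate the zero-size guard
    static void * aligned_alloc_checked(size_t size) {
        if (size == 0) {
            // posix_memalign may return NULL or a unique pointer for a
            // zero-size request; returning NULL up front keeps callers'
            // behavior consistent
            return NULL;
        }
        void * ptr = NULL;
    #ifdef GGML_USE_CPU_HBM
        int result = hbw_posix_memalign(&ptr, 64, size);
    #else
        int result = posix_memalign(&ptr, 64, size);
    #endif
        return result == 0 ? ptr : NULL;
    }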