ggml backend interface wip

refactor ggml-cuda
This commit is contained in:
slaren 2023-07-10 17:32:06 +02:00
parent 6e7cca4047
commit 0d2b66c638
15 changed files with 4480 additions and 4782 deletions

View file

@ -203,6 +203,17 @@ struct llama_mmap {
}
}
// Advise the kernel that the pages overlapping [addr, addr + len) are no
// longer needed, using madvise(MADV_DONTNEED).
//
// madvise() needs a page-aligned start address, so the range is widened
// outwards to whole pages: the start is rounded down and the end is rounded
// up. The bytes skipped by rounding the start down are added to the length
// before it is rounded up; without that, a range that starts in the middle
// of a page and ends in a later page would leave its last page out.
//
// A zero-length request is a no-op. Failures are reported on stderr as
// warnings and are otherwise ignored (the call is purely advisory).
void discard(void * addr, size_t len) {
    if (len == 0) {
        // nothing to discard; also avoids widening an empty range to a full page
        return;
    }

    const long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) {
        fprintf(stderr, "warning: sysconf(_SC_PAGESIZE) failed: %s\n",
                strerror(errno));
        return;
    }

    // align to the page size
    const uintptr_t page_mask     = (uintptr_t) page_size - 1;
    const uintptr_t start         = (uintptr_t) addr;
    const uintptr_t aligned_start = start & ~page_mask;
    const size_t    lead          = (size_t) (start - aligned_start);
    const size_t    aligned_len   = (len + lead + (size_t) page_mask) & ~((size_t) page_mask);

    if (madvise((void *) aligned_start, aligned_len, MADV_DONTNEED)) {
        fprintf(stderr, "warning: madvise(.., MADV_DONTNEED) failed: %s\n",
                strerror(errno));
    }
}
// Destructor: unmaps the region described by addr/size via munmap().
// (addr and size are presumably the members recorded when the mapping was
// created — their declarations are not visible here; confirm.)
// The return value of munmap() is not checked.
~llama_mmap() {
munmap(addr, size);
}
@ -247,6 +258,10 @@ struct llama_mmap {
#endif // _WIN32_WINNT >= _WIN32_WINNT_WIN8
}
// Marks the range [addr, addr + len) as no longer of interest by calling
// VirtualAlloc with MEM_RESET. PAGE_NOACCESS is passed as the protection
// argument of that call.
// NOTE(review): the return value of VirtualAlloc is not checked, so a
// failure is silent.
// NOTE(review): Microsoft's VirtualAlloc documentation states that MEM_RESET
// fails for memory mapped to a file — confirm this call has any effect on
// the view this object manages.
void discard(void * addr, size_t len) {
VirtualAlloc(addr, len, MEM_RESET, PAGE_NOACCESS);
}
~llama_mmap() {
if (!UnmapViewOfFile(addr)) {
fprintf(stderr, "warning: UnmapViewOfFile failed: %s\n",
@ -262,6 +277,13 @@ struct llama_mmap {
throw std::runtime_error(std::string("mmap not supported"));
}
// Variant of discard() for builds where mmap is not supported: there is
// nothing that could be discarded, so every call fails.
//
// Throws std::runtime_error("mmap not supported") unconditionally.
void discard(void * addr, size_t len) {
    // neither argument is used; the casts silence unused-parameter warnings
    static_cast<void>(len);
    static_cast<void>(addr);
    throw std::runtime_error("mmap not supported");
}
#endif
};
@ -451,14 +473,14 @@ struct llama_buffer {
llama_buffer& operator=(llama_buffer&&) = delete;
};
#ifdef GGML_USE_CUBLAS
#if defined(GGML_USE_CUDA)
#include "ggml-cuda.h"
struct llama_ctx_buffer {
struct llama_host_buffer {
uint8_t * addr = NULL;
bool is_cuda;
size_t size = 0;
llama_ctx_buffer() = default;
llama_host_buffer() = default;
void resize(size_t size) {
free();
@ -487,18 +509,19 @@ struct llama_ctx_buffer {
addr = NULL;
}
~llama_ctx_buffer() {
~llama_host_buffer() {
free();
}
// disable copy and move
llama_ctx_buffer(const llama_ctx_buffer&) = delete;
llama_ctx_buffer(llama_ctx_buffer&&) = delete;
llama_ctx_buffer& operator=(const llama_ctx_buffer&) = delete;
llama_ctx_buffer& operator=(llama_ctx_buffer&&) = delete;
llama_host_buffer(const llama_host_buffer&) = delete;
llama_host_buffer(llama_host_buffer&&) = delete;
llama_host_buffer& operator=(const llama_host_buffer&) = delete;
llama_host_buffer& operator=(llama_host_buffer&&) = delete;
};
#else
typedef llama_buffer llama_ctx_buffer;
typedef llama_buffer llama_host_buffer;
#endif
typedef llama_buffer llama_ctx_buffer;
#endif