diff --git a/llama.cpp b/llama.cpp
index 4bb62a0a1..6cf9db133 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -149,10 +149,6 @@ struct llama_model {
     // the model memory buffer
     std::vector<uint8_t> buf;
 
-    // model memory mapped file
-    void * mm_addr;
-    size_t mm_length;
-
     // tensors
     int n_loaded;
     std::unordered_map<std::string, struct ggml_tensor *> tensors;
@@ -300,32 +296,22 @@ struct llama_context_params llama_context_default_params() {
 // model loading
 //
 
-static void mmap_file(const char* fname, void * &mm_addr, size_t &mm_length) {
+static void * mmap_file(const char* fname) {
 #if defined(MAP_FAILED)
-    // POSIX
+    // POSIX mmap
     int fd = open(fname, O_RDONLY);
-    mm_length = lseek(fd, 0, SEEK_END);
-    mm_addr = mmap(NULL, mm_length, PROT_READ, MAP_SHARED, fd, 0);
-    close(fd);
+    size_t len = lseek(fd, 0, SEEK_END);
+    void * mm_addr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
     if (mm_addr == MAP_FAILED) {
         perror("mmap failed");
         mm_addr = NULL;
-        mm_length = 0;
     }
+    close(fd);
+    return mm_addr;
 #else
     // TODO: windows support
     (void)(fname); // suppress warnings
-#endif
-}
-
-static void munmap_file(void * addr, size_t length) {
-#if defined(MAP_FAILED)
-    // POSIX
-    munmap(addr, length);
-#else
-    // TODO: windows support
-    (void)(addr); // suppress warnings
-    (void)(length);
+    return NULL;
 #endif
 }
 
@@ -494,15 +480,12 @@ static bool llama_model_load(
     bool use_mmap = (n_parts == 1);
 
     // try to memory map the model file
-    void * mm_addr = NULL;
+    void* mm_addr = NULL;
     if (use_mmap) {
-        mmap_file(fname.c_str(), model.mm_addr, model.mm_length);
-        if (model.mm_addr == NULL) {
+        mm_addr = mmap_file(fname.c_str());
+        if (mm_addr == NULL) {
             use_mmap = false;
         }
-        else {
-            mm_addr = model.mm_addr;
-        }
     }
 
     auto & ctx = model.ctx;
@@ -1767,10 +1750,6 @@ void llama_free(struct llama_context * ctx) {
         ggml_free(ctx->model.ctx);
     }
 
-    if (ctx->model.mm_addr) {
-        munmap_file(ctx->model.mm_addr, ctx->model.mm_length);
-    }
-
     delete ctx;
 }
 
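
For reference, below is a minimal standalone sketch of the POSIX open/lseek/mmap/close pattern that the new mmap_file follows. The name mmap_file_checked, the len_out parameter, and the open()/lseek() failure checks are illustrative additions, not part of the patch; closing the descriptor right after mmap() is safe because the mapping keeps its own reference to the file.

// Sketch (not the patch's code): the same read-only mapping pattern as
// mmap_file above, with the failure checks the patched function omits.
#include <cstdio>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

static void * mmap_file_checked(const char * fname, size_t * len_out) {
    int fd = open(fname, O_RDONLY);
    if (fd == -1) {
        perror("open failed");
        return NULL;
    }
    off_t len = lseek(fd, 0, SEEK_END); // seek to EOF to learn the file size
    if (len == (off_t) -1) {
        perror("lseek failed");
        close(fd);
        return NULL;
    }
    void * addr = mmap(NULL, (size_t) len, PROT_READ, MAP_SHARED, fd, 0);
    close(fd); // the mapping stays valid after the descriptor is closed
    if (addr == MAP_FAILED) {
        perror("mmap failed");
        return NULL;
    }
    *len_out = (size_t) len;
    return addr;
}

Note that since the patch removes munmap_file and the munmap call in llama_free, the mapping is now released only at process exit; a caller that wanted to unmap explicitly would need the mapping length, which the sketch returns via len_out.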