diff --git a/llama.cpp b/llama.cpp
index 4bb62a0a1..6cf9db133 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -149,10 +149,6 @@ struct llama_model {
     // the model memory buffer
     std::vector<uint8_t> buf;
 
-    // model memory mapped file
-    void * mm_addr;
-    size_t mm_length;
-
     // tensors
     int n_loaded;
     std::unordered_map<std::string, struct ggml_tensor *> tensors;
@@ -300,32 +296,22 @@ struct llama_context_params llama_context_default_params() {
 // model loading
 //
 
-static void mmap_file(const char* fname, void * &mm_addr, size_t &mm_length) {
+static void * mmap_file(const char* fname) {
 #if defined(MAP_FAILED)
-    // POSIX
+    // POSIX mmap
     int fd = open(fname, O_RDONLY);
-    mm_length = lseek(fd, 0, SEEK_END);
-    mm_addr = mmap(NULL, mm_length, PROT_READ, MAP_SHARED, fd, 0);
-    close(fd);
+    size_t len = lseek(fd, 0, SEEK_END);
+    void * mm_addr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
     if (mm_addr == MAP_FAILED) {
         perror("mmap failed");
         mm_addr = NULL;
-        mm_length = 0;
     }
+    close(fd);
+    return mm_addr;
 #else
     // TODO: windows support
     (void)(fname); // suppress warnings
-#endif
-}
-
-static void munmap_file(void * addr, size_t length) {
-#if defined(MAP_FAILED)
-    // POSIX
-    munmap(addr, length);
-#else
-    // TODO: windows support
-    (void)(addr); // suppress warnings
-    (void)(length);
+    return NULL;
 #endif
 }
 
@@ -494,15 +480,12 @@ static bool llama_model_load(
     bool use_mmap = (n_parts == 1);
 
     // try to memory map the model file
-    void * mm_addr = NULL;
+    void* mm_addr = NULL;
     if (use_mmap) {
-        mmap_file(fname.c_str(), model.mm_addr, model.mm_length);
-        if (model.mm_addr == NULL) {
+        mm_addr = mmap_file(fname.c_str());
+        if (mm_addr == NULL) {
             use_mmap = false;
         }
-        else {
-            mm_addr = model.mm_addr;
-        }
     }
 
     auto & ctx = model.ctx;
@@ -1767,10 +1750,6 @@ void llama_free(struct llama_context * ctx) {
         ggml_free(ctx->model.ctx);
     }
 
-    if (ctx->model.mm_addr) {
-        munmap_file(ctx->model.mm_addr, ctx->model.mm_length);
-    }
-
     delete ctx;
 }
 
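
For reference, below is a minimal standalone sketch of the POSIX open/lseek/mmap/close pattern that the new mmap_file follows. The name mmap_file_checked, the len_out parameter, and the open()/lseek() failure checks are illustrative additions, not part of the patch; closing the descriptor right after mmap() is safe because the mapping keeps its own reference to the file.

// Sketch (not the patch's code): the same read-only mapping pattern as
// mmap_file above, with the failure checks the patched function omits.
#include <cstdio>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

static void * mmap_file_checked(const char * fname, size_t * len_out) {
    int fd = open(fname, O_RDONLY);
    if (fd == -1) {
        perror("open failed");
        return NULL;
    }
    off_t len = lseek(fd, 0, SEEK_END); // seek to EOF to learn the file size
    if (len == (off_t) -1) {
        perror("lseek failed");
        close(fd);
        return NULL;
    }
    void * addr = mmap(NULL, (size_t) len, PROT_READ, MAP_SHARED, fd, 0);
    close(fd); // the mapping stays valid after the descriptor is closed
    if (addr == MAP_FAILED) {
        perror("mmap failed");
        return NULL;
    }
    *len_out = (size_t) len;
    return addr;
}

Note that since the patch removes munmap_file and the munmap call in llama_free, the mapping is now released only at process exit; a caller that wanted to unmap explicitly would need the mapping length, which the sketch returns via len_out.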