diff --git a/ggml.c b/ggml.c
index 63aa5eb6e..7ac9d6754 100644
--- a/ggml.c
+++ b/ggml.c
@@ -3043,47 +3043,36 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
     return result;
 }
 
-#ifdef __APPLE__
-#define MLOCK_SUGGESTION \
-    "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or " \
-    "decreasing 'vm.global_no_user_wire_amount'.  Also try increasing RLIMIT_MLOCK (ulimit -l).\n"
-#else
-#define MLOCK_SUGGESTION \
-    "Try increasing RLIMIT_MLOCK ('ulimit -l' as root).\n"
-#endif
-
 bool ggml_mlock_supported(void) {
     return GGML_MLOCK_SUPPORT;
 }
 
-bool ggml_mlock(
-        struct ggml_context * ctx,
-        const void *opt_extra_addr,
-        size_t opt_extra_len,
-        char **err_p) {
-    // TODO: Use SetProcessWorkingSetSize() + VirtualLock() on WIN32
 #if GGML_MLOCK_SUPPORT
+#ifdef __APPLE__ // Apple builds get a longer hint that also names the relevant sysctl settings
+    #define MLOCK_SUGGESTION "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or\n" \
+                             "decreasing 'vm.global_no_user_wire_amount'.  Also try increasing RLIMIT_MLOCK (ulimit -l)."
+#else
+    #define MLOCK_SUGGESTION "Try increasing RLIMIT_MLOCK (ulimit -l)."
+#endif
+bool ggml_mlock(struct ggml_context * ctx, char ** err_p) { // mlock()s ctx->mem_buffer; returns true on success or if already locked, false with a message in *err_p otherwise
     if (ctx->mem_buffer_mlocked) {
         return true;
     }
-    if (mlock(ctx->mem_buffer, ctx->mem_size) ||
-        (opt_extra_len &&
-         mlock(opt_extra_addr, opt_extra_len))) {
-        if ((*err_p = malloc(1024))) {
-            snprintf(*err_p, 1024,
-                     "failed to mlock %zu-byte buffer: %s\n" MLOCK_SUGGESTION,
-                     ctx->mem_size + opt_extra_len,
-                     strerror(errno));
-        }
+    if (mlock(ctx->mem_buffer, ctx->mem_size)) { // a non-zero return from mlock() means the lock failed
+        int ret = asprintf(err_p, "failed to mlock %zu-byte buffer: %s\n" MLOCK_SUGGESTION,
+                           ctx->mem_size, strerror(errno)); // asprintf allocates the message and stores it in *err_p
+        GGML_ASSERT(ret >= 0); // a negative ret means asprintf could not build the message
         return false;
     }
     ctx->mem_buffer_mlocked = true;
     return true;
+}
 #else // GGML_MLOCK_SUPPORT
+bool ggml_mlock(struct ggml_context * ctx, char ** err_p) { // fallback without mlock support: always returns false with a strdup'd message in *err_p
     *err_p = strdup("can't mlock because it's not supported on this system");
     return false;
-#endif // GGML_MLOCK_SUPPORT
 }
+#endif // GGML_MLOCK_SUPPORT
 
 ////////////////////////////////////////////////////////////////////////////////
 
diff --git a/ggml.h b/ggml.h
index ad962b109..323a48759 100644
--- a/ggml.h
+++ b/ggml.h
@@ -345,11 +345,7 @@ size_t ggml_used_mem(const struct ggml_context * ctx);
 size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
 
 bool ggml_mlock_supported(void);
-bool ggml_mlock(
-        struct ggml_context * ctx,
-        const void *opt_extra_addr,
-        size_t opt_extra_len,
-        char **err_p);
+bool ggml_mlock(struct ggml_context * ctx, char ** err_p); // returns false on failure and sets *err_p to a heap-allocated message for the caller to free()
 
 struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
diff --git a/llama.cpp b/llama.cpp
index 3de1b3a7c..6b43aef5b 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1595,10 +1595,7 @@ struct llama_context * llama_init_from_file(
 
     if (params.use_mlock) {
         char *err;
-        if (!ggml_mlock(ctx->model.ctx,
-                        ctx->model.mm_addr,
-                        ctx->model.mm_length,
-                        &err)) {
+        if (!ggml_mlock(ctx->model.ctx, &err)) {
             fprintf(stderr, "%s\n", err);
             free(err);
             llama_free(ctx);