diff --git a/ggml.c b/ggml.c
index 63aa5eb6e..7ac9d6754 100644
--- a/ggml.c
+++ b/ggml.c
@@ -3043,47 +3043,48 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
     return result;
 }
 
-#ifdef __APPLE__
-#define MLOCK_SUGGESTION \
-    "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or " \
-    "decreasing 'vm.global_no_user_wire_amount'. Also try increasing RLIMIT_MLOCK (ulimit -l).\n"
-#else
-#define MLOCK_SUGGESTION \
-    "Try increasing RLIMIT_MLOCK ('ulimit -l' as root).\n"
-#endif
-
 bool ggml_mlock_supported(void) {
     return GGML_MLOCK_SUPPORT;
 }
 
-bool ggml_mlock(
-        struct ggml_context * ctx,
-        const void *opt_extra_addr,
-        size_t opt_extra_len,
-        char **err_p) {
-    // TODO: Use SetProcessWorkingSetSize() + VirtualLock() on WIN32
 #if GGML_MLOCK_SUPPORT
+#ifdef __APPLE__
+    #define MLOCK_SUGGESTION "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or\n" \
+                             "decreasing 'vm.global_no_user_wire_amount'. Also try increasing RLIMIT_MLOCK (ulimit -l)."
+#else
+    #define MLOCK_SUGGESTION "Try increasing RLIMIT_MLOCK (ulimit -l)."
+#endif
+#define MLOCK_ERROR_FORMAT "failed to mlock %zu-byte buffer: %s\n" MLOCK_SUGGESTION
+
+// Lock the context's memory buffer (ctx->mem_buffer, ctx->mem_size bytes) with mlock().
+// Returns true if the buffer is locked, either by this call or by an earlier one.
+// On failure returns false and stores a heap-allocated message in *err_p; the caller must free() it.
+bool ggml_mlock(struct ggml_context * ctx, char ** err_p) {
     if (ctx->mem_buffer_mlocked) {
         return true;
     }
-    if (mlock(ctx->mem_buffer, ctx->mem_size) ||
-        (opt_extra_len &&
-         mlock(opt_extra_addr, opt_extra_len))) {
-        if ((*err_p = malloc(1024))) {
-            snprintf(*err_p, 1024,
-                     "failed to mlock %zu-byte buffer: %s\n" MLOCK_SUGGESTION,
-                     ctx->mem_size + opt_extra_len,
-                     strerror(errno));
-        }
+    if (mlock(ctx->mem_buffer, ctx->mem_size)) {
+        // read errno right away: the library calls below are allowed to overwrite it
+        const char * reason = strerror(errno);
+        // asprintf() is a GNU/BSD extension, so measure the message with a first snprintf() pass instead
+        const int len = snprintf(NULL, 0, MLOCK_ERROR_FORMAT, ctx->mem_size, reason);
+        GGML_ASSERT(len >= 0);
+        *err_p = malloc((size_t) len + 1);
+        GGML_ASSERT(*err_p != NULL); // the message is printed by the caller without a NULL check
+        snprintf(*err_p, (size_t) len + 1, MLOCK_ERROR_FORMAT, ctx->mem_size, reason);
         return false;
     }
     ctx->mem_buffer_mlocked = true;
     return true;
+}
 #else // GGML_MLOCK_SUPPORT
+// Fallback when GGML_MLOCK_SUPPORT is 0: always fails and reports why through *err_p (caller frees).
+bool ggml_mlock(struct ggml_context * ctx, char ** err_p) {
     *err_p = strdup("can't mlock because it's not supported on this system");
+    GGML_ASSERT(*err_p != NULL); // the message is printed by the caller without a NULL check
     return false;
-#endif // GGML_MLOCK_SUPPORT
 }
+#endif // GGML_MLOCK_SUPPORT
 
 ////////////////////////////////////////////////////////////////////////////////
 
diff --git a/ggml.h b/ggml.h
index ad962b109..323a48759 100644
--- a/ggml.h
+++ b/ggml.h
@@ -345,11 +345,7 @@ size_t ggml_used_mem(const struct ggml_context * ctx);
 size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
 
 bool ggml_mlock_supported(void);
-bool ggml_mlock(
-        struct ggml_context * ctx,
-        const void *opt_extra_addr,
-        size_t opt_extra_len,
-        char **err_p);
+bool ggml_mlock(struct ggml_context * ctx, char ** err_p);
 
 struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
diff --git a/llama.cpp b/llama.cpp
index 3de1b3a7c..6b43aef5b 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1595,10 +1595,7 @@ struct llama_context * llama_init_from_file(
 
     if (params.use_mlock) {
         char *err;
-        if (!ggml_mlock(ctx->model.ctx,
-                        ctx->model.mm_addr,
-                        ctx->model.mm_length,
-                        &err)) {
+        if (!ggml_mlock(ctx->model.ctx, &err)) {
             fprintf(stderr, "%s\n", err);
             free(err);
             llama_free(ctx);