more accurate mlock

This commit is contained in:
slaren 2023-12-20 03:01:33 +01:00
parent 72a0c96621
commit d3e7242bdb

View file

@@ -2270,10 +2270,7 @@ struct llama_model_loader {
         }
     }

-    void load_all_data(struct ggml_context * ctx, llama_progress_callback progress_callback, void * progress_callback_user_data, ggml_backend_buffer_t buf_mmap, llama_mlock * lmlock) const {
-        size_t size_lock = 0;
+    void load_all_data(struct ggml_context * ctx, llama_progress_callback progress_callback, void * progress_callback_user_data, ggml_backend_buffer_t buf_mmap, llama_mlock * lmlock) {
         size_t size_data = 0;
         for (int i = 0; i < gguf_get_n_tensors(ctx_gguf); i++) {
@@ -2281,7 +2278,7 @@ struct llama_model_loader {
             size_data += ggml_nbytes(cur);
         }

-        if (use_mmap) {
+        if (use_mmap && buf_mmap) {
             if (lmlock) {
                 lmlock->init(mapping->addr);
             }
@@ -2305,6 +2302,9 @@ struct llama_model_loader {
                 if (use_mmap) {
                     if (buf_mmap) {
                         ggml_backend_tensor_alloc(buf_mmap, cur, (uint8_t *) mapping->addr + offs);
+                        if (lmlock) {
+                            lmlock->grow_to(offs + ggml_nbytes(cur));
+                        }
                     } else {
                         ggml_backend_tensor_set(cur, (uint8_t *) mapping->addr + offs, 0, ggml_nbytes(cur));
                     }
@@ -2319,11 +2319,6 @@ struct llama_model_loader {
                         ggml_backend_tensor_set(cur, read_buf.data(), 0, ggml_nbytes(cur));
                     }
                 }
-                if (use_mmap && lmlock) {
-                    size_lock += ggml_nbytes(cur);
-                    lmlock->grow_to(size_lock);
-                }
             } else {
                 // HACK: mark tensor as allocated
                 cur->data = (void *)(uintptr_t)1;