Compare commits

2 commits

Author  SHA1        Message                                             Date
slaren  20e12112fd  llama : suggest reduce ctx size when kv init fails  2024-11-02 00:55:19 +01:00
slaren  bf60f27cda  ggml : do not abort when ggml_aligned_malloc fails  2024-11-02 00:54:16 +01:00
3 changed files with 2 additions and 2 deletions

@@ -798,7 +798,7 @@ static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_back
     void * data = ggml_aligned_malloc(size);
     if (data == NULL) {
-        GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size);
+        GGML_LOG_ERROR("%s: failed to allocate buffer of size %.2f MiB\n", __func__, size / 1024.0 / 1024.0);
         return NULL;
     }
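The change above is purely cosmetic: the allocation failure is now reported in MiB rather than raw bytes. A minimal sketch of the same conversion the patched log call uses (the 16 GiB size here is illustrative):

    #include <cstdio>

    int main() {
        // Same bytes -> MiB conversion as the new GGML_LOG_ERROR format string.
        unsigned long long size = 17179869184ULL; // e.g. a 16 GiB buffer request
        // the old "%zu" format printed the raw value: 17179869184
        std::printf("failed to allocate buffer of size %.2f MiB\n", size / 1024.0 / 1024.0);
        // new output: failed to allocate buffer of size 16384.00 MiB
        return 0;
    }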

@@ -433,7 +433,6 @@ void * ggml_aligned_malloc(size_t size) {
                 break;
         }
         GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
-        GGML_ABORT("fatal error");
         return NULL;
     }
     return aligned_memory;
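This is the behavioral change from bf60f27cda: on failure, ggml_aligned_malloc() now logs the error and returns NULL instead of calling GGML_ABORT(), so the failure propagates to callers such as the CPU buffer allocator above. A hedged sketch of caller-side handling, assuming the helper is visible to the caller (it is an internal ggml function, not part of the public API):

    #include <cstddef>
    #include <cstdio>

    // Assumed declaration; in the tree this comes from ggml's internal headers.
    extern "C" void * ggml_aligned_malloc(size_t size);

    static void * try_alloc(size_t n_bytes) {
        void * buf = ggml_aligned_malloc(n_bytes);
        if (buf == nullptr) {
            // With the abort removed, the failure is recoverable: ggml has
            // already logged it, and the caller can back off or retry with a
            // smaller request instead of the whole process terminating.
            std::fprintf(stderr, "allocation of %zu bytes failed\n", n_bytes);
            return nullptr;
        }
        return buf;
    }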

@@ -19520,6 +19520,7 @@ struct llama_context * llama_new_context_with_model(
         if (!llama_kv_cache_init(ctx->kv_self, ctx, type_k, type_v, kv_size, cparams.offload_kqv)) {
             LLAMA_LOG_ERROR("%s: llama_kv_cache_init() failed for self-attention cache\n", __func__);
+            LLAMA_LOG_ERROR("%s: suggestion: try using a smaller context size (-c command line option or llama_context_params.n_ctx)\n", __func__);
             llama_free(ctx);
             return nullptr;
         }
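Combined with the previous commit, an oversized KV cache no longer aborts the process: llama_new_context_with_model() logs the suggestion added in 20e12112fd and returns nullptr. A sketch of how an application might act on that suggestion; the halving retry policy is illustrative, not part of the patch:

    #include <cstdint>
    #include "llama.h"

    // Retry context creation with progressively smaller n_ctx until the
    // self-attention KV cache allocation succeeds (or a floor is reached).
    static llama_context * new_ctx_with_fallback(llama_model * model, uint32_t n_ctx) {
        while (n_ctx >= 512) {
            llama_context_params params = llama_context_default_params();
            params.n_ctx = n_ctx;
            llama_context * ctx = llama_new_context_with_model(model, params);
            if (ctx != nullptr) {
                return ctx; // the KV cache fit at this context size
            }
            n_ctx /= 2; // follow the log suggestion: use a smaller context size
        }
        return nullptr;
    }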