Compare commits

2 commits

Author  SHA1        Message                                             Date
slaren  20e12112fd  llama : suggest reduce ctx size when kv init fails  2024-11-02 00:55:19 +01:00
slaren  bf60f27cda  ggml : do not abort when ggml_aligned_malloc fails  2024-11-02 00:54:16 +01:00
3 changed files with 2 additions and 2 deletions

@@ -798,7 +798,7 @@ static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_back
     void * data = ggml_aligned_malloc(size);
     if (data == NULL) {
-        GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size);
+        GGML_LOG_ERROR("%s: failed to allocate buffer of size %.2f MiB\n", __func__, size / 1024.0 / 1024.0);
         return NULL;
     }
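The change above is purely cosmetic: the allocation failure is now reported in MiB rather than raw bytes. A minimal sketch of the same conversion the patched log call uses (the 16 GiB size here is illustrative):

    #include <cstdio>

    int main() {
        // Same bytes -> MiB conversion as the new GGML_LOG_ERROR format string.
        unsigned long long size = 17179869184ULL; // e.g. a 16 GiB buffer request
        // the old "%zu" format printed the raw value: 17179869184
        std::printf("failed to allocate buffer of size %.2f MiB\n", size / 1024.0 / 1024.0);
        // new output: failed to allocate buffer of size 16384.00 MiB
        return 0;
    }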

@@ -433,7 +433,6 @@ void * ggml_aligned_malloc(size_t size) {
                 break;
         }
         GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
-        GGML_ABORT("fatal error");
         return NULL;
     }
     return aligned_memory;
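This is the behavioral change from bf60f27cda: on failure, ggml_aligned_malloc() now logs the error and returns NULL instead of calling GGML_ABORT(), so the failure propagates to callers such as the CPU buffer allocator above. A hedged sketch of caller-side handling, assuming the helper is visible to the caller (it is an internal ggml function, not part of the public API):

    #include <cstddef>
    #include <cstdio>

    // Assumed declaration; in the tree this comes from ggml's internal headers.
    extern "C" void * ggml_aligned_malloc(size_t size);

    static void * try_alloc(size_t n_bytes) {
        void * buf = ggml_aligned_malloc(n_bytes);
        if (buf == nullptr) {
            // With the abort removed, the failure is recoverable: ggml has
            // already logged it, and the caller can back off or retry with a
            // smaller request instead of the whole process terminating.
            std::fprintf(stderr, "allocation of %zu bytes failed\n", n_bytes);
            return nullptr;
        }
        return buf;
    }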

@@ -19520,6 +19520,7 @@ struct llama_context * llama_new_context_with_model(
         if (!llama_kv_cache_init(ctx->kv_self, ctx, type_k, type_v, kv_size, cparams.offload_kqv)) {
             LLAMA_LOG_ERROR("%s: llama_kv_cache_init() failed for self-attention cache\n", __func__);
+            LLAMA_LOG_ERROR("%s: suggestion: try using a smaller context size (-c command line option or llama_context_params.n_ctx)\n", __func__);
             llama_free(ctx);
             return nullptr;
         }
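Combined with the previous commit, an oversized KV cache no longer aborts the process: llama_new_context_with_model() logs the suggestion added in 20e12112fd and returns nullptr. A sketch of how an application might act on that suggestion; the halving retry policy is illustrative, not part of the patch:

    #include <cstdint>
    #include "llama.h"

    // Retry context creation with progressively smaller n_ctx until the
    // self-attention KV cache allocation succeeds (or a floor is reached).
    static llama_context * new_ctx_with_fallback(llama_model * model, uint32_t n_ctx) {
        while (n_ctx >= 512) {
            llama_context_params params = llama_context_default_params();
            params.n_ctx = n_ctx;
            llama_context * ctx = llama_new_context_with_model(model, params);
            if (ctx != nullptr) {
                return ctx; // the KV cache fit at this context size
            }
            n_ctx /= 2; // follow the log suggestion: use a smaller context size
        }
        return nullptr;
    }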