llama : fix BERT inference without KV cache
This commit is contained in:
parent
0fd13e9473
commit
61a88a1da3
1 changed file with 4 additions and 0 deletions
@@ -3105,6 +3105,10 @@ static bool llama_cache_init(
         ggml_context * ctx = it.second;
         ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
         if (!buf) {
+            if (!has_kv && !has_rs) {
+                // no buffer was needed, so this is fine
+                return true;
+            }
             LLAMA_LOG_ERROR("%s: failed to allocate buffer for kv cache\n", __func__);
             return false;
         }
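Context for the change: llama_cache_init previously treated a null buffer from ggml_backend_alloc_ctx_tensors_from_buft as a fatal error. For encoder-only models such as BERT there is neither a KV cache nor recurrent state, so presumably no cache tensors exist and no buffer gets allocated, which is harmless. The following is a minimal, self-contained sketch of that control flow, not the actual llama.cpp code; the placeholder types and helpers (fake_buffer, alloc_cache_buffer, cache_init_sketch) are hypothetical and exist only for illustration.

#include <cstdio>

// Hypothetical stand-in for a backend buffer handle (illustration only).
struct fake_buffer {};

// Pretend allocator: returns nullptr when there are no cache tensors to
// allocate, mimicking the situation for models without a KV cache.
static fake_buffer * alloc_cache_buffer(bool has_any_tensors) {
    return has_any_tensors ? new fake_buffer() : nullptr;
}

// Sketch of the fixed logic: a null buffer is only an error when a KV cache
// or recurrent state was actually requested.
static bool cache_init_sketch(bool has_kv, bool has_rs) {
    fake_buffer * buf = alloc_cache_buffer(has_kv || has_rs);
    if (!buf) {
        if (!has_kv && !has_rs) {
            // no buffer was needed, so this is fine
            return true;
        }
        fprintf(stderr, "%s: failed to allocate buffer for kv cache\n", __func__);
        return false;
    }
    delete buf; // a real implementation would keep the buffer alive
    return true;
}

int main() {
    // Encoder-only model (BERT-like): no KV cache, no recurrent state -> ok.
    printf("no cache needed  -> %s\n", cache_init_sketch(false, false) ? "ok" : "fail");
    // Decoder model: KV cache requested and allocated -> ok.
    printf("kv cache needed  -> %s\n", cache_init_sketch(true, false) ? "ok" : "fail");
    return 0;
}

With the fix, the no-cache path returns true instead of logging an allocation failure, so BERT-style inference proceeds without a KV cache buffer.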