From 9ef5d089271550a99c5c578898b2ae612713da7e Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 13 Nov 2024 19:59:20 +0200
Subject: [PATCH] llama : add comments about KV cache state after error

---
 include/llama.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/llama.h b/include/llama.h
index ccb48f73c..5e742642e 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -797,7 +797,7 @@ extern "C" {
 // Processes a batch of tokens with the ecoder part of the encoder-decoder model.
 // Stores the encoder output internally for later use by the decoder cross-attention layers.
 // 0 - success
- // < 0 - error
+ // < 0 - error. the KV cache state is restored to the state before this call
 LLAMA_API int32_t llama_encode(
 struct llama_context * ctx,
 struct llama_batch batch);
@@ -805,7 +805,7 @@ extern "C" {
 // Positive return values does not mean a fatal error, but rather a warning.
 // 0 - success
 // 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
- // < 0 - error
+ // < 0 - error. the KV cache state is restored to the state before this call
 LLAMA_API int32_t llama_decode(
 struct llama_context * ctx,
 struct llama_batch batch);