From 9ef5d089271550a99c5c578898b2ae612713da7e Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Wed, 13 Nov 2024 19:59:20 +0200
Subject: [PATCH] llama : add comments about KV cache state after error

---
 include/llama.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/llama.h b/include/llama.h
index ccb48f73c..5e742642e 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -797,7 +797,7 @@ extern "C" {
     // Processes a batch of tokens with the ecoder part of the encoder-decoder model.
     // Stores the encoder output internally for later use by the decoder cross-attention layers.
     //   0 - success
-    // < 0 - error
+    // < 0 - error. the KV cache state is restored to the state before this call
     LLAMA_API int32_t llama_encode(
             struct llama_context * ctx,
               struct llama_batch   batch);
@@ -805,7 +805,7 @@ extern "C" {
     // Positive return values does not mean a fatal error, but rather a warning.
     //   0 - success
     //   1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
-    // < 0 - error
+    // < 0 - error. the KV cache state is restored to the state before this call
     LLAMA_API int32_t llama_decode(
             struct llama_context * ctx,
               struct llama_batch   batch);