From d15578e63e5648373d42f04a31ca6e37055457ea Mon Sep 17 00:00:00 2001
From: Andrei Betlen <abetlen@gmail.com>
Date: Wed, 3 May 2023 09:33:30 -0400
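Subject: [PATCH] Update llama.cpp (session version)

Bump LLAMA_SESSION_VERSION from 0 to 1 and reword the
llama_get_state_size comment to say it returns the maximum state size.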

---
 examples/llama_cpp.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/llama_cpp.py b/examples/llama_cpp.py
index 4e4596ea7..5baa6cc76 100644
--- a/examples/llama_cpp.py
+++ b/examples/llama_cpp.py
@@ -71,7 +71,7 @@ LLAMA_FILE_VERSION = ctypes.c_int(1)
 LLAMA_FILE_MAGIC = b"ggjt"
 LLAMA_FILE_MAGIC_UNVERSIONED = b"ggml"
 LLAMA_SESSION_MAGIC = b"ggsn"
-LLAMA_SESSION_VERSION = ctypes.c_int(0)
+LLAMA_SESSION_VERSION = ctypes.c_int(1)
 
 llama_context_p = c_void_p
 
@@ -239,7 +239,8 @@ _lib.llama_set_rng_seed.argtypes = [llama_context_p, c_int]
 _lib.llama_set_rng_seed.restype = None
 
 
-# Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
+# Returns the maximum size in bytes of the state (rng, logits, embedding
+# and kv_cache) - will often be smaller after compacting tokens
 def llama_get_state_size(ctx: llama_context_p) -> c_size_t:
     return _lib.llama_get_state_size(ctx)