From d15578e63e5648373d42f04a31ca6e37055457ea Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Wed, 3 May 2023 09:33:30 -0400
Subject: [PATCH] Update llama.cpp (session version)

---
 examples/llama_cpp.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/llama_cpp.py b/examples/llama_cpp.py
index 4e4596ea7..5baa6cc76 100644
--- a/examples/llama_cpp.py
+++ b/examples/llama_cpp.py
@@ -71,7 +71,7 @@ LLAMA_FILE_VERSION = ctypes.c_int(1)
 LLAMA_FILE_MAGIC = b"ggjt"
 LLAMA_FILE_MAGIC_UNVERSIONED = b"ggml"
 LLAMA_SESSION_MAGIC = b"ggsn"
-LLAMA_SESSION_VERSION = ctypes.c_int(0)
+LLAMA_SESSION_VERSION = ctypes.c_int(1)
 
 llama_context_p = c_void_p
 
@@ -239,7 +239,8 @@ _lib.llama_set_rng_seed.argtypes = [llama_context_p, c_int]
 _lib.llama_set_rng_seed.restype = None
 
 
-# Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
+# Returns the maximum size in bytes of the state (rng, logits, embedding
+# and kv_cache) - will often be smaller after compacting tokens
 def llama_get_state_size(ctx: llama_context_p) -> c_size_t:
     return _lib.llama_get_state_size(ctx)
 