Update llama.cpp (remove llama_get_kv_cache)

2023-04-24 09:30:10 -04:00 · 2023-04-24 09:30:10 -04:00 · 80c18cb665
commit 80c18cb665
parent bf9f02d8ee
1 changed files with 0 additions and 30 deletions
--- a/examples/llama_cpp.py
+++ b/examples/llama_cpp.py
@@ -201,25 +201,6 @@ _lib.llama_apply_lora_from_file.argtypes = [llama_context_p, c_char_p, c_char_p,
 _lib.llama_apply_lora_from_file.restype = c_int
 # Hands back the KV cache that will hold the context for the model's
 # ongoing prediction. The C result is declared below as a pointer to
 # c_uint8.
 def llama_get_kv_cache(ctx: llama_context_p):
    kv_cache_ptr = _lib.llama_get_kv_cache(ctx)
    return kv_cache_ptr
 _lib.llama_get_kv_cache.restype = POINTER(c_uint8)
 _lib.llama_get_kv_cache.argtypes = [llama_context_p]
 # Reports the size of the KV cache for the given context. The C result
 # is declared below as c_size_t.
 def llama_get_kv_cache_size(ctx: llama_context_p) -> c_size_t:
    cache_size = _lib.llama_get_kv_cache_size(ctx)
    return cache_size
 _lib.llama_get_kv_cache_size.restype = c_size_t
 _lib.llama_get_kv_cache_size.argtypes = [llama_context_p]
 # Reports how many tokens are in the KV cache of the given context.
 def llama_get_kv_cache_token_count(ctx: llama_context_p) -> c_int:
    token_count = _lib.llama_get_kv_cache_token_count(ctx)
    return token_count
@@ -229,17 +210,6 @@ _lib.llama_get_kv_cache_token_count.argtypes = [llama_context_p]
 # Declare the foreign function's result type as a C int so ctypes converts
 # the return value accordingly.
 _lib.llama_get_kv_cache_token_count.restype = c_int
 # Installs `kv_cache` as the KV cache containing the current context for
 # the model. The C function is declared below with no return value, so
 # the wrapper simply forwards whatever ctypes hands back for the call.
 def llama_set_kv_cache(
    ctx: llama_context_p,
    kv_cache,
    n_size: c_size_t,
    n_token_count: c_int,
 ):
    outcome = _lib.llama_set_kv_cache(ctx, kv_cache, n_size, n_token_count)
    return outcome
 _lib.llama_set_kv_cache.restype = None
 _lib.llama_set_kv_cache.argtypes = [llama_context_p, POINTER(c_uint8), c_size_t, c_int]
 # Reports the size, in bytes, of the state (rng, logits, embedding and
 # kv_cache) for the given context.
 def llama_get_state_size(ctx: llama_context_p) -> c_size_t:
    state_size = _lib.llama_get_state_size(ctx)
    return state_size