llama : refactor get / set state + remove redundant kv cache API (#1143)

Georgi Gerganov 2023-04-24 07:40:02 +03:00 committed by GitHub
parent 1d78fecdab
commit c4fe84fb0d
2 changed files with 181 additions and 156 deletions

llama.h (14 changes)

@@ -112,23 +112,9 @@ extern "C" {
             const char * path_base_model,
                    int   n_threads);
 
-    // Returns the KV cache that will contain the context for the
-    // ongoing prediction with the model.
-    LLAMA_API const uint8_t * llama_get_kv_cache(struct llama_context * ctx);
-
-    // Returns the size of the KV cache
-    LLAMA_API size_t llama_get_kv_cache_size(struct llama_context * ctx);
-
     // Returns the number of tokens in the KV cache
     LLAMA_API int llama_get_kv_cache_token_count(struct llama_context * ctx);
 
-    // Sets the KV cache containing the current context for the model
-    LLAMA_API void llama_set_kv_cache(
-            struct llama_context * ctx,
-                   const uint8_t * kv_cache,
-                            size_t n_size,
-                               int n_token_count);
-
     // Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
     LLAMA_API size_t llama_get_state_size(struct llama_context * ctx);
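
The removed entry points are superseded by the consolidated state API: llama_get_state_size reports the size of the whole state blob (rng, logits, embedding and kv_cache), while the companion llama_copy_state_data / llama_set_state_data declarations next to it in the header serialize and restore that blob in one call. Below is a minimal sketch of the replacement pattern under that assumption; the save_state / load_state helpers are hypothetical names, not part of the API, and error handling is kept minimal.

#include <stdint.h>
#include <stdlib.h>

#include "llama.h"

// Sketch: snapshot the full context state (rng, logits, embedding, kv_cache)
// into a heap buffer. Replaces the removed llama_get_kv_cache /
// llama_get_kv_cache_size pattern.
static uint8_t * save_state(struct llama_context * ctx, size_t * n_out) {
    const size_t n_max = llama_get_state_size(ctx);

    uint8_t * buf = (uint8_t *) malloc(n_max);
    if (buf == NULL) {
        return NULL;
    }

    *n_out = llama_copy_state_data(ctx, buf); // returns the number of bytes actually written
    return buf;
}

// Sketch: restore a previously captured snapshot. Replaces the removed
// llama_set_kv_cache call; no separate token count needs to be passed.
static void load_state(struct llama_context * ctx, uint8_t * buf) {
    llama_set_state_data(ctx, buf); // returns the number of bytes read
}

Because the serialized state already embeds the KV cache together with its token count, the separate llama_get_kv_cache / llama_set_kv_cache accessors became redundant, which is what this change removes; llama_get_kv_cache_token_count stays for callers that only need the count.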