llama : only copy used KV cache in get / set state (#1272)

* llama : only copy used KV cache in get / set state

* switch to ggml for copying k, v

* avoid designated initializers
This commit is contained in:
Evan Jones 2023-05-02 22:26:13 -04:00 committed by GitHub
parent 2485d7a4d3
commit e216aa0463
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 80 additions and 23 deletions

View file

@ -23,7 +23,7 @@
#define LLAMA_FILE_MAGIC 'ggjt'
#define LLAMA_FILE_MAGIC_UNVERSIONED 'ggml'
#define LLAMA_SESSION_MAGIC 'ggsn'
#define LLAMA_SESSION_VERSION 0
#define LLAMA_SESSION_VERSION 1
#ifdef __cplusplus
extern "C" {
@ -127,7 +127,8 @@ extern "C" {
// Sets the current rng seed.
LLAMA_API void llama_set_rng_seed(struct llama_context * ctx, int seed);
// Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
// Returns the maximum size in bytes of the state (rng, logits, embedding
// and kv_cache) - the state actually copied will often be smaller after
// compacting tokens
LLAMA_API size_t llama_get_state_size(const struct llama_context * ctx);
// Copies the state to the specified destination address.