From 4912f9d5f091f9b5e20d17a34f1f1f15186db0d0 Mon Sep 17 00:00:00 2001
From: Pavol Rusnak
Date: Sun, 2 Apr 2023 12:18:54 +0200
Subject: [PATCH] fix whitespace

---
 llama.cpp | 20 ++++++++++----------
 llama.h   |  6 +++---
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index cf413d983..ffa2b6e8f 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1671,28 +1671,28 @@ int llama_model_quantize(
 // Returns the KV cache that will contain the context for the
 // ongoing prediction with the model.
 const uint8_t * llama_get_kv_cache(struct llama_context * ctx) {
-        return ctx->model.kv_self.buf.data();
+    return ctx->model.kv_self.buf.data();
 }
 
 // Returns the size of the KV cache
 size_t llama_get_kv_cache_size(struct llama_context * ctx) {
-        return ctx->model.kv_self.buf.size();
+    return ctx->model.kv_self.buf.size();
 }
 
 int llama_get_kv_cache_token_count(struct llama_context * ctx) {
-        return ctx->model.kv_self.n;
+    return ctx->model.kv_self.n;
 }
 
 // Sets the KV cache containing the current context for the model
 void llama_set_kv_cache(
         struct llama_context * ctx,
-        const uint8_t * kv_cache,
-        size_t n_size,
-        int n_token_count) {
-        // Make sure we have the same kv cache setup
-        LLAMA_ASSERT(ctx->model.kv_self.buf.size() == n_size);
-        memcpy(ctx->model.kv_self.buf.data(), kv_cache, n_size);
-        ctx->model.kv_self.n = n_token_count;
+               const uint8_t * kv_cache,
+                        size_t n_size,
+                           int n_token_count) {
+    // Make sure we have the same kv cache setup
+    LLAMA_ASSERT(ctx->model.kv_self.buf.size() == n_size);
+    memcpy(ctx->model.kv_self.buf.data(), kv_cache, n_size);
+    ctx->model.kv_self.n = n_token_count;
 }
 
 int llama_eval(
diff --git a/llama.h b/llama.h
index b4769ed20..04e2bf71c 100644
--- a/llama.h
+++ b/llama.h
@@ -96,9 +96,9 @@ extern "C" {
     // Sets the KV cache containing the current context for the model
     LLAMA_API void llama_set_kv_cache(
            struct llama_context * ctx,
-           const uint8_t * kv_cache,
-           size_t n_size,
-           int n_token_count);
+                  const uint8_t * kv_cache,
+                           size_t n_size,
+                              int n_token_count);
 
     // Run the llama inference to obtain the logits and probabilities for the next token.
     // tokens + n_tokens is the provided batch of new tokens to process
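For context, the functions reindented above form a small save/restore API for the KV cache. Below is a minimal usage sketch built only on the declarations shown in llama.h above; the helper names save_kv_cache and restore_kv_cache are illustrative and not part of the library, and creating/evaluating the llama_context is out of scope here.

// Sketch: snapshot and restore the KV cache of an existing llama_context.
#include <cstddef>
#include <cstdint>
#include <vector>

#include "llama.h"

// Copy the current KV cache contents and token count out of the context.
static std::vector<uint8_t> save_kv_cache(struct llama_context * ctx, int & n_token_count) {
    const uint8_t * data = llama_get_kv_cache(ctx);
    const size_t    size = llama_get_kv_cache_size(ctx);
    n_token_count = llama_get_kv_cache_token_count(ctx);
    return std::vector<uint8_t>(data, data + size);
}

// Write a previously saved KV cache back into the same (or an identically
// configured) context; llama_set_kv_cache asserts that the buffer size matches.
static void restore_kv_cache(struct llama_context * ctx, const std::vector<uint8_t> & kv, int n_token_count) {
    llama_set_kv_cache(ctx, kv.data(), kv.size(), n_token_count);
}

Restoring a saved cache returns the context to an earlier prompt state, e.g. without re-running llama_eval over the tokens that produced it.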