fix whitespace
parent a463fb7668
commit 4912f9d5f0

2 changed files with 13 additions and 13 deletions
llama.cpp (20 changed lines)

@@ -1671,28 +1671,28 @@ int llama_model_quantize(
 // Returns the KV cache that will contain the context for the
 // ongoing prediction with the model.
 const uint8_t * llama_get_kv_cache(struct llama_context * ctx) {
-  return ctx->model.kv_self.buf.data();
+    return ctx->model.kv_self.buf.data();
 }

 // Returns the size of the KV cache
 size_t llama_get_kv_cache_size(struct llama_context * ctx) {
-  return ctx->model.kv_self.buf.size();
+    return ctx->model.kv_self.buf.size();
 }

 int llama_get_kv_cache_token_count(struct llama_context * ctx) {
-  return ctx->model.kv_self.n;
+    return ctx->model.kv_self.n;
 }

 // Sets the KV cache containing the current context for the model
 void llama_set_kv_cache(
         struct llama_context * ctx,
-    const uint8_t * kv_cache,
-    size_t n_size,
-    int n_token_count) {
-  // Make sure we have the same kv cache setup
-  LLAMA_ASSERT(ctx->model.kv_self.buf.size() == n_size);
-  memcpy(ctx->model.kv_self.buf.data(), kv_cache, n_size);
-  ctx->model.kv_self.n = n_token_count;
+        const uint8_t * kv_cache,
+        size_t n_size,
+        int n_token_count) {
+    // Make sure we have the same kv cache setup
+    LLAMA_ASSERT(ctx->model.kv_self.buf.size() == n_size);
+    memcpy(ctx->model.kv_self.buf.data(), kv_cache, n_size);
+    ctx->model.kv_self.n = n_token_count;
 }

 int llama_eval(
llama.h (6 changed lines)

@@ -96,9 +96,9 @@ extern "C" {
     // Sets the KV cache containing the current context for the model
     LLAMA_API void llama_set_kv_cache(
             struct llama_context * ctx,
-        const uint8_t * kv_cache,
-        size_t n_size,
-        int n_token_count);
+            const uint8_t * kv_cache,
+            size_t n_size,
+            int n_token_count);

     // Run the llama inference to obtain the logits and probabilities for the next token.
     // tokens + n_tokens is the provided batch of new tokens to process
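The functions touched here form a small get/set interface over the context's KV cache. As a rough illustration of how a caller might use it, the sketch below copies the cache out of the context and restores it later, for example to avoid re-evaluating a shared prompt prefix. It assumes only the declarations shown in the llama.h hunk above; the kv_snapshot struct and helper names are hypothetical, and error handling is omitted.

// Minimal sketch of saving/restoring the KV cache through the API in this commit.
// kv_snapshot and the two helpers are illustrative only, not part of llama.h.
#include <stdlib.h>
#include <string.h>

#include "llama.h"

struct kv_snapshot {
    uint8_t * data;      // copy of the KV cache bytes
    size_t    size;      // byte size, from llama_get_kv_cache_size()
    int       n_tokens;  // tokens represented, from llama_get_kv_cache_token_count()
};

// Copy the current KV cache out of the context.
static struct kv_snapshot kv_snapshot_save(struct llama_context * ctx) {
    struct kv_snapshot snap;
    snap.size     = llama_get_kv_cache_size(ctx);
    snap.n_tokens = llama_get_kv_cache_token_count(ctx);
    snap.data     = malloc(snap.size);
    memcpy(snap.data, llama_get_kv_cache(ctx), snap.size);
    return snap;
}

// Write a saved KV cache back into the context; llama_set_kv_cache asserts
// that snap->size matches the context's own KV buffer size.
static void kv_snapshot_restore(struct llama_context * ctx, const struct kv_snapshot * snap) {
    llama_set_kv_cache(ctx, snap->data, snap->size, snap->n_tokens);
}

After a restore, generation can continue from the saved point, e.g. by using the snapshot's token count as the n_past argument to llama_eval.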