diff --git a/llama.cpp b/llama.cpp
index f6f4dda66..cf413d983 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1686,7 +1686,7 @@ int llama_get_kv_cache_token_count(struct llama_context * ctx) {
 // Sets the KV cache containing the current context for the model
 void llama_set_kv_cache(
         struct llama_context * ctx,
-        uint8_t * kv_cache,
+        const uint8_t * kv_cache,
         size_t n_size,
         int n_token_count) {
     // Make sure we have the same kv cache setup
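
For context, a minimal sketch of a caller this change helps (restore_kv_snapshot is hypothetical and only illustrates the effect of the const qualifier): a read-only KV cache snapshot can now be passed to llama_set_kv_cache without casting away const.

    // Hypothetical caller, for illustration only; not part of llama.cpp.
    void restore_kv_snapshot(struct llama_context * ctx,
                             const uint8_t * snapshot,  // read-only buffer
                             size_t n_size,
                             int n_token_count) {
        // Before this change, passing a const buffer here required
        // casting away const, since the parameter was uint8_t *.
        llama_set_kv_cache(ctx, snapshot, n_size, n_token_count);
    }

The change is also consistent with the function's behavior: llama_set_kv_cache only reads from the provided buffer, so the signature should advertise that to callers.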