diff --git a/llama.cpp b/llama.cpp
index 73ad59ca5..f6f4dda66 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1670,7 +1670,7 @@ int llama_model_quantize(
 
 // Returns the KV cache that will contain the context for the
 // ongoing prediction with the model.
-uint8_t* llama_get_kv_cache(struct llama_context * ctx) {
+const uint8_t * llama_get_kv_cache(struct llama_context * ctx) {
     return ctx->model.kv_self.buf.data();
 }
 
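For context, a minimal sketch of how a caller might consume this API after the change. It assumes a companion `llama_get_kv_cache_size()` accessor in the same header (not shown in this diff) and omits error handling; the `const` return type makes accidental writes through the pointer a compile-time error while still allowing the cache to be read or snapshotted:

```cpp
#include <cstdint>
#include <vector>

#include "llama.h"

// Copy the current KV cache into an owned buffer, e.g. to restore the
// context later. With the const-qualified return type, writing through
// the pointer returned by llama_get_kv_cache() no longer compiles.
static std::vector<uint8_t> snapshot_kv_cache(struct llama_context * ctx) {
    const uint8_t * cache = llama_get_kv_cache(ctx);
    const size_t    size  = llama_get_kv_cache_size(ctx); // assumed companion accessor
    return std::vector<uint8_t>(cache, cache + size);
}
```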