llama : comments

commit 1b6aeb8309 (parent d141c749d9)

1 changed file with 5 additions and 2 deletions
@@ -8040,7 +8040,7 @@ static int llama_decode_internal(
 //
 // abs(cell[i0].pos - cell[i1].pos) <= compress_delta
 //
-// - move the KV cache to the Host memory for easier maniiplation
+// - move the KV cache to the host memory for easier manipulation
 // - processing is done layer-by-layer
 // - convert the KV data to F32
 // - merge the KV data (different ways to merge)
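
For context, the comments in this hunk outline a position-based merge rule: once the KV data is in host memory as F32, cells whose positions differ by at most compress_delta are merged. Below is a minimal, self-contained sketch of that rule, assuming a hypothetical Cell layout and a running-mean merge (just one of the "different ways to merge"); compress_cells and everything around it are illustrative, not the actual llama_kv_cache_compress_internal() implementation.

#include <cstdio>
#include <cstdlib>
#include <vector>

struct Cell {
    int pos;                // position of the cached token (assumed layout)
    std::vector<float> kv;  // this cell's KV row, already converted to F32
};

// merge each run of cells satisfying abs(cell[i0].pos - cell[i1].pos) <= compress_delta,
// averaging their F32 KV rows; for simplicity each run is anchored at its first cell
static std::vector<Cell> compress_cells(std::vector<Cell> cells, int compress_delta) {
    std::vector<Cell> out;
    std::vector<int>  n_merged; // how many input cells each output cell absorbed

    for (auto & c : cells) {
        if (!out.empty() && std::abs(out.back().pos - c.pos) <= compress_delta) {
            auto & dst = out.back();
            const int n = ++n_merged.back();
            for (size_t i = 0; i < dst.kv.size(); ++i) {
                dst.kv[i] += (c.kv[i] - dst.kv[i])/n; // running mean of the merged rows
            }
        } else {
            out.push_back(std::move(c));
            n_merged.push_back(1);
        }
    }

    return out;
}

int main() {
    // pos 0 and pos 1 are within delta = 1 of each other and merge; pos 5 stays
    std::vector<Cell> cells = { {0, {1.0f}}, {1, {3.0f}}, {5, {7.0f}} };

    for (const auto & c : compress_cells(std::move(cells), 1)) {
        printf("pos = %d, kv[0] = %.1f\n", c.pos, c.kv[0]); // 0 -> 2.0, then 5 -> 7.0
    }
}

Per the comments, a real pass would run this per layer ("processing is done layer-by-layer") on the F32-converted data rather than on a flat vector as shown here.
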
@@ -8269,11 +8269,14 @@ static void llama_kv_cache_compress_internal(struct llama_context & lctx) {
 }
 
 // copy the KV cache to the host memory and reshuffle the cells to the beginning of the cache
-// removing any empty segments that may have been left by previous KV cache operations
+// this way we eliminate any empty segments that may have been left by previous KV cache operations
+//
 // TODO: optimizations are possible:
 // - multiple threads
 // - avoid copying to the host memory when already there
 //
+// TODO: can we do all this on-device?
+//
 static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
     auto & kv_self = lctx.kv_self;
 
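
The reworded comments describe the defragmentation pass: copy the KV cache to host memory, then shift occupied cells toward the start so no empty segments remain between them. Below is a minimal sketch of that compaction, assuming a hypothetical Cell type where pos < 0 marks an empty slot; the real routine also has to move the per-layer K and V tensor data, which is omitted here.

#include <cstdio>
#include <vector>

struct Cell {
    int pos; // pos < 0 marks an empty cell (assumed convention)
    bool is_empty() const { return pos < 0; }
};

// move every occupied cell down to the lowest free index, preserving order --
// conceptually the "reshuffle the cells to the beginning of the cache" step
static void defrag_cells(std::vector<Cell> & cells) {
    size_t dst = 0;
    for (size_t src = 0; src < cells.size(); ++src) {
        if (cells[src].is_empty()) {
            continue;
        }
        if (dst != src) {
            cells[dst]     = cells[src]; // a real pass would also move the K/V rows
            cells[src].pos = -1;         // leave the hole at the old slot
        }
        dst++;
    }
}

int main() {
    std::vector<Cell> cache = { {0}, {-1}, {2}, {-1}, {4} };

    defrag_cells(cache);

    for (const auto & c : cache) {
        printf("%d ", c.pos); // prints: 0 2 4 -1 -1
    }
    printf("\n");
}

The TODOs added in this commit map directly onto such a pass: the per-layer moves are independent (hence "multiple threads"), and the host round-trip is wasted work when the cache already lives in host memory or when the shuffle could be done on-device.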