llama : add comments
This commit is contained in:
parent
0b72ded501
commit
5a122c25a0
1 changed file with 9 additions and 3 deletions
12
llama.cpp
12
llama.cpp
|
@@ -8091,9 +8091,7 @@ static int llama_decode_internal(
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// copy the KV cache to the host memory and reshuffle the cells to the beginning of the cache
|
// find holes from the beginning of the KV cache and fill them by moving data from the end of the cache
|
||||||
// this way we eliminate any empty holes that may have been left by previous KV cache operations
|
|
||||||
//
|
|
||||||
static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
|
static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
|
||||||
auto & kv_self = lctx.kv_self;
|
auto & kv_self = lctx.kv_self;
|
||||||
|
|
||||||
|
@@ -8108,6 +8106,11 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
|
||||||
uint32_t n_moves = 0;
|
uint32_t n_moves = 0;
|
||||||
|
|
||||||
// determine which KV cells to move where
|
// determine which KV cells to move where
|
||||||
|
//
|
||||||
|
// cell i moves to ids[i]
|
||||||
|
//
|
||||||
|
// if ids[i] == i || ids[i] == n_kv, then cell i is not moved
|
||||||
|
//
|
||||||
std::vector<uint32_t> ids(n_kv, n_kv);
|
std::vector<uint32_t> ids(n_kv, n_kv);
|
||||||
|
|
||||||
for (uint32_t i0 = 0; i0 < n_used; ++i0) {
|
for (uint32_t i0 = 0; i0 < n_used; ++i0) {
|
||||||
|
@@ -8139,11 +8142,13 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
|
||||||
|
|
||||||
// non-empty cell which is not yet moved
|
// non-empty cell which is not yet moved
|
||||||
nf++;
|
nf++;
|
||||||
|
|
||||||
if (nf == nh) {
|
if (nf == nh) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// this can only happen if `n_used` is not accurate, which would be a bug
|
||||||
GGML_ASSERT(nf == nh && "KV defrag bug: nf != nh");
|
GGML_ASSERT(nf == nh && "KV defrag bug: nf != nh");
|
||||||
|
|
||||||
nf = 0;
|
nf = 0;
|
||||||
|
@@ -8156,6 +8161,7 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// this cell goes to (i0 + nf)
|
||||||
ids[i1] = i0 + nf;
|
ids[i1] = i0 + nf;
|
||||||
|
|
||||||
// move the cell meta data
|
// move the cell meta data
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue