From 7688d7204ff13648e2c7555c05cd4bae93995a3c Mon Sep 17 00:00:00 2001 From: KerfuffleV2 Date: Thu, 23 Nov 2023 07:21:19 -0700 Subject: [PATCH] Fix max contiguous empty cells index calculation Make dump functions deal with lengths or sequence counts > 10 better (clamp the cell glyph to the last visible character '+', never the NUL terminator) --- common/common.cpp | 16 ++++++++++------ llama.cpp | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 1704b08a2..c9b13db27 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1393,7 +1393,9 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l // void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size) { - printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, max contiguous cells=%d @ %d\n", + static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+"; + + printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d", view.n_cells, view.n_max_seq, view.used_cells, view.token_count, view.max_contiguous_cells, view.max_contiguous_cells_idx); llama_kv_cache_view_cell * c_curr = view.cells; struct llama_kv_cache_view_cell_sequence * cs_curr = view.cells_sequences; @@ -1405,13 +1407,14 @@ void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size) { for (int j = 0; j < view.n_max_seq; j++) { if (cs_curr[j].seq_id >= 0) { seq_count++; } } - putchar(seq_count == 0 ? '.' : ('0' + (std::min(9, seq_count)))); + putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]); } printf("\n=== Done dumping\n"); } void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) { - printf("=== Dumping KV cache. 
total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, max contiguous cells=%d @ %d\n", + static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n", view.n_cells, view.n_max_seq, view.used_cells, view.token_count, view.max_contiguous_cells, view.max_contiguous_cells_idx); std::unordered_map seqs; @@ -1421,16 +1424,17 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) { for (int j = 0; j < view.n_max_seq; j++) { if (cs_curr[j].seq_id < 0) { continue; } if (seqs.find(cs_curr[j].seq_id) == seqs.end()) { + if (seqs.size() + 1 >= sizeof(slot_chars)) { break; } seqs[cs_curr[j].seq_id] = seqs.size(); - if (seqs.size() >= 10) { break; } } } - if (seqs.size() >= 10) { break; } + if (seqs.size() + 1 >= sizeof(slot_chars)) { break; } } printf("=== Sequence legend: "); for (const auto & it : seqs) { printf("%zu=%d, ", it.second, it.first); } + printf("'+'=other sequence ids"); c_curr = view.cells; cs_curr = view.cells_sequences; @@ -1441,7 +1445,7 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) { for (int j = 0; j < view.n_max_seq; j++) { if (cs_curr[j].seq_id >= 0) { const auto & it = seqs.find(cs_curr[j].seq_id); - putchar(it != seqs.end() ? int('0' + it->second) : '+'); + putchar(it != seqs.end() ? 
int(slot_chars[it->second]) : '+'); } else { putchar('.'); } diff --git a/llama.cpp b/llama.cpp index c564d636d..5d1de9eec 100644 --- a/llama.cpp +++ b/llama.cpp @@ -8857,8 +8857,8 @@ void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_k if (curr_size > 0) { if (curr_contig_idx >= 0 && uint32_t(i - curr_contig_idx) > max_contig) { - max_contig_idx = i; max_contig = i - curr_contig_idx; + max_contig_idx = curr_contig_idx; } curr_contig_idx = -1; } else if (curr_contig_idx < 0) {