llama : allow exporting a view of the KV cache (#4180)
* Allow exporting a view of the KV cache * Allow dumping the sequences per cell in common * Track max contiguous cells value and position as well * Fix max contiguous empty cells index calculation Make dump functions deal with lengths or sequences counts > 10 better * Fix off by one error in dump_kv_cache_view * Add doc comments for KV cache view functions Eliminate cell sequence struct; use llama_seq_id directly Minor cleanups
This commit is contained in:
parent
671f639c59
commit
5df7d06c42
5 changed files with 227 additions and 0 deletions
|
@ -172,6 +172,8 @@ int main(int argc, char ** argv) {
|
|||
int32_t n_total_gen = 0;
|
||||
int32_t n_cache_miss = 0;
|
||||
|
||||
struct llama_kv_cache_view kvc_view = llama_kv_cache_view_init(ctx, n_clients);
|
||||
|
||||
const auto t_main_start = ggml_time_us();
|
||||
|
||||
LOG_TEE("%s: Simulating parallel requests from clients:\n", __func__);
|
||||
|
@ -201,6 +203,9 @@ int main(int argc, char ** argv) {
|
|||
LOG_TEE("Processing requests ...\n\n");
|
||||
|
||||
while (true) {
|
||||
llama_kv_cache_view_update(ctx, &kvc_view);
|
||||
dump_kv_cache_view_seqs(kvc_view, 40);
|
||||
|
||||
llama_batch_clear(batch);
|
||||
|
||||
// decode any currently ongoing sequences
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue