examples : replace llama_kv_cache_seq_* with llama_past_seq_*

2024-06-10 14:44:42 -04:00 · 2024-06-10 14:44:42 -04:00 · 43d8d4bf9e
commit 43d8d4bf9e
parent 372482dffe
23 changed files with 125 additions and 112 deletions
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@ -400,7 +400,7 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &
        const auto t_start = std::chrono::high_resolution_clock::now();

        // clear the KV cache
-        llama_kv_cache_clear(ctx);
+        llama_past_clear(ctx);

        for (int j = 0; j < num_batches; ++j) {
            const int batch_start = start + j * n_batch;
@ -575,7 +575,7 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
        const auto t_start = std::chrono::high_resolution_clock::now();

        // clear the KV cache
-        llama_kv_cache_clear(ctx);
+        llama_past_clear(ctx);

        for (int j = 0; j < num_batches; ++j) {
            const int batch_start = start + j * n_batch;
@ -944,7 +944,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
            return;
        }

-        llama_kv_cache_clear(ctx);
+        llama_past_clear(ctx);

        // decode all tasks [i0, i1)
        if (!decode_helper(ctx, batch, batch_logits, n_batch, n_vocab)) {
@ -1221,7 +1221,7 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
            return;
        }

-        llama_kv_cache_clear(ctx);
+        llama_past_clear(ctx);

        // decode all tasks [i0, i1)
        if (!decode_helper(ctx, batch, batch_logits, n_batch, n_vocab)) {
@ -1594,7 +1594,7 @@ static void multiple_choice_score(llama_context * ctx, const gpt_params & params
            return;
        }

-        llama_kv_cache_clear(ctx);
+        llama_past_clear(ctx);

        // decode all tasks [i0, i1)
        if (!decode_helper(ctx, batch, batch_logits, n_batch, n_vocab)) {
@ -1780,7 +1780,7 @@ static void kl_divergence(llama_context * ctx, const gpt_params & params) {
        }

        // clear the KV cache
-        llama_kv_cache_clear(ctx);
+        llama_past_clear(ctx);

        for (int j = 0; j < num_batches; ++j) {
            const int batch_start = start + j * n_batch;