llama : update llama_kv_self API

ggml-ci
2025-01-14 16:47:34 +02:00 · 2025-01-14 16:47:34 +02:00 · 17b363afd3
commit 17b363afd3
parent fd05ab87aa
30 changed files with 387 additions and 205 deletions
--- a/examples/simple-chat/simple-chat.cpp
+++ b/examples/simple-chat/simple-chat.cpp
@@ -88,8 +88,6 @@ int main(int argc, char ** argv) {
        return 1;
    }

-    const llama_kv_cache * kv = llama_get_kv_cache(ctx);
-
    // initialize the sampler
    llama_sampler * smpl = llama_sampler_chain_init(llama_sampler_chain_default_params());
    llama_sampler_chain_add(smpl, llama_sampler_init_min_p(0.05f, 1));
@@ -100,7 +98,7 @@ int main(int argc, char ** argv) {
    auto generate = [&](const std::string & prompt) {
        std::string response;

-        const bool is_first = llama_kv_cache_used_cells(kv) == 0;
+        const bool is_first = llama_kv_self_used_cells(ctx) == 0;

        // tokenize the prompt
        const int n_prompt_tokens = -llama_tokenize(vocab, prompt.c_str(), prompt.size(), NULL, 0, is_first, true);
@ -115,7 +113,7 @@ int main(int argc, char ** argv) {
        while (true) {
            // check if we have enough space in the context to evaluate this batch
            int n_ctx = llama_n_ctx(ctx);
-            int n_ctx_used = llama_kv_cache_used_cells(kv);
+            int n_ctx_used = llama_kv_self_used_cells(ctx);
            if (n_ctx_used + batch.n_tokens > n_ctx) {
                printf("\033[0m\n");
                fprintf(stderr, "context size exceeded\n");