cont : move kv_self update to llama_context

ggml-ci
2025-01-16 21:55:12 +02:00 · 2025-01-16 21:55:12 +02:00 · b4ec1d4429
commit b4ec1d4429
parent f2524c0e41
3 changed files with 157 additions and 154 deletions
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -79,6 +79,13 @@ struct llama_context {
    ggml_abort_callback abort_callback      = nullptr;
    void *              abort_callback_data = nullptr;

+    // returns the result of ggml_backend_sched_graph_compute_async execution
+    enum ggml_status compute_graph(
+                ggml_cgraph * graph,
+                       bool   batched);
+
+    llama_pos pos_max() const;
+
    void reset();

    void prepare_k_shift();
@@ -129,6 +136,9 @@
    struct ggml_tensor * inp_KQ_mask_cross;   // F32 [n_outputs_enc, n_batch]
    struct ggml_tensor * inp_K_shift;         // I32 [kv_size]

+    // return true if need to reserve new worst-case graph
+    bool kv_self_update();
+
    void build_attn_inp(
            ggml_context * ctx0,
                 int32_t   n_tokens,