cont : move kv_self update to llama_context

ggml-ci
Georgi Gerganov 2025-01-16 21:55:12 +02:00
parent f2524c0e41
commit b4ec1d4429
3 changed files with 157 additions and 154 deletions


@@ -79,6 +79,13 @@ struct llama_context {
     ggml_abort_callback abort_callback      = nullptr;
     void *              abort_callback_data = nullptr;
 
+    // returns the result of ggml_backend_sched_graph_compute_async execution
+    enum ggml_status compute_graph(
+            ggml_cgraph * graph,
+            bool          batched);
+
     llama_pos pos_max() const;
 
     void reset();
     void prepare_k_shift();
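
For context, a minimal sketch of what the new compute_graph() member could boil down to, assuming the context owns a backend scheduler (passed explicitly here as sched). The free-function form, the helper name, and the handling of the batched flag are illustrative assumptions, not the actual implementation:

#include "ggml.h"
#include "ggml-backend.h"

// Illustrative stand-in for llama_context::compute_graph(): dispatch the graph
// through the backend scheduler and propagate the resulting ggml_status.
static enum ggml_status compute_graph_sketch(
        ggml_backend_sched_t   sched,
        ggml_cgraph          * graph,
        bool                   batched) {
    // In the real context the `batched` flag would typically select the number
    // of threads used for the computation; that wiring is omitted here.
    (void) batched;

    // returns e.g. GGML_STATUS_SUCCESS or GGML_STATUS_ALLOC_FAILED
    return ggml_backend_sched_graph_compute_async(sched, graph);
}
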
@@ -129,6 +136,9 @@ struct llama_context {
     struct ggml_tensor * inp_KQ_mask_cross; // F32 [n_outputs_enc, n_batch]
     struct ggml_tensor * inp_K_shift;       // I32 [kv_size]
 
+    // return true if need to reserve new worst-case graph
+    bool kv_self_update();
+
     void build_attn_inp(
             ggml_context * ctx0,
             int32_t        n_tokens,
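
Similarly, a rough sketch of the control flow kv_self_update() could follow now that it lives in llama_context: apply any pending K-shift and defragmentation to the KV cache and report whether a new worst-case graph has to be reserved. The member names (has_shift, do_defrag) and the stand-in struct are hypothetical, and the graph building/compute steps are elided:

#include <cstdio>

// Hypothetical slice of the KV cache state used by this sketch.
struct kv_cache_sketch {
    bool has_shift = false; // a K-shift of the cached keys is pending
    bool do_defrag = false; // the cache cells should be defragmented
};

// Illustrative stand-in for llama_context::kv_self_update(): perform pending
// KV cache maintenance and return true if a new worst-case graph is needed.
static bool kv_self_update_sketch(kv_cache_sketch & kv) {
    bool need_reserve = false;

    if (kv.has_shift) {
        // build + compute the K-shift graph here (elided)
        kv.has_shift = false;
        need_reserve = true;
    }

    if (kv.do_defrag) {
        // build + compute the defrag graph here (elided)
        kv.do_defrag = false;
        need_reserve = true;
    }

    return need_reserve;
}

int main() {
    kv_cache_sketch kv;
    kv.has_shift = true;
    std::printf("need new worst-case graph: %d\n", kv_self_update_sketch(kv) ? 1 : 0);
    return 0;
}
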