rename n_ctx to kv_size

2024-02-18 20:59:26 +01:00 · 2024-02-18 20:59:26 +01:00 · 606873401c
commit 606873401c
parent ef96e8b1f7
48 changed files with 403 additions and 393 deletions
--- a/examples/llava/llava-cli.cpp
+++ b/examples/llava/llava-cli.cpp
@@ -230,7 +230,7 @@ static struct llava_context * llava_init(gpt_params * params) {
    }

    llama_context_params ctx_params = llama_context_params_from_gpt_params(*params);
-    ctx_params.n_ctx           = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings
+    ctx_params.kv_size              = params->kv_size < 2048 ? 2048 : params->kv_size; // we need a longer context size to process image embeddings

    llama_context * ctx_llama = llama_new_context_with_model(model, ctx_params);

--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -103,15 +103,15 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
    const size_t num_images = num_patches_width * num_patches_height + 1;

    // TODO: size calculation is not calculated - it's only tens of MB
-    size_t ctx_size = 0;
+    size_t kv_size = 0;

    {
-        ctx_size += clip_embd_nbytes(ctx_clip) * num_images * 8; // image_features
-        ctx_size += 1024*1024 * ggml_type_size(GGML_TYPE_F32);
+        kv_size += clip_embd_nbytes(ctx_clip) * num_images * 8; // image_features
+        kv_size += 1024*1024 * ggml_type_size(GGML_TYPE_F32);
    }

    struct ggml_init_params params {
-        /*.mem_size   =*/ ctx_size,
+        /*.mem_size   =*/ kv_size,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false, // NOTE: this should be false when using the legacy API
    };