examplse : de-shadow

ggml-ci
2025-01-12 14:25:32 +02:00 · 2025-01-12 14:25:32 +02:00 · 9a735ae6d8
commit 9a735ae6d8
parent 82caffa74e
16 changed files with 152 additions and 159 deletions
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@ -1039,41 +1039,40 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
            }

            { // attention
-                int hidden_size = 4096;
-                const int d_head = 128;
-                int n_head = hidden_size/d_head;
+                int hidden_size_cur = 4096;
                int num_query = 96;
                if (ctx->minicpmv_version == 2) {
-                    hidden_size = 4096;
-                    n_head = hidden_size/d_head;
+                    hidden_size_cur = 4096;
                    num_query = 96;
                }
                else if (ctx->minicpmv_version == 3) {
-                    hidden_size = 3584;
-                    n_head = hidden_size/d_head;
+                    hidden_size_cur = 3584;
                    num_query = 64;
                }

+                const int d_head_cur = 128;
+                const int n_head_cur = hidden_size_cur/d_head_cur;
+
                struct ggml_tensor * Q = ggml_add(ctx0, ggml_mul_mat(ctx0, model.mm_model_attn_q_w, q), model.mm_model_attn_q_b);
-                Q = ggml_scale_inplace(ctx0, Q, 1.0f / sqrt((float)d_head));
+                Q = ggml_scale_inplace(ctx0, Q, 1.0f / sqrt((float)d_head_cur));
                struct ggml_tensor * K = ggml_add(ctx0, ggml_mul_mat(ctx0, model.mm_model_attn_k_w, k), model.mm_model_attn_k_b);
                struct ggml_tensor * V = ggml_add(ctx0, ggml_mul_mat(ctx0, model.mm_model_attn_v_w, v), model.mm_model_attn_v_b);
                // permute
-                Q = ggml_reshape_4d(ctx0, Q, d_head, n_head, num_query, batch_size);
+                Q = ggml_reshape_4d(ctx0, Q, d_head_cur, n_head_cur, num_query, batch_size);
                Q = ggml_cont(ctx0, ggml_permute(ctx0, Q, 0, 2, 1, 3));
-                Q = ggml_reshape_3d(ctx0, Q, d_head, num_query, n_head * batch_size);
-                K = ggml_reshape_4d(ctx0, K, d_head, n_head, num_positions, batch_size);
+                Q = ggml_reshape_3d(ctx0, Q, d_head_cur, num_query, n_head_cur * batch_size);
+                K = ggml_reshape_4d(ctx0, K, d_head_cur, n_head_cur, num_positions, batch_size);
                K = ggml_cont(ctx0, ggml_permute(ctx0, K, 0, 2, 1, 3));
-                K = ggml_reshape_3d(ctx0, K, d_head, num_positions, n_head * batch_size);
-                V = ggml_reshape_4d(ctx0, V, d_head, n_head, num_positions, batch_size);
+                K = ggml_reshape_3d(ctx0, K, d_head_cur, num_positions, n_head_cur * batch_size);
+                V = ggml_reshape_4d(ctx0, V, d_head_cur, n_head_cur, num_positions, batch_size);
                V = ggml_cont(ctx0, ggml_permute(ctx0, V, 1, 2, 0, 3));
-                V = ggml_reshape_3d(ctx0, V, num_positions, d_head, n_head * batch_size);
+                V = ggml_reshape_3d(ctx0, V, num_positions, d_head_cur, n_head_cur * batch_size);
                struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q);
                KQ = ggml_soft_max_inplace(ctx0, KQ);
                struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ);
-                KQV = ggml_reshape_4d(ctx0, KQV, d_head, num_query, n_head, batch_size);
+                KQV = ggml_reshape_4d(ctx0, KQV, d_head_cur, num_query, n_head_cur, batch_size);
                KQV = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
-                KQV = ggml_cont_3d(ctx0, KQV, hidden_size, num_query, batch_size);
+                KQV = ggml_cont_3d(ctx0, KQV, hidden_size_cur, num_query, batch_size);

                embeddings = ggml_add(ctx0, ggml_mul_mat(ctx0, model.mm_model_attn_o_w, KQV), model.mm_model_attn_o_b);
            }
@ -1113,12 +1112,12 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
 struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
    struct ggml_context * meta = NULL;

-    struct gguf_init_params params = {
+    struct gguf_init_params params_meta = {
        /*.no_alloc = */ true,
        /*.ctx      = */ &meta,
    };

-    struct gguf_context * ctx = gguf_init_from_file(fname, params);
+    struct gguf_context * ctx = gguf_init_from_file(fname, params_meta);
    if (!ctx) {
        throw std::runtime_error(format("%s: failed to load CLIP model from %s. Does this file exist?\n", __func__, fname));
    }
@ -1310,13 +1309,13 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
    // load tensors
    {
        std::vector<uint8_t> read_buf;
-        struct ggml_init_params params = {
+        struct ggml_init_params params_data = {
            /*.mem_size =*/ (n_tensors + 1) * ggml_tensor_overhead(),
            /*.mem_buffer =*/ NULL,
            /*.no_alloc =*/ true,
        };

-        new_clip->ctx_data = ggml_init(params);
+        new_clip->ctx_data = ggml_init(params_data);
        if (!new_clip->ctx_data) {
            LOG_ERR("%s: ggml_init() failed\n", __func__);
            clip_free(new_clip);