llava: add quantization for the visual projector LLAVA, Qwen2VL (#11644)

* Added quantization for the visual projector
* Added README
* Fixed the clip quantize implementation in clip.cpp
* Fixed a gcc warning (minor linting)
* Removed trailing whitespace
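For context: the change routes the integer type id straight into clip_model_quantize(fname_inp, fname_out, itype), declared in clip.h, which casts it to ggml_type. Below is a minimal, hypothetical driver showing how that entry point can be called; it is only a sketch, not the CLI added by this PR, whose argument handling may differ.

// Hypothetical driver for clip_model_quantize() (declared in clip.h).
// Illustration only; the quantize tool shipped with this PR may parse
// its arguments differently.
#include "clip.h"
#include "ggml.h"

#include <cstdio>
#include <cstdlib>

int main(int argc, char ** argv) {
    if (argc != 4) {
        fprintf(stderr, "usage: %s <input-f32.gguf> <output.gguf> <ggml-type-id>\n", argv[0]);
        return 1;
    }

    // itype is the integer value of a ggml_type (e.g. 3 for GGML_TYPE_Q4_1);
    // clip_model_quantize() asserts it is < GGML_TYPE_COUNT and casts it back.
    const int itype = std::atoi(argv[3]);

    if (!clip_model_quantize(argv[1], argv[2], itype)) {
        fprintf(stderr, "quantization of %s failed\n", argv[1]);
        return 1;
    }
    return 0;
}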
SAMI authored 2025-02-05 14:45:40 +07:00, committed by GitHub
parent 9f4cc8f8d3
commit 1ec208083c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 113 additions and 5 deletions


@@ -2745,10 +2745,8 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
 }
 
 bool clip_model_quantize(const char * fname_inp, const char * fname_out, const int itype) {
-    ggml_type type = GGML_TYPE_Q4_1;
-
     assert(itype < GGML_TYPE_COUNT);
-    type = static_cast<ggml_type>(itype);
+    ggml_type type = static_cast<ggml_type>(itype);
 
     auto * ctx_clip = clip_model_load(fname_inp, 2);
 
@@ -2801,8 +2799,8 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
             }
         }
 
-        // quantize only 2D tensors
-        quantize &= (ggml_n_dims(cur) == 2);
+        // quantize only 2D tensors and bigger than block size
+        quantize &= (ggml_n_dims(cur) == 2) && cur->ne[0] > ggml_blck_size(type);
 
         if (quantize) {
            new_type = type;
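The second hunk tightens the per-tensor filter: besides being 2-D, a tensor's row length (ne[0]) must now exceed the block size of the target quantization type, since a row shorter than one block (32 values for Q4_1, for example) cannot be packed into a complete quantization block. A small sketch of that filter in isolation, using a hypothetical helper name but only the ggml.h calls that appear in the diff:

#include "ggml.h"

// Hypothetical helper mirroring the filter above: a tensor is eligible
// for quantization only if it is 2-D and its row length (ne[0]) is
// larger than the block size of the target type. With GGML_TYPE_Q4_1
// (block size 32), a 16 x N tensor would be kept in its original type.
static bool tensor_is_quantizable(const struct ggml_tensor * cur, ggml_type type) {
    bool quantize = true;
    // quantize only 2D tensors and bigger than block size
    quantize &= (ggml_n_dims(cur) == 2) && cur->ne[0] > ggml_blck_size(type);
    return quantize;
}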