llava: add quantization for the visual projector LLAVA, Qwen2VL (#11644)

* Added quantization for the visual projector
* Added README
* Fixed the clip quantize implementation in clip.cpp
* Fixed a gcc warning (minor linting)
* Removed trailing whitespace
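For context: the change routes the integer type id straight into clip_model_quantize(fname_inp, fname_out, itype), declared in clip.h, which casts it to ggml_type. Below is a minimal, hypothetical driver showing how that entry point can be called; it is only a sketch, not the CLI added by this PR, whose argument handling may differ.

// Hypothetical driver for clip_model_quantize() (declared in clip.h).
// Illustration only; the quantize tool shipped with this PR may parse
// its arguments differently.
#include "clip.h"
#include "ggml.h"

#include <cstdio>
#include <cstdlib>

int main(int argc, char ** argv) {
    if (argc != 4) {
        fprintf(stderr, "usage: %s <input-f32.gguf> <output.gguf> <ggml-type-id>\n", argv[0]);
        return 1;
    }

    // itype is the integer value of a ggml_type (e.g. 3 for GGML_TYPE_Q4_1);
    // clip_model_quantize() asserts it is < GGML_TYPE_COUNT and casts it back.
    const int itype = std::atoi(argv[3]);

    if (!clip_model_quantize(argv[1], argv[2], itype)) {
        fprintf(stderr, "quantization of %s failed\n", argv[1]);
        return 1;
    }
    return 0;
}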
SAMI authored 2025-02-05 14:45:40 +07:00, committed by GitHub
parent 9f4cc8f8d3
commit 1ec208083c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 113 additions and 5 deletions


@@ -2745,10 +2745,8 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
 }
 
 bool clip_model_quantize(const char * fname_inp, const char * fname_out, const int itype) {
-    ggml_type type = GGML_TYPE_Q4_1;
-
     assert(itype < GGML_TYPE_COUNT);
-    type = static_cast<ggml_type>(itype);
+    ggml_type type = static_cast<ggml_type>(itype);
 
     auto * ctx_clip = clip_model_load(fname_inp, 2);
 
@@ -2801,8 +2799,8 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
             }
         }
 
-        // quantize only 2D tensors
-        quantize &= (ggml_n_dims(cur) == 2);
+        // quantize only 2D tensors and bigger than block size
+        quantize &= (ggml_n_dims(cur) == 2) && cur->ne[0] > ggml_blck_size(type);
 
         if (quantize) {
            new_type = type;
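The second hunk tightens the per-tensor filter: besides being 2-D, a tensor's row length (ne[0]) must now exceed the block size of the target quantization type, since a row shorter than one block (32 values for Q4_1, for example) cannot be packed into a complete quantization block. A small sketch of that filter in isolation, using a hypothetical helper name but only the ggml.h calls that appear in the diff:

#include "ggml.h"

// Hypothetical helper mirroring the filter above: a tensor is eligible
// for quantization only if it is 2-D and its row length (ne[0]) is
// larger than the block size of the target type. With GGML_TYPE_Q4_1
// (block size 32), a 16 x N tensor would be kept in its original type.
static bool tensor_is_quantizable(const struct ggml_tensor * cur, ggml_type type) {
    bool quantize = true;
    // quantize only 2D tensors and bigger than block size
    quantize &= (ggml_n_dims(cur) == 2) && cur->ne[0] > ggml_blck_size(type);
    return quantize;
}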