From 8ebdaec76169c58472f1a97a71fc3548578eae00 Mon Sep 17 00:00:00 2001 From: John <78893154+cmp-nct@users.noreply.github.com> Date: Fri, 2 Feb 2024 00:25:08 +0100 Subject: [PATCH] Update convert-image-encoder-to-gguf.py --- .../llava/convert-image-encoder-to-gguf.py | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/examples/llava/convert-image-encoder-to-gguf.py b/examples/llava/convert-image-encoder-to-gguf.py index 82acfb225..115b6b35b 100644 --- a/examples/llava/convert-image-encoder-to-gguf.py +++ b/examples/llava/convert-image-encoder-to-gguf.py @@ -203,8 +203,41 @@ if has_vision_encoder: fout.add_float32(k(KEY_ATTENTION_LAYERNORM_EPS, VISION), v_hparams["layer_norm_eps"]) block_count = v_hparams["num_hidden_layers"] - 1 if has_llava_projector else v_hparams["num_hidden_layers"] fout.add_uint32(k(KEY_BLOCK_COUNT, VISION), block_count) + # /** + # "image_grid_pinpoints": [ + # [ + # 336, + # 672 + # ], + # [ + # 672, + # 336 + # ], + # [ + # 672, + # 672 + # ], + # [ + # 1008, + # 336 + # ], + # [ + # 336, + # 1008 + # ] + # ], + # Flattened: + # [ + # 336, 672, + # 672, 336, + # 672, 672, + # 1008, 336, + # 336, 1008 + # ] + # * + # */ if "image_grid_pinpoints" in v_hparams: - # no nested array - flatten it + # flatten it image_grid_pinpoints = [] for pinpoint in v_hparams["image_grid_pinpoints"]: image_grid_pinpoints.extend(pinpoint)