change gguf KV from clip to vit

This commit is contained in:
Xuan Son Nguyen 2025-01-21 10:51:26 +01:00
parent 4a7ab89d75
commit 431bb08059
6 changed files with 103 additions and 103 deletions

View file

@ -215,29 +215,29 @@ class Keys:
LORA_ALPHA = "adapter.lora.alpha"
class Vision:
    """Metadata key names for the vision part of a model.

    Top-level attributes are shared image settings. ``Clip`` holds the
    ``vision.clip.*`` key names and ``Vit`` holds the same set of names under
    the ``vision.vit.*`` prefix.
    """

    # vision.type: only "vit" is supported for now (previously "clip-vit").
    TYPE = "vision.type"
    IMAGE_SIZE = "vision.image_size"
    PATCH_SIZE = "vision.patch_size"
    IMAGE_MEAN = "vision.image_mean"
    IMAGE_STD = "vision.image_std"

    class Clip:
        # Key names under the "vision.clip." prefix.
        ARCHITECTURE = "vision.clip.architecture"
        CONTEXT_LENGTH = "vision.clip.context_length"
        EMBEDDING_LENGTH = "vision.clip.embedding_length"
        BLOCK_COUNT = "vision.clip.block_count"
        FEED_FORWARD_LENGTH = "vision.clip.feed_forward_length"
        # NOTE(review): PROJECTION_TYPE and PROJECTOR_TYPE are distinct keys
        # with near-identical names - confirm both are intended.
        PROJECTION_TYPE = "vision.clip.projection_type"
        PROJECTION_DIM = "vision.clip.projection_dim"
        USE_GELU = "vision.clip.use_gelu"
        MAX_POS_EMBEDDING = "vision.clip.max_position_embeddings"
        MAX_SLICES = "vision.clip.max_slices"
        PROJECTOR_TYPE = "vision.clip.projector_type"
        SELECT_LAYER = "vision.clip.select_layer"
        PATCH_MERGE_TYPE = "vision.clip.patch_merge_type"
        # Attention-related keys live one level deeper ("...attention.*").
        HEAD_COUNT = "vision.clip.attention.head_count"
        LAYERNORM_EPS = "vision.clip.attention.layer_norm_epsilon"

    class Vit:
        # Key names under the "vision.vit." prefix; same attribute names as
        # Clip, with "clip" replaced by "vit" in every key string.
        ARCHITECTURE = "vision.vit.architecture"
        CONTEXT_LENGTH = "vision.vit.context_length"
        EMBEDDING_LENGTH = "vision.vit.embedding_length"
        BLOCK_COUNT = "vision.vit.block_count"
        FEED_FORWARD_LENGTH = "vision.vit.feed_forward_length"
        # NOTE(review): PROJECTION_TYPE and PROJECTOR_TYPE are distinct keys
        # with near-identical names - confirm both are intended.
        PROJECTION_TYPE = "vision.vit.projection_type"
        PROJECTION_DIM = "vision.vit.projection_dim"
        USE_GELU = "vision.vit.use_gelu"
        MAX_POS_EMBEDDING = "vision.vit.max_position_embeddings"
        MAX_SLICES = "vision.vit.max_slices"
        PROJECTOR_TYPE = "vision.vit.projector_type"
        SELECT_LAYER = "vision.vit.select_layer"
        PATCH_MERGE_TYPE = "vision.vit.patch_merge_type"
        # Attention-related keys live one level deeper ("...attention.*").
        HEAD_COUNT = "vision.vit.attention.head_count"
        LAYERNORM_EPS = "vision.vit.attention.layer_norm_epsilon"
#
# recommended mapping of model tensor names for storage in gguf

View file

@ -886,46 +886,46 @@ class GGUFWriter:
def add_vision_patch_size(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.PATCH_SIZE`` via ``add_uint32``."""
    key = Keys.Vision.PATCH_SIZE
    self.add_uint32(key, value)
def add_vision_clip_architecture(self, value: str) -> None:
    """Store ``value`` under ``Keys.Vision.Clip.ARCHITECTURE`` via ``add_string``."""
    key = Keys.Vision.Clip.ARCHITECTURE
    self.add_string(key, value)
def add_vision_vit_architecture(self, value: str) -> None:
    """Store ``value`` under ``Keys.Vision.Vit.ARCHITECTURE`` via ``add_string``."""
    key = Keys.Vision.Vit.ARCHITECTURE
    self.add_string(key, value)
def add_vision_clip_context_length(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.Clip.CONTEXT_LENGTH`` via ``add_uint32``."""
    key = Keys.Vision.Clip.CONTEXT_LENGTH
    self.add_uint32(key, value)
def add_vision_vit_context_length(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.Vit.CONTEXT_LENGTH`` via ``add_uint32``."""
    key = Keys.Vision.Vit.CONTEXT_LENGTH
    self.add_uint32(key, value)
def add_vision_clip_embedding_length(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.Clip.EMBEDDING_LENGTH`` via ``add_uint32``."""
    key = Keys.Vision.Clip.EMBEDDING_LENGTH
    self.add_uint32(key, value)
def add_vision_vit_embedding_length(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.Vit.EMBEDDING_LENGTH`` via ``add_uint32``."""
    key = Keys.Vision.Vit.EMBEDDING_LENGTH
    self.add_uint32(key, value)
def add_vision_clip_block_count(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.Clip.BLOCK_COUNT`` via ``add_uint32``."""
    key = Keys.Vision.Clip.BLOCK_COUNT
    self.add_uint32(key, value)
def add_vision_vit_block_count(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.Vit.BLOCK_COUNT`` via ``add_uint32``."""
    key = Keys.Vision.Vit.BLOCK_COUNT
    self.add_uint32(key, value)
def add_vision_clip_feed_forward_length(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.Clip.FEED_FORWARD_LENGTH`` via ``add_uint32``."""
    key = Keys.Vision.Clip.FEED_FORWARD_LENGTH
    self.add_uint32(key, value)
def add_vision_vit_feed_forward_length(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.Vit.FEED_FORWARD_LENGTH`` via ``add_uint32``."""
    key = Keys.Vision.Vit.FEED_FORWARD_LENGTH
    self.add_uint32(key, value)
def add_vision_clip_head_count(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.Clip.HEAD_COUNT`` via ``add_uint32``."""
    key = Keys.Vision.Clip.HEAD_COUNT
    self.add_uint32(key, value)
def add_vision_vit_head_count(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.Vit.HEAD_COUNT`` via ``add_uint32``."""
    key = Keys.Vision.Vit.HEAD_COUNT
    self.add_uint32(key, value)
def add_vision_clip_max_position_embeddings(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.Clip.MAX_POS_EMBEDDING`` via ``add_uint32``."""
    key = Keys.Vision.Clip.MAX_POS_EMBEDDING
    self.add_uint32(key, value)
def add_vision_vit_max_position_embeddings(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.Vit.MAX_POS_EMBEDDING`` via ``add_uint32``."""
    key = Keys.Vision.Vit.MAX_POS_EMBEDDING
    self.add_uint32(key, value)
def add_vision_clip_projector_type(self, value: CLIPProjectorType) -> None:
    """Store ``value.value`` under ``Keys.Vision.Clip.PROJECTOR_TYPE`` via ``add_string``.

    The ``.value`` attribute of the given ``CLIPProjectorType`` is what gets
    written, not the object itself.
    """
    key = Keys.Vision.Clip.PROJECTOR_TYPE
    text = value.value
    self.add_string(key, text)
def add_vision_vit_projector_type(self, value: CLIPProjectorType) -> None:
    """Store ``value.value`` under ``Keys.Vision.Vit.PROJECTOR_TYPE`` via ``add_string``.

    The ``.value`` attribute of the given ``CLIPProjectorType`` is what gets
    written, not the object itself.
    """
    key = Keys.Vision.Vit.PROJECTOR_TYPE
    text = value.value
    self.add_string(key, text)
def add_vision_clip_max_slices(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.Clip.MAX_SLICES`` via ``add_uint32``."""
    key = Keys.Vision.Clip.MAX_SLICES
    self.add_uint32(key, value)
def add_vision_vit_max_slices(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.Vit.MAX_SLICES`` via ``add_uint32``."""
    key = Keys.Vision.Vit.MAX_SLICES
    self.add_uint32(key, value)
def add_vision_clip_select_layer(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.Clip.SELECT_LAYER`` via ``add_int32``."""
    key = Keys.Vision.Clip.SELECT_LAYER
    # Signed writer here, unlike the add_uint32 setters next to it -
    # presumably so negative layer indices can be stored; confirm with callers.
    self.add_int32(key, value)
def add_vision_vit_select_layer(self, value: int) -> None:
    """Store ``value`` under ``Keys.Vision.Vit.SELECT_LAYER`` via ``add_int32``."""
    key = Keys.Vision.Vit.SELECT_LAYER
    # Signed writer here, unlike the add_uint32 setters next to it -
    # presumably so negative layer indices can be stored; confirm with callers.
    self.add_int32(key, value)
def add_vision_clip_patch_merge_type(self, value: CLIPPatchMergeType) -> None:
    """Store ``value.value`` under ``Keys.Vision.Clip.PATCH_MERGE_TYPE`` via ``add_string``.

    The ``.value`` attribute of the given ``CLIPPatchMergeType`` is what gets
    written, not the object itself.
    """
    key = Keys.Vision.Clip.PATCH_MERGE_TYPE
    text = value.value
    self.add_string(key, text)
def add_vision_vit_patch_merge_type(self, value: CLIPPatchMergeType) -> None:
    """Store ``value.value`` under ``Keys.Vision.Vit.PATCH_MERGE_TYPE`` via ``add_string``.

    The ``.value`` attribute of the given ``CLIPPatchMergeType`` is what gets
    written, not the object itself.
    """
    key = Keys.Vision.Vit.PATCH_MERGE_TYPE
    text = value.value
    self.add_string(key, text)
def add_vision_clip_layer_norm_epsilon(self, value: float) -> None:
    """Store ``value`` under ``Keys.Vision.Clip.LAYERNORM_EPS`` via ``add_float32``."""
    key = Keys.Vision.Clip.LAYERNORM_EPS
    self.add_float32(key, value)
def add_vision_vit_layer_norm_epsilon(self, value: float) -> None:
    """Store ``value`` under ``Keys.Vision.Vit.LAYERNORM_EPS`` via ``add_float32``."""
    key = Keys.Vision.Vit.LAYERNORM_EPS
    self.add_float32(key, value)
def add_vision_clip_image_mean(self, value: Sequence[float]) -> None:
def add_vision_vit_image_mean(self, value: Sequence[float]) -> None:
    """Store ``value`` under ``Keys.Vision.IMAGE_MEAN`` via ``add_array``.

    Despite the ``vit`` in the method name, the key used is the one defined
    directly on ``Keys.Vision``, not one under ``Keys.Vision.Vit``.
    """
    key = Keys.Vision.IMAGE_MEAN
    self.add_array(key, value)
def add_vision_clip_image_std(self, value: Sequence[float]) -> None:
def add_vision_vit_image_std(self, value: Sequence[float]) -> None:
    """Store ``value`` under ``Keys.Vision.IMAGE_STD`` via ``add_array``.

    Despite the ``vit`` in the method name, the key used is the one defined
    directly on ``Keys.Vision``, not one under ``Keys.Vision.Vit``.
    """
    key = Keys.Vision.IMAGE_STD
    self.add_array(key, value)
def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None: