Use correct type of pooling for embedding models (#5500)
Use correct type of pooling for embedding models
This commit is contained in:
parent
c06e45d729
commit
4524290e87
5 changed files with 94 additions and 31 deletions
|
@@ -40,7 +40,7 @@ class Keys:
|
|||
TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
|
||||
EXPERT_COUNT = "{arch}.expert_count"
|
||||
EXPERT_USED_COUNT = "{arch}.expert_used_count"
|
||||
POOLING_LAYER = "{arch}.pooling_layer"
|
||||
POOLING_TYPE = "{arch}.pooling_type"
|
||||
|
||||
class Attention:
|
||||
HEAD_COUNT = "{arch}.attention.head_count"
|
||||
|
@@ -561,6 +561,12 @@ class RopeScalingType(Enum):
|
|||
YARN = 'yarn'
|
||||
|
||||
|
||||
class PoolingType(IntEnum):
|
||||
NONE = 0
|
||||
MEAN = 1
|
||||
CLS = 2
|
||||
|
||||
|
||||
class GGMLQuantizationType(IntEnum):
|
||||
F32 = 0
|
||||
F16 = 1
|
||||
|
|
|
@@ -19,6 +19,7 @@ from .constants import (
|
|||
GGUFValueType,
|
||||
Keys,
|
||||
RopeScalingType,
|
||||
PoolingType,
|
||||
TokenType,
|
||||
)
|
||||
|
||||
|
@@ -360,8 +361,8 @@ class GGUFWriter:
|
|||
def add_causal_attention(self, value: bool) -> None:
|
||||
self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
|
||||
|
||||
def add_pooling_layer(self, value: bool) -> None:
|
||||
self.add_bool(Keys.LLM.POOLING_LAYER.format(arch=self.arch), value)
|
||||
def add_pooling_type(self, value: PoolingType) -> None:
|
||||
self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value)
|
||||
|
||||
def add_rope_dimension_count(self, count: int) -> None:
|
||||
self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue