Use correct type of pooling for embedding models (#5500)

Use correct type of pooling for embedding models
This commit is contained in:
Douglas Hanley 2024-02-15 11:21:49 -06:00 committed by GitHub
parent c06e45d729
commit 4524290e87
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 94 additions and 31 deletions

View file

@@ -40,7 +40,7 @@ class Keys:
TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
EXPERT_COUNT = "{arch}.expert_count"
EXPERT_USED_COUNT = "{arch}.expert_used_count"
-        POOLING_LAYER            = "{arch}.pooling_layer"
+        POOLING_TYPE             = "{arch}.pooling_type"
class Attention:
HEAD_COUNT = "{arch}.attention.head_count"
@@ -561,6 +561,12 @@ class RopeScalingType(Enum):
YARN = 'yarn'
+class PoolingType(IntEnum):
+    NONE = 0
+    MEAN = 1
+    CLS  = 2
class GGMLQuantizationType(IntEnum):
F32 = 0
F16 = 1