Use correct type of pooling for embedding models (#5500)

Use correct type of pooling for embedding models
This commit is contained in:
Douglas Hanley 2024-02-15 11:21:49 -06:00 committed by GitHub
parent c06e45d729
commit 4524290e87
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 94 additions and 31 deletions

View file

@ -1650,7 +1650,29 @@ class BertModel(Model):
def set_gguf_parameters(self):
    """Write the BERT-specific GGUF metadata, including the pooling type.

    After the parent class has written its parameters, this marks the
    model as using non-causal attention and records which pooling
    strategy the checkpoint uses.

    The pooling strategy is discovered from the files in
    ``self.dir_model``: ``modules.json`` is searched for an entry whose
    ``type`` is ``sentence_transformers.models.Pooling``; that entry's
    ``path`` names a sub-directory whose ``config.json`` holds the
    ``pooling_mode_*`` flags. If ``modules.json`` is absent, or lists no
    pooling module, ``gguf.PoolingType.NONE`` is written.

    Raises:
        NotImplementedError: a pooling module is configured but neither
            mean-token nor CLS-token pooling is enabled in its config.
    """
    super().set_gguf_parameters()
    self.gguf_writer.add_causal_attention(False)
    # NOTE: pooling is recorded as a PoolingType value below; that call takes
    # the place of the earlier boolean `add_pooling_layer(True)` flag.

    # get pooling path
    pooling_path = None
    modules_path = self.dir_model / "modules.json"
    # A checkpoint without modules.json simply has no pooling module; fall
    # through to PoolingType.NONE instead of failing on the missing file.
    if modules_path.is_file():
        with open(modules_path, encoding="utf-8") as f:
            modules = json.load(f)
        for mod in modules:
            if mod.get("type") == "sentence_transformers.models.Pooling":
                pooling_path = mod["path"]
                break

    # get pooling type
    pooling_type = gguf.PoolingType.NONE
    if pooling_path is not None:
        with open(self.dir_model / pooling_path / "config.json", encoding="utf-8") as f:
            pooling = json.load(f)
        # .get(): a flag omitted from the config is treated as disabled
        # rather than raising KeyError.
        if pooling.get("pooling_mode_mean_tokens"):
            pooling_type = gguf.PoolingType.MEAN
        elif pooling.get("pooling_mode_cls_token"):
            pooling_type = gguf.PoolingType.CLS
        else:
            raise NotImplementedError("Only MEAN and CLS pooling types supported")

    self.gguf_writer.add_pooling_type(pooling_type.value)
def set_vocab(self):
path = self.dir_model