fix: Use gpt2 tokenizer for roberta and add eos/bos tokens
Branch: RobertaTokenizer
Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
This commit is contained in:
parent
a76c56fa1a
commit
a2e03b826f
1 changed file with 27 additions and 2 deletions
|
@@ -663,7 +663,11 @@ class Model:
|
|||
res = "minerva-7b"
|
||||
if chkhsh == "8b5a93ed704057481f240da0be7e7dca721d7f8f4755263b6807227a2cbeae65":
|
||||
# ref: https://huggingface.co/sentence-transformers/stsb-roberta-base
|
||||
res = "roberta-bpe"
|
||||
# NOTE: The Roberta tokenizer is the same as GPT-2, but it always
|
||||
# adds the cls/sep tokens as bos/eos. This is handled as a
|
||||
# post-processor in tokenizers, so the chkhsh is different, but
|
||||
# it still maps to gpt-2 internally.
|
||||
res = "gpt-2"
|
||||
|
||||
if res is None:
|
||||
logger.warning("\n")
|
||||
|
@@ -2544,7 +2548,7 @@ class InternLM2Model(Model):
|
|||
return [(self.map_tensor_name(name), data_torch)]
|
||||
|
||||
|
||||
@Model.register("BertModel", "CamembertModel", "RobertaModel")
|
||||
@Model.register("BertModel", "CamembertModel")
|
||||
class BertModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.BERT
|
||||
|
||||
|
@@ -2617,6 +2621,27 @@ class BertModel(Model):
|
|||
return [(self.map_tensor_name(name), data_torch)]
|
||||
|
||||
|
||||
@Model.register("RobertaModel")
class RobertaModel(BertModel):
    """Converter for ``RobertaModel`` checkpoints, written as the BERT architecture.

    Only vocabulary handling differs from ``BertModel``: see ``set_vocab``.
    """

    model_arch = gguf.MODEL_ARCH.BERT

    def set_vocab(self):
        """Support BPE tokenizers for roberta models.

        If ``tokenizer.json`` exists in the model directory, the vocabulary
        is written through the GPT-2 path and the add-bos / add-eos flags are
        enabled. Otherwise the parent class's ``set_vocab`` is used and its
        result is returned.
        """
        if not (self.dir_model / "tokenizer.json").exists():
            # No tokenizer.json present: defer entirely to the parent class.
            return super().set_vocab()

        self._set_vocab_gpt2()

        writer = self.gguf_writer
        writer.add_add_bos_token(True)
        writer.add_add_eos_token(True)

        # The token type count is needed to validate the size of the
        # token_type embeddings ("Sequence A" or "Sequence B"), even though
        # all zeros are currently passed to those embeddings.
        writer.add_token_type_count(self.hparams.get("type_vocab_size", 1))
|
||||
|
||||
|
||||
@Model.register("NomicBertModel")
|
||||
class NomicBertModel(BertModel):
|
||||
model_arch = gguf.MODEL_ARCH.NOMIC_BERT
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue