convert.py: Set gpt2 as tokenizer model when using BPE

This commit is contained in:
KerfuffleV2 2023-08-29 12:01:59 -06:00
parent 58fa4dc870
commit ce005285aa

View file

@@ -846,7 +846,12 @@ class OutputFile:
scores.append(score)
toktypes.append(toktype)
self.gguf.add_tokenizer_model("llama")
if isinstance(vocab, SentencePieceVocab):
self.gguf.add_tokenizer_model("llama")
elif isinstance(vocab, BpeVocab):
self.gguf.add_tokenizer_model("gpt2")
else:
raise ValueError(f'Unknown vocab type: Not BpeVocab or SentencePieceVocab')
self.gguf.add_token_list(tokens)
self.gguf.add_token_scores(scores)
self.gguf.add_token_types(toktypes)