convert.py: Set gpt2 as tokenizer model when using BPE
This commit is contained in:
parent
58fa4dc870
commit
ce005285aa
1 changed file with 6 additions and 1 deletion
|
@@ -846,7 +846,12 @@ class OutputFile:
|
|||
scores.append(score)
|
||||
toktypes.append(toktype)
|
||||
|
||||
self.gguf.add_tokenizer_model("llama")
|
||||
if isinstance(vocab, SentencePieceVocab):
|
||||
self.gguf.add_tokenizer_model("llama")
|
||||
elif isinstance(vocab, BpeVocab):
|
||||
self.gguf.add_tokenizer_model("gpt2")
|
||||
else:
|
||||
raise ValueError(f'Unknown vocab type: Not BpeVocab or SentencePieceVocab')
|
||||
self.gguf.add_token_list(tokens)
|
||||
self.gguf.add_token_scores(scores)
|
||||
self.gguf.add_token_types(toktypes)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue