convert : fix pre-tokenizer type writing

Georgi Gerganov 2024-04-26 20:55:14 +03:00
parent 43e12ce8e5
commit 1b9b79dd14
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
4 changed files with 3 additions and 1 deletion

convert-hf-to-gguf.py

@@ -408,6 +408,8 @@ class Model(ABC):
         if res is None:
             raise NotImplementedError(f"BPE pre-tokenizer was not recognized - update get_vocab_base_pre()")
 
+        return res
+
     def _set_vocab_gpt2(self) -> None:
         tokens, toktypes, tokpre = self.get_vocab_base()
         self.gguf_writer.add_tokenizer_model("gpt2")