convert : fix pre-tokenizer type writing
This commit is contained in:
parent 43e12ce8e5
commit 1b9b79dd14
4 changed files with 3 additions and 1 deletion
@@ -408,6 +408,8 @@ class Model(ABC):
         if res is None:
             raise NotImplementedError(f"BPE pre-tokenizer was not recognized - update get_vocab_base_pre()")

+        return res
+
     def _set_vocab_gpt2(self) -> None:
         tokens, toktypes, tokpre = self.get_vocab_base()
         self.gguf_writer.add_tokenizer_model("gpt2")
@@ -4290,7 +4290,7 @@ static void llm_load_vocab(
         }

         if (tokenizer_pre.empty()) {
-            LLAMA_LOG_WARN("%s: missing tokenizer pre, using default tokenizer pre: 'default'", __func__);
+            LLAMA_LOG_WARN("%s: missing tokenizer pre, using default tokenizer pre: 'default'\n", __func__);
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
         } else if (tokenizer_pre == "default") {
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
Binary file not shown.
Binary file not shown.