convert : fix pre-tokenizer type writing
This commit is contained in:
parent
43e12ce8e5
commit
1b9b79dd14
4 changed files with 3 additions and 1 deletions
|
@ -408,6 +408,8 @@ class Model(ABC):
|
||||||
if res is None:
|
if res is None:
|
||||||
raise NotImplementedError(f"BPE pre-tokenizer was not recognized - update get_vocab_base_pre()")
|
raise NotImplementedError(f"BPE pre-tokenizer was not recognized - update get_vocab_base_pre()")
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
def _set_vocab_gpt2(self) -> None:
|
def _set_vocab_gpt2(self) -> None:
|
||||||
tokens, toktypes, tokpre = self.get_vocab_base()
|
tokens, toktypes, tokpre = self.get_vocab_base()
|
||||||
self.gguf_writer.add_tokenizer_model("gpt2")
|
self.gguf_writer.add_tokenizer_model("gpt2")
|
||||||
|
|
|
@ -4290,7 +4290,7 @@ static void llm_load_vocab(
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tokenizer_pre.empty()) {
|
if (tokenizer_pre.empty()) {
|
||||||
LLAMA_LOG_WARN("%s: missing tokenizer pre, using default tokenizer pre: 'default'", __func__);
|
LLAMA_LOG_WARN("%s: missing tokenizer pre, using default tokenizer pre: 'default'\n", __func__);
|
||||||
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
|
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
|
||||||
} else if (tokenizer_pre == "default") {
|
} else if (tokenizer_pre == "default") {
|
||||||
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
|
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue