convert-gptneox-hf-to-gguf and convert: Only handle merges for BPE tokenizer
This commit is contained in:
parent
61911ca4db
commit
0c620ef63b
2 changed files with 3 additions and 3 deletions
|
@@ -150,7 +150,7 @@ if Path(dir_model + "/tokenizer.json").is_file():
|
|||
|
||||
gguf_writer.add_token_list(tokens)
|
||||
|
||||
special_vocab = gguf.SpecialVocab(Path(dir_model))
|
||||
special_vocab = gguf.SpecialVocab(Path(dir_model), load_merges = True)
|
||||
special_vocab.add_to_gguf(gguf_writer)
|
||||
|
||||
# TENSORS
|
||||
|
|
|
@@ -1159,7 +1159,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
|
|||
assert args.outfile, "need --outfile if using --vocab-only"
|
||||
# FIXME: Try to respect vocab_dir somehow?
|
||||
vocab = load_vocab(args.vocab_dir or args.model, args.vocabtype)
|
||||
special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent)
|
||||
special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent, load_merges = args.vocabtype == 'bpe')
|
||||
outfile = args.outfile
|
||||
OutputFile.write_vocab_only(outfile, params, vocab, special_vocab)
|
||||
print(f"Wrote {outfile}")
|
||||
|
@@ -1171,7 +1171,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
|
|||
vocab_dir = args.vocab_dir if args.vocab_dir else model_plus.paths[0].parent
|
||||
vocab = load_vocab(vocab_dir, args.vocabtype)
|
||||
# FIXME: Try to respect vocab_dir somehow?
|
||||
special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent)
|
||||
special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent, load_merges = args.vocabtype == 'bpe')
|
||||
|
||||
model = model_plus.model
|
||||
model = convert_model_names(model, params)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue