Default values for add_bos_token and add_eos_token
This commit is contained in:
parent
9b21dc3aef
commit
f2340b43fc
1 changed file with 3 additions and 0 deletions
|
@@ -293,6 +293,9 @@ def main(argv: list[str] = None):
|
||||||
model = LibLlamaModel(LibLlama(), args.vocab_file, mparams=dict(vocab_only=True), cparams=dict(n_ctx=4096))
|
model = LibLlamaModel(LibLlama(), args.vocab_file, mparams=dict(vocab_only=True), cparams=dict(n_ctx=4096))
|
||||||
tokenizer = AutoTokenizer.from_pretrained(args.dir_tokenizer)
|
tokenizer = AutoTokenizer.from_pretrained(args.dir_tokenizer)
|
||||||
|
|
||||||
|
tokenizer.add_bos_token = getattr(tokenizer, "add_bos_token", True)
|
||||||
|
tokenizer.add_eos_token = getattr(tokenizer, "add_eos_token", False)
|
||||||
|
|
||||||
def func_tokenize1(text: str):
|
def func_tokenize1(text: str):
|
||||||
return model.tokenize(text, add_special=True, parse_special=True)
|
return model.tokenize(text, add_special=True, parse_special=True)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue