use vocab size from config.json
This commit is contained in:
parent
22c3522d78
commit
ad45bb37f4
1 changed file with 1 addition and 1 deletion
|
@@ -148,7 +148,7 @@ if Path(dir_model + "/tokenizer.json").is_file():
|
|||
|
||||
print("gguf: get gpt2 tokenizer vocab")
|
||||
|
||||
- vocab_size = len(tokenizer_json["model"]["vocab"])
|
||||
+ vocab_size = hparams["vocab_size"]
|
||||
|
||||
# ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
|
||||
tokenizer = AutoTokenizer.from_pretrained(dir_model)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue