use vocab size from config.json
This commit is contained in:
parent
22c3522d78
commit
ad45bb37f4
1 changed file with 1 addition and 1 deletion
|
@@ -148,7 +148,7 @@ if Path(dir_model + "/tokenizer.json").is_file():
|
||||||
|
|
||||||
print("gguf: get gpt2 tokenizer vocab")
|
print("gguf: get gpt2 tokenizer vocab")
|
||||||
|
|
||||||
vocab_size = len(tokenizer_json["model"]["vocab"])
|
vocab_size = hparams["vocab_size"]
|
||||||
|
|
||||||
# ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
|
# ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
|
||||||
tokenizer = AutoTokenizer.from_pretrained(dir_model)
|
tokenizer = AutoTokenizer.from_pretrained(dir_model)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue