use vocab size from config.json

This commit is contained in:
akawrykow 2023-08-29 17:12:33 -07:00
parent 22c3522d78
commit ad45bb37f4

View file

@@ -148,7 +148,7 @@ if Path(dir_model + "/tokenizer.json").is_file():
 print("gguf: get gpt2 tokenizer vocab")
-vocab_size = len(tokenizer_json["model"]["vocab"])
+vocab_size = hparams["vocab_size"]
 # ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
 tokenizer = AutoTokenizer.from_pretrained(dir_model)