From ad45bb37f46e69c1cccd820b5187fd3916f79e4d Mon Sep 17 00:00:00 2001
From: akawrykow
Date: Tue, 29 Aug 2023 17:12:33 -0700
Subject: [PATCH] use vocab size from config.json

---
 convert-falcon-hf-to-gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert-falcon-hf-to-gguf.py b/convert-falcon-hf-to-gguf.py
index 5694cd358..fab11b3bf 100755
--- a/convert-falcon-hf-to-gguf.py
+++ b/convert-falcon-hf-to-gguf.py
@@ -148,7 +148,7 @@ if Path(dir_model + "/tokenizer.json").is_file():
 
     print("gguf: get gpt2 tokenizer vocab")
 
-    vocab_size = len(tokenizer_json["model"]["vocab"])
+    vocab_size = hparams["vocab_size"]
 
     # ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
     tokenizer = AutoTokenizer.from_pretrained(dir_model)