# Patch summary: the same one-line change is applied to two converter scripts,
# convert-gptneox-hf-to-gguf.py (hunk at line 131) and
# convert-starcoder-hf-to-gguf.py (hunk at line 121).
#
# In both files, reverse_vocab is a dict built by inverting
# tokenizer.vocab.items() (token id -> encoded token string), and the loop walks
# every id in range(vocab_size).
#
# Before: tokens.append(reverse_vocab[i]) -- a plain dict lookup, so any id in
#         range(vocab_size) that is absent from reverse_vocab raises KeyError.
# After:  ids missing from reverse_vocab get the placeholder string
#         f"[PAD{i}]" appended instead, so the loop always emits exactly
#         vocab_size tokens.
#
# The unchanged context lines show that every entry, placeholder or not, still
# receives a dummy score of 0.0 and gguf.TokenType.NORMAL.
#
# NOTE(review): presumably vocab_size can be larger than the tokenizer's
#   vocabulary (e.g. a padded embedding matrix); where vocab_size comes from is
#   not visible in this patch -- confirm against the scripts.
# NOTE(review): placeholder entries are tagged TokenType.NORMAL like real
#   tokens; confirm that is the intended token type for padding ids.
# NOTE(review): reverse_vocab.get(i, f"[PAD{i}]") would express the same
#   fallback with a single lookup.
# NOTE(review): this copy of the patch has its line breaks collapsed onto one
#   line; the original line structure (including blank context lines) must be
#   restored before it can be applied.
diff --git a/convert-gptneox-hf-to-gguf.py b/convert-gptneox-hf-to-gguf.py index e6c3d6935..95704ec43 100755 --- a/convert-gptneox-hf-to-gguf.py +++ b/convert-gptneox-hf-to-gguf.py @@ -131,7 +131,7 @@ tokenizer = AutoTokenizer.from_pretrained(dir_model) reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()} for i in range(vocab_size): - tokens.append(reverse_vocab[i]) + tokens.append(reverse_vocab[i] if i in reverse_vocab else f"[PAD{i}]") scores.append(0.0) # dummy toktypes.append(gguf.TokenType.NORMAL) diff --git a/convert-starcoder-hf-to-gguf.py b/convert-starcoder-hf-to-gguf.py index 90c3b5057..9d13637ce 100755 --- a/convert-starcoder-hf-to-gguf.py +++ b/convert-starcoder-hf-to-gguf.py @@ -121,7 +121,7 @@ tokenizer = AutoTokenizer.from_pretrained(dir_model) reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()} for i in range(vocab_size): - tokens.append(reverse_vocab[i]) + tokens.append(reverse_vocab[i] if i in reverse_vocab else f"[PAD{i}]") scores.append(0.0) # dummy toktypes.append(gguf.TokenType.NORMAL)