From 3a007e2c81193ce233a6ef728449b50a6e86fa90 Mon Sep 17 00:00:00 2001
From: KerfuffleV2
Date: Tue, 17 Oct 2023 04:30:13 -0600
Subject: [PATCH] Fix convert.py error msg when added tokens are out of range

---
 convert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert.py b/convert.py
index e9b08d344..f506546c0 100755
--- a/convert.py
+++ b/convert.py
@@ -369,7 +369,7 @@ class SentencePieceVocab:
         expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
         actual_ids = sorted(added_tokens.values())
         if expected_ids != actual_ids:
-            raise Exception(f"Expected added token IDs to be sequential and start at {len(added_tokens)}; got {actual_ids}")
+            raise Exception(f"Expected added token IDs to be sequential and start at {vocab_size}; got {actual_ids}")
 
         items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
         self.added_tokens_list = [text for (text, idx) in items]