Fix convert.py error msg when added tokens are out of range

This commit is contained in:
KerfuffleV2 2023-10-17 04:30:13 -06:00
parent 22b914e0ba
commit 3a007e2c81

View file

@ -369,7 +369,7 @@ class SentencePieceVocab:
expected_ids = list(range(vocab_size, vocab_size + len(added_tokens))) expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
actual_ids = sorted(added_tokens.values()) actual_ids = sorted(added_tokens.values())
if expected_ids != actual_ids: if expected_ids != actual_ids:
raise Exception(f"Expected added token IDs to be sequential and start at {len(added_tokens)}; got {actual_ids}") raise Exception(f"Expected added token IDs to be sequential and start at {vocab_size}; got {actual_ids}")
items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1]) items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
self.added_tokens_list = [text for (text, idx) in items] self.added_tokens_list = [text for (text, idx) in items]