Fix convert.py error msg when added tokens are out of range
This commit is contained in:
parent
22b914e0ba
commit
3a007e2c81
1 changed files with 1 additions and 1 deletions
|
@ -369,7 +369,7 @@ class SentencePieceVocab:
|
||||||
expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
|
expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
|
||||||
actual_ids = sorted(added_tokens.values())
|
actual_ids = sorted(added_tokens.values())
|
||||||
if expected_ids != actual_ids:
|
if expected_ids != actual_ids:
|
||||||
raise Exception(f"Expected added token IDs to be sequential and start at {len(added_tokens)}; got {actual_ids}")
|
raise Exception(f"Expected added token IDs to be sequential and start at {vocab_size}; got {actual_ids}")
|
||||||
|
|
||||||
items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
|
items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
|
||||||
self.added_tokens_list = [text for (text, idx) in items]
|
self.added_tokens_list = [text for (text, idx) in items]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue