update test: fix special and added token lists
This commit is contained in:
parent
3eb1900e5c
commit
c4956e4a05
1 changed file with 2 additions and 2 deletions
|
@@ -152,8 +152,8 @@ class TokenizerGroundtruth (Tokenizer):
|
|||
# build vocab
|
||||
self.vocab = self.get_vocab(detokenize=True)
|
||||
# tokens and lists
|
||||
self.special_tokens = list(self.model.all_special_tokens)
|
||||
self.added_tokens = list(self.model.added_tokens_encoder)
|
||||
self.special_tokens = [self.vocab[i] for i in sorted(self.model.all_special_ids)]
|
||||
self.added_tokens = [self.vocab[i] for i in sorted(self.model.added_tokens_encoder.values())]
|
||||
self.bos_token = self.model.bos_token
|
||||
self.eos_token = self.model.eos_token
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue