update test: fix special and added token lists

This commit is contained in:
jaime-m-p 2024-07-11 19:50:48 +02:00
parent 3eb1900e5c
commit c4956e4a05

View file

@@ -152,8 +152,8 @@ class TokenizerGroundtruth (Tokenizer):
         # build vocab
         self.vocab = self.get_vocab(detokenize=True)
         # tokens and lists
-        self.special_tokens = list(self.model.all_special_tokens)
-        self.added_tokens = list(self.model.added_tokens_encoder)
+        self.special_tokens = [self.vocab[i] for i in sorted(self.model.all_special_ids)]
+        self.added_tokens = [self.vocab[i] for i in sorted(self.model.added_tokens_encoder.values())]
         self.bos_token = self.model.bos_token
         self.eos_token = self.model.eos_token