From c4956e4a05ff0e9d94bd7d71e651f13ba1623614 Mon Sep 17 00:00:00 2001
From: jaime-m-p <>
Date: Thu, 11 Jul 2024 19:50:48 +0200
Subject: [PATCH] update test: fix special and added token lists

---
 tests/test-tokenizer-random.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test-tokenizer-random.py b/tests/test-tokenizer-random.py
index ee79d7c27..440c3c2c2 100644
--- a/tests/test-tokenizer-random.py
+++ b/tests/test-tokenizer-random.py
@@ -152,8 +152,8 @@ class TokenizerGroundtruth (Tokenizer):
         # build vocab
         self.vocab = self.get_vocab(detokenize=True)
         # tokens and lists
-        self.special_tokens = list(self.model.all_special_tokens)
-        self.added_tokens = list(self.model.added_tokens_encoder)
+        self.special_tokens = [self.vocab[i] for i in sorted(self.model.all_special_ids)]
+        self.added_tokens = [self.vocab[i] for i in sorted(self.model.added_tokens_encoder.values())]
         self.bos_token = self.model.bos_token
         self.eos_token = self.model.eos_token
 
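
Note (not part of the patch): the old code built both lists from the HF tokenizer's raw token strings, in whatever order the tokenizer reports them, while self.vocab holds detokenized strings indexed by token id. Raw pieces can differ from detokenized text (e.g. a SentencePiece piece "▁Hello" decodes to " Hello"), so comparing them against detokenizer output can mismatch. Resolving ids first and sorting them makes both lists consistent with the detokenized vocab and deterministic. A minimal sketch of the difference, assuming a hypothetical "gpt2" tokenizer and using tok.decode([i]) as a stand-in for get_vocab(detokenize=True):

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("gpt2")  # assumed example model

    # Old: raw token strings; iterating added_tokens_encoder yields dict keys.
    old_special = list(tok.all_special_tokens)
    old_added   = list(tok.added_tokens_encoder)

    # New: collect the ids, sort them for a deterministic order, then take the
    # detokenized string per id so entries line up with the detokenized vocab.
    new_special = [tok.decode([i]) for i in sorted(tok.all_special_ids)]
    new_added   = [tok.decode([i]) for i in sorted(tok.added_tokens_encoder.values())]

    print(old_special, "->", new_special)
    print(old_added, "->", new_added)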