From 45e5df66dacbb0e6ebf579ba4e7c6d667bd72772 Mon Sep 17 00:00:00 2001
From: Aman Karmani
Date: Mon, 10 Jul 2023 11:06:05 -0700
Subject: [PATCH] XgenVocab fix from @smdesai

---
 convert.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/convert.py b/convert.py
index 5d2f8aa55..ee7148a65 100644
--- a/convert.py
+++ b/convert.py
@@ -217,13 +217,13 @@ class XgenVocab:
 
     def all_tokens(self) -> Iterable[Tuple[bytes, float]]:
         for index in range(0, self.vocab_size_base):
-            token = self.xt._convert_id_to_token(index)
+            token = self.xt.encoder.decode_single_token_bytes(index)
             yield (token, float(index))
         for index in range(self.vocab_size_base, self.vocab_size):
             yield (b'', float(index))
 
     def __repr__(self) -> str:
-        return f""
+        return f""
 
 
 class SentencePieceVocab: