From a0d28b250c31dc72acfd7ec8ef13ffbeaa1746e1 Mon Sep 17 00:00:00 2001 From: goerch Date: Mon, 24 Jul 2023 09:48:51 +0200 Subject: [PATCH] Remove comment --- convert.py | 1 - 1 file changed, 1 deletion(-) diff --git a/convert.py b/convert.py index 8cd9ca75c..13e983d2e 100755 --- a/convert.py +++ b/convert.py @@ -231,7 +231,6 @@ class SentencePieceVocab: def sentencepiece_tokens(self) -> Iterable[Tuple[bytes, float]]: tokenizer = self.sentencepiece_tokenizer for i in range(tokenizer.vocab_size()): - # TODO: How do we want to support is_unknown, is_control, is_byte and is_unused? piece = tokenizer.id_to_piece(i) text: bytes = piece.encode("utf-8") score: float = tokenizer.get_score(i)