Remove comment

This commit is contained in:
goerch 2023-07-24 09:48:51 +02:00
parent 281a4b4f27
commit a0d28b250c

View file

@@ -231,7 +231,6 @@ class SentencePieceVocab:
def sentencepiece_tokens(self) -> Iterable[Tuple[bytes, float]]: def sentencepiece_tokens(self) -> Iterable[Tuple[bytes, float]]:
tokenizer = self.sentencepiece_tokenizer tokenizer = self.sentencepiece_tokenizer
for i in range(tokenizer.vocab_size()): for i in range(tokenizer.vocab_size()):
# TODO: How do we want to support is_unknown, is_control, is_byte and is_unused?
piece = tokenizer.id_to_piece(i) piece = tokenizer.id_to_piece(i)
text: bytes = piece.encode("utf-8") text: bytes = piece.encode("utf-8")
score: float = tokenizer.get_score(i) score: float = tokenizer.get_score(i)