Remove comment
This commit is contained in:
parent
281a4b4f27
commit
a0d28b250c
1 changed file with 0 additions and 1 deletion
|
@@ -231,7 +231,6 @@ class SentencePieceVocab:
|
||||||
def sentencepiece_tokens(self) -> Iterable[Tuple[bytes, float]]:
|
def sentencepiece_tokens(self) -> Iterable[Tuple[bytes, float]]:
|
||||||
tokenizer = self.sentencepiece_tokenizer
|
tokenizer = self.sentencepiece_tokenizer
|
||||||
for i in range(tokenizer.vocab_size()):
|
for i in range(tokenizer.vocab_size()):
|
||||||
# TODO: How do we want to support is_unknown, is_control, is_byte and is_unused?
|
|
||||||
piece = tokenizer.id_to_piece(i)
|
piece = tokenizer.id_to_piece(i)
|
||||||
text: bytes = piece.encode("utf-8")
|
text: bytes = piece.encode("utf-8")
|
||||||
score: float = tokenizer.get_score(i)
|
score: float = tokenizer.get_score(i)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue