convert-hf : Corrected sentencepiece API calls.
This commit is contained in:
parent
9acc3ecf34
commit
f4421f7cd8
1 changed file with 8 additions and 7 deletions
|
@@ -2294,7 +2294,8 @@ class ArcticModel(Model):
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# Read the whole vocabulary from the tokenizer.model file
|
# Read the whole vocabulary from the tokenizer.model file
|
||||||
tokenizer = SentencePieceProcessor(str(tokenizer_path))
|
tokenizer = SentencePieceProcessor()
|
||||||
|
tokenizer.LoadFromFile(str(tokenizer_path))
|
||||||
|
|
||||||
vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
|
vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
|
||||||
|
|
||||||
|
@@ -2304,18 +2305,18 @@ class ArcticModel(Model):
|
||||||
|
|
||||||
for token_id in range(tokenizer.vocab_size()):
|
for token_id in range(tokenizer.vocab_size()):
|
||||||
|
|
||||||
piece = tokenizer.id_to_piece(token_id)
|
piece = tokenizer.IdToPiece(token_id)
|
||||||
text = piece.encode("utf-8")
|
text = piece.encode("utf-8")
|
||||||
score = tokenizer.get_score(token_id)
|
score = tokenizer.GetScore(token_id)
|
||||||
|
|
||||||
toktype = SentencePieceTokenTypes.NORMAL
|
toktype = SentencePieceTokenTypes.NORMAL
|
||||||
if tokenizer.is_unknown(token_id):
|
if tokenizer.IsUnknown(token_id):
|
||||||
toktype = SentencePieceTokenTypes.UNKNOWN
|
toktype = SentencePieceTokenTypes.UNKNOWN
|
||||||
elif tokenizer.is_control(token_id):
|
elif tokenizer.IsControl(token_id):
|
||||||
toktype = SentencePieceTokenTypes.CONTROL
|
toktype = SentencePieceTokenTypes.CONTROL
|
||||||
elif tokenizer.is_unused(token_id):
|
elif tokenizer.IsUnused(token_id):
|
||||||
toktype = SentencePieceTokenTypes.UNUSED
|
toktype = SentencePieceTokenTypes.UNUSED
|
||||||
elif tokenizer.is_byte(token_id):
|
elif tokenizer.IsByte(token_id):
|
||||||
toktype = SentencePieceTokenTypes.BYTE
|
toktype = SentencePieceTokenTypes.BYTE
|
||||||
|
|
||||||
tokens[token_id] = text
|
tokens[token_id] = text
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue