convert : use a string for the SentencePiece tokenizer path
This commit is contained in:
parent
3870164f47
commit
dcd8dfa1b5
1 changed file with 1 addition and 1 deletion
|
@ -463,7 +463,7 @@ class SentencePieceVocab(Vocab):
|
||||||
raise FileNotFoundError('Cannot find tokenizer.model')
|
raise FileNotFoundError('Cannot find tokenizer.model')
|
||||||
|
|
||||||
self.sentencepiece_tokenizer = SentencePieceProcessor()
|
self.sentencepiece_tokenizer = SentencePieceProcessor()
|
||||||
self.sentencepiece_tokenizer.LoadFromFile(fname_tokenizer)
|
self.sentencepiece_tokenizer.LoadFromFile(str(fname_tokenizer))
|
||||||
vocab_size = self.sentencepiece_tokenizer.vocab_size()
|
vocab_size = self.sentencepiece_tokenizer.vocab_size()
|
||||||
|
|
||||||
new_tokens = {id: piece for piece, id in added_tokens.items() if id >= vocab_size}
|
new_tokens = {id: piece for piece, id in added_tokens.items() if id >= vocab_size}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue