convert : use a string for the SentencePiece tokenizer path

This commit is contained in:
Francis Couture-Harpin 2024-05-01 13:07:10 -04:00
parent 3870164f47
commit dcd8dfa1b5

View file

@@ -463,7 +463,7 @@ class SentencePieceVocab(Vocab):
             raise FileNotFoundError('Cannot find tokenizer.model')
         self.sentencepiece_tokenizer = SentencePieceProcessor()
-        self.sentencepiece_tokenizer.LoadFromFile(fname_tokenizer)
+        self.sentencepiece_tokenizer.LoadFromFile(str(fname_tokenizer))
         vocab_size = self.sentencepiece_tokenizer.vocab_size()
         new_tokens = {id: piece for piece, id in added_tokens.items() if id >= vocab_size}