From dcd8dfa1b5a242e8b48c1b0eaf6e765abf316158 Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Wed, 1 May 2024 13:07:10 -0400
Subject: [PATCH] convert : use a string for the SentencePiece tokenizer path

---
 convert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert.py b/convert.py
index fd5487dbb..ce1a4b9f5 100755
--- a/convert.py
+++ b/convert.py
@@ -463,7 +463,7 @@ class SentencePieceVocab(Vocab):
             raise FileNotFoundError('Cannot find tokenizer.model')
 
         self.sentencepiece_tokenizer = SentencePieceProcessor()
-        self.sentencepiece_tokenizer.LoadFromFile(fname_tokenizer)
+        self.sentencepiece_tokenizer.LoadFromFile(str(fname_tokenizer))
         vocab_size = self.sentencepiece_tokenizer.vocab_size()
 
         new_tokens = {id: piece for piece, id in added_tokens.items() if id >= vocab_size}
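
A minimal sketch of the behavior this one-line change works around (assuming the sentencepiece Python package; the model path below is hypothetical): the SWIG-generated SentencePieceProcessor.LoadFromFile binding expects a plain str, while convert.py tracks fname_tokenizer as a pathlib.Path, so depending on the sentencepiece version passing the Path directly can raise a TypeError. Casting with str() sidesteps that.

    from pathlib import Path
    from sentencepiece import SentencePieceProcessor

    fname_tokenizer = Path("models/tokenizer.model")  # hypothetical path

    sp = SentencePieceProcessor()
    # Passing the Path object directly may raise a TypeError in the
    # SWIG binding; converting to str first is what the patch above does.
    sp.LoadFromFile(str(fname_tokenizer))
    print(sp.vocab_size())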