support bpe tokenizer in convert, fix

Signed-off-by: ldwang <ftgreat@gmail.com>
parent ee6bc1426e
commit 64b8aafce1

1 changed file with 2 additions and 1 deletion
|
@@ -1189,6 +1189,7 @@ def filter_and_sort_tensors(model: LazyModel) -> LazyModel:
 
 def load_vocab(path: Path, vocabtype: Optional[str]) -> SentencePieceVocab:
+    print(f"vocabtype: {vocabtype}")
     # Be extra-friendly and accept either a file or a directory. Also, if it's
     # a directory, it might be the model directory, and tokenizer.model might
     # be in the parent of that.
@@ -1210,7 +1211,7 @@ def load_vocab(path: Path, vocabtype: Optional[str]) -> SentencePieceVocab:
     added_tokens_path = path.parent / "added_tokens.json"
     print(f"Loading vocab file {path}")
     return SentencePieceVocab(path, added_tokens_path if added_tokens_path.exists() else None,
-                              vocab_file)
+                              vocabtype)
 
 
 def default_outfile(model_paths: List[Path], file_type: GGMLFileType) -> Path:
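
For context, a minimal sketch of how load_vocab plausibly reads after this fix. Only the signature, the two print calls, the comments, and the final return appear in the diff above; the directory-handling logic in the middle and the SentencePieceVocab stub are assumptions added here for illustration, not the file's actual code.

from pathlib import Path
from typing import Optional


class SentencePieceVocab:
    # Illustrative stub: the real class loads the tokenizer; only the
    # constructor's arity and argument order matter for this fix.
    def __init__(self, fname_tokenizer: Path,
                 fname_added_tokens: Optional[Path],
                 vocabtype: Optional[str]) -> None:
        self.fname_tokenizer = fname_tokenizer
        self.fname_added_tokens = fname_added_tokens
        self.vocabtype = vocabtype


def load_vocab(path: Path, vocabtype: Optional[str]) -> SentencePieceVocab:
    print(f"vocabtype: {vocabtype}")
    # Be extra-friendly and accept either a file or a directory. Also, if it's
    # a directory, it might be the model directory, and tokenizer.model might
    # be in the parent of that.
    if path.is_dir():
        # Assumed logic (elided between the two hunks): BPE vocabs are kept
        # in vocab.json, SentencePiece vocabs in tokenizer.model.
        vocab_file = "vocab.json" if vocabtype == "bpe" else "tokenizer.model"
        if (path / vocab_file).exists():
            path = path / vocab_file
        elif (path.parent / vocab_file).exists():
            path = path.parent / vocab_file
        else:
            raise FileNotFoundError(f"Could not find {vocab_file} under {path}")
    added_tokens_path = path.parent / "added_tokens.json"
    print(f"Loading vocab file {path}")
    # The fix: pass the vocab *type* (e.g. 'bpe' or None), not the local
    # filename string vocab_file, as the third constructor argument.
    return SentencePieceVocab(path, added_tokens_path if added_tokens_path.exists() else None,
                              vocabtype)

The substance of the one-line fix is the last argument: the constructor previously received the filename string bound to vocab_file, so the requested vocab type was silently dropped; passing vocabtype through lets BPE vocabularies be handled as intended.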