Support BPE tokenizer in convert; fix vocab-type argument
Signed-off-by: ldwang <ftgreat@gmail.com>
This commit is contained in:
parent
ee6bc1426e
commit
64b8aafce1
1 changed file with 2 additions and 1 deletion
|
@ -1189,6 +1189,7 @@ def filter_and_sort_tensors(model: LazyModel) -> LazyModel:
|
||||||
|
|
||||||
|
|
||||||
def load_vocab(path: Path, vocabtype: Optional[str]) -> SentencePieceVocab:
|
def load_vocab(path: Path, vocabtype: Optional[str]) -> SentencePieceVocab:
|
||||||
|
print(f"vocabtype: {vocabtype}")
|
||||||
# Be extra-friendly and accept either a file or a directory. Also, if it's
|
# Be extra-friendly and accept either a file or a directory. Also, if it's
|
||||||
# a directory, it might be the model directory, and tokenizer.model might
|
# a directory, it might be the model directory, and tokenizer.model might
|
||||||
# be in the parent of that.
|
# be in the parent of that.
|
||||||
|
@ -1210,7 +1211,7 @@ def load_vocab(path: Path, vocabtype: Optional[str]) -> SentencePieceVocab:
|
||||||
added_tokens_path = path.parent / "added_tokens.json"
|
added_tokens_path = path.parent / "added_tokens.json"
|
||||||
print(f"Loading vocab file {path}")
|
print(f"Loading vocab file {path}")
|
||||||
return SentencePieceVocab(path, added_tokens_path if added_tokens_path.exists() else None,
|
return SentencePieceVocab(path, added_tokens_path if added_tokens_path.exists() else None,
|
||||||
vocab_file)
|
vocabtype)
|
||||||
|
|
||||||
|
|
||||||
def default_outfile(model_paths: List[Path], file_type: GGMLFileType) -> Path:
|
def default_outfile(model_paths: List[Path], file_type: GGMLFileType) -> Path:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue