Fall back to tokenizer.json if vocab.json does not exist
This commit is contained in:
parent
56a00f0a2f
commit
a04bdfb4fa
1 changed file with 7 additions and 6 deletions
13
convert.py
13
convert.py
|
@@ -1293,7 +1293,7 @@ def load_some_model(path: Path) -> ModelPlus:
|
|||
|
||||
|
||||
class VocabFactory:
|
||||
_FILES = {"spm": "tokenizer.model", "bpe": "vocab.json", "hfft": "tokenizer.json"}
|
||||
_FILES = {"spm": ["tokenizer.model"], "bpe": ["vocab.json", "tokenizer.json"], "hfft": ["tokenizer.json"]}
|
||||
|
||||
def __init__(self, path: Path):
|
||||
self.path = path
|
||||
|
@@ -1301,11 +1301,12 @@ class VocabFactory:
|
|||
print(f"Found vocab files: {self.file_paths}")
|
||||
|
||||
def _detect_files(self) -> dict[str, Path | None]:
|
||||
def locate(file: str) -> Path | None:
|
||||
if (path := self.path / file).exists():
|
||||
return path
|
||||
if (path := self.path.parent / file).exists():
|
||||
return path
|
||||
def locate(files: list[str]) -> Path | None:
|
||||
for file in files:
|
||||
if (path := self.path / file).exists():
|
||||
return path
|
||||
if (path := self.path.parent / file).exists():
|
||||
return path
|
||||
return None
|
||||
|
||||
return {vt: locate(f) for vt, f in self._FILES.items()}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue