From df4a9c99c2d079524a52f1ed9a446dc089c231ae Mon Sep 17 00:00:00 2001 From: Sang-Kil Park Date: Mon, 29 Jan 2024 18:20:28 +0900 Subject: [PATCH] Support tokenizer files both with and without `["model"]["vocab"]`. --- convert.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/convert.py b/convert.py index 06768033d..b48afba1e 100755 --- a/convert.py +++ b/convert.py @@ -334,7 +334,10 @@ class Params: class BpeVocab: def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None: self.bpe_tokenizer = json.loads(open(str(fname_tokenizer), encoding="utf-8").read()) - self.vocab = self.bpe_tokenizer["model"]["vocab"] + try: + self.vocab = self.bpe_tokenizer["model"]["vocab"] + except: + self.vocab = self.bpe_tokenizer added_tokens: dict[str, int] if fname_added_tokens is not None: # FIXME: Verify that added tokens here _cannot_ overlap with the main vocab.