From 15c2df912638d95584dc024e058c536828fb2244 Mon Sep 17 00:00:00 2001 From: Sang-Kil Park Date: Mon, 5 Feb 2024 00:50:27 +0900 Subject: [PATCH] Modify the vocab selection algorithm. --- convert.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/convert.py b/convert.py index 7a90d6c4c..661f8c65d 100755 --- a/convert.py +++ b/convert.py @@ -334,9 +334,9 @@ class Params: class BpeVocab: def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None: self.bpe_tokenizer = json.loads(open(str(fname_tokenizer), encoding="utf-8").read()) - try: + if isinstance(self.bpe_tokenizer.get('model'), dict): self.vocab = self.bpe_tokenizer["model"]["vocab"] - except (KeyError, TypeError): + else: self.vocab = self.bpe_tokenizer added_tokens: dict[str, int] if fname_added_tokens is not None: