From 45983e3a47e7ce6cbf5594b487f209d0c5019ca4 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Thu, 4 Apr 2024 17:44:58 -0400
Subject: [PATCH] convert : remove now-unused ignore_nonllama parameter

---
 convert.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/convert.py b/convert.py
index b0dd6628a..5abd77f05 100755
--- a/convert.py
+++ b/convert.py
@@ -516,7 +516,7 @@ class LlamaHfVocab(Vocab):
     tokenizer_model = "llama"
     name = "hfft"
 
-    def __init__(self, base_path: Path, ignore_nonllama: bool = False):
+    def __init__(self, base_path: Path):
         fname_tokenizer = base_path / FAST_TOKENIZER_FILE
         # if this fails, FileNotFoundError propagates to caller
         with open(fname_tokenizer, encoding='utf-8') as f:
@@ -524,9 +524,7 @@ class LlamaHfVocab(Vocab):
 
         # pre-check so we know if we need transformers
         tokenizer_model: dict[str, Any] = tokenizer_json['model']
-        if ignore_nonllama:
-            pass  # workaround incorrect use of this class for WordPiece
-        elif (
+        if (
             tokenizer_model['type'] != 'BPE' or not tokenizer_model.get('byte_fallback', False)
             or tokenizer_json['decoder']['type'] != 'Sequence'
         ):