From edd98313ca7362c7cee7fad9c5f4ce5da32f0a89 Mon Sep 17 00:00:00 2001
From: slaren
Date: Sat, 18 Nov 2023 16:59:22 +0100
Subject: [PATCH] gguf-py : check chat_template type

---
 gguf-py/gguf/vocab.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/gguf-py/gguf/vocab.py b/gguf-py/gguf/vocab.py
index cc3f6bf4c..f75286eca 100644
--- a/gguf-py/gguf/vocab.py
+++ b/gguf-py/gguf/vocab.py
@@ -137,6 +137,14 @@ class SpecialVocab:
             return True
         with open(tokenizer_config_file, encoding = 'utf-8') as f:
             tokenizer_config = json.load(f)
+        chat_template = tokenizer_config.get('chat_template')
+        if chat_template is None or isinstance(chat_template, str):
+            self.chat_template = chat_template
+        else:
+            print(
+                f'gguf: WARNING: Bad type for chat_template field in {tokenizer_config_file!r} - ignoring',
+                file = sys.stderr
+            )
         for typ in self.special_token_types:
             add_entry = tokenizer_config.get(f'add_{typ}_token')
             if isinstance(add_entry, bool):
@@ -161,7 +169,6 @@ class SpecialVocab:
                 None,
             )
             self._set_special_token(typ, maybe_token_id)
-        self.chat_template = tokenizer_config.get('chat_template')
         return True
 
     def _try_load_from_config_json(self, path: Path) -> bool: