From eaefebb2c90729c9652c0a056104ca03169847be Mon Sep 17 00:00:00 2001
From: slaren <slarengh@gmail.com>
Date: Sat, 18 Nov 2023 14:53:34 +0100
Subject: [PATCH] gguf-py : export chat templates

---
 gguf-py/gguf/constants.py   | 29 +++++++++++++++--------------
 gguf-py/gguf/gguf_writer.py |  3 +++
 gguf-py/gguf/vocab.py       |  6 ++++++
 3 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 7f63361bd..8bd82daca 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -56,20 +56,21 @@ class Keys:
         SCALING_FINETUNED    = "{arch}.rope.scaling.finetuned"
 
     class Tokenizer:
-        MODEL      = "tokenizer.ggml.model"
-        LIST       = "tokenizer.ggml.tokens"
-        TOKEN_TYPE = "tokenizer.ggml.token_type"
-        SCORES     = "tokenizer.ggml.scores"
-        MERGES     = "tokenizer.ggml.merges"
-        BOS_ID     = "tokenizer.ggml.bos_token_id"
-        EOS_ID     = "tokenizer.ggml.eos_token_id"
-        UNK_ID     = "tokenizer.ggml.unknown_token_id"
-        SEP_ID     = "tokenizer.ggml.seperator_token_id"
-        PAD_ID     = "tokenizer.ggml.padding_token_id"
-        ADD_BOS    = "tokenizer.ggml.add_bos_token"
-        ADD_EOS    = "tokenizer.ggml.add_eos_token"
-        HF_JSON    = "tokenizer.huggingface.json"
-        RWKV       = "tokenizer.rwkv.world"
+        MODEL         = "tokenizer.ggml.model"
+        LIST          = "tokenizer.ggml.tokens"
+        TOKEN_TYPE    = "tokenizer.ggml.token_type"
+        SCORES        = "tokenizer.ggml.scores"
+        MERGES        = "tokenizer.ggml.merges"
+        BOS_ID        = "tokenizer.ggml.bos_token_id"
+        EOS_ID        = "tokenizer.ggml.eos_token_id"
+        UNK_ID        = "tokenizer.ggml.unknown_token_id"
+        SEP_ID        = "tokenizer.ggml.seperator_token_id"
+        PAD_ID        = "tokenizer.ggml.padding_token_id"
+        ADD_BOS       = "tokenizer.ggml.add_bos_token"
+        ADD_EOS       = "tokenizer.ggml.add_eos_token"
+        HF_JSON       = "tokenizer.huggingface.json"
+        RWKV          = "tokenizer.rwkv.world"
+        CHAT_TEMPLATE = "tokenizer.chat_template"
 
 
 #
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index c3b8c588f..ab7382c44 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -399,6 +399,9 @@ class GGUFWriter:
     def add_add_eos_token(self, value: bool) -> None:
         self.add_bool(Keys.Tokenizer.ADD_EOS, value)
 
+    def add_chat_template(self, value: str) -> None:
+        self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
+
     def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
         pack_prefix = ''
         if not skip_pack_prefix:
diff --git a/gguf-py/gguf/vocab.py b/gguf-py/gguf/vocab.py
index b9f50a0af..cc3f6bf4c 100644
--- a/gguf-py/gguf/vocab.py
+++ b/gguf-py/gguf/vocab.py
@@ -13,6 +13,7 @@ class SpecialVocab:
     merges: list[str]
     add_special_token: dict[str, bool]
     special_token_ids: dict[str, int]
+    chat_template: str | None
 
     def __init__(
         self, path: str | os.PathLike[str], load_merges: bool = False,
@@ -67,6 +68,10 @@ class SpecialVocab:
             if not quiet:
                 print(f'gguf: Setting add_{typ}_token to {value}')
             add_handler(value)
+        if self.chat_template is not None:
+            if not quiet:
+                print(f'gguf: Setting chat_template to {self.chat_template}')
+            gw.add_chat_template(self.chat_template)
 
     def _load(self, path: Path) -> None:
         self._try_load_from_tokenizer_json(path)
@@ -156,6 +161,7 @@ class SpecialVocab:
                 None,
             )
             self._set_special_token(typ, maybe_token_id)
+        self.chat_template = tokenizer_config.get('chat_template')
         return True
 
     def _try_load_from_config_json(self, path: Path) -> bool: