ChatON: Add template for DeepSeek

While looking at the tokenized vector, I noticed that the EOS used by
llama.cpp's existing chat_apply_template differs from the one in the
tokenizer_config.json of the DeepSeek LLM, so I have added two entries:

* "deepseek-alt", which matches llama.cpp's chat_apply_template, and
* "deepseek", which matches the one in tokenizer_config.json.

This impacts the assistant suffix and reverse prompt entries.

NOTE: Need to revisit the other entries I added previously. However,
since the default logic should pick the EOS from the model file, the
reverse-prompt being out of sync may not matter much in practice.
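A minimal sketch of why an out-of-sync reverse-prompt may not matter much: generation normally stops on the EOS token id taken from the model file, and the reverse-prompt string match only acts as an additional text-level check. This is an assumed simplification for illustration, not llama.cpp's actual C++ code; `should_stop` and its parameters are hypothetical names.

```python
def should_stop(generated_text: str, last_token_id: int,
                model_eos_id: int, reverse_prompt: str) -> bool:
    # Primary stop condition: the model emitted its own EOS token id,
    # which is read from the model file and independent of the template.
    if last_token_id == model_eos_id:
        return True
    # Fallback: match the template's reverse-prompt in the decoded text.
    return generated_text.endswith(reverse_prompt)
```

So even if the template's reverse-prompt string drifts from the tokenizer's actual EOS text, the token-id check still terminates generation.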
This commit is contained in:
HanishKVC 2024-04-23 12:46:40 +05:30
parent f4b54069f6
commit 84367b9fd1
2 changed files with 43 additions and 0 deletions


@@ -49,6 +49,9 @@
* in-prefix, in-suffix and antiprompt of main.
 * These always add any role-specific prefix and suffix around the passed message.
*
* Sample chaton_meta.json includes template info for
* * llama2, llama3, gemma, chatml, zephyr, deepseek
*
*/
#include <string>


@@ -99,6 +99,46 @@
},
"reverse-prompt": "<eos>",
"systemuser-1st-user-has-prefix": true
},
"deepseek-alt": {
"global": {
"begin": "",
"end": ""
},
"system": {
"prefix": "",
"suffix": "\n"
},
"user": {
"prefix": "### Instruction:\n",
"suffix": "\n"
},
"assistant": {
"prefix": "### Response:\n",
"suffix": "\n<|EOT|>\n"
},
"reverse-prompt": "<|EOT|>",
"systemuser-1st-user-has-prefix": true
},
"deepseek": {
"global": {
"begin": "",
"end": ""
},
"system": {
"prefix": "",
"suffix": "\n\n"
},
"user": {
"prefix": "User: ",
"suffix": "\n\n"
},
"assistant": {
"prefix": "Assistant: ",
"suffix": " <｜end▁of▁sentence｜>\n"
},
"reverse-prompt": "<｜end▁of▁sentence｜>",
"systemuser-1st-user-has-prefix": true
}
}
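A minimal Python sketch of how such a chaton_meta.json entry could drive prompt assembly (the actual llama.cpp implementation is in C++; `apply_template` is a hypothetical helper, and the `systemuser-1st-user-has-prefix` flag is not modelled here):

```python
def apply_template(tmpl: dict, messages: list) -> str:
    # Global begin/end bracket the whole prompt; each role's prefix/suffix
    # wraps its own message content.
    out = tmpl["global"]["begin"]
    for msg in messages:
        role = tmpl[msg["role"]]
        out += role["prefix"] + msg["content"] + role["suffix"]
    return out + tmpl["global"]["end"]

# The "deepseek-alt" entry from the JSON above.
deepseek_alt = {
    "global": {"begin": "", "end": ""},
    "system": {"prefix": "", "suffix": "\n"},
    "user": {"prefix": "### Instruction:\n", "suffix": "\n"},
    "assistant": {"prefix": "### Response:\n", "suffix": "\n<|EOT|>\n"},
}

print(apply_template(deepseek_alt, [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!"},
]))
```

Swapping in the "deepseek" entry changes only the role prefixes and the assistant suffix/EOS text, which is exactly the difference the commit message describes.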