From 84367b9fd1f0aa5db1a1b80d14febe18e1e19a82 Mon Sep 17 00:00:00 2001 From: HanishKVC Date: Tue, 23 Apr 2024 12:46:40 +0530 Subject: [PATCH] ChatON: Add template for DeepSeek While looking at the tokenized vector, I noticed that the EOS used by the existing chat_apply_template of llama.cpp is different from the one in tokenizer_config.json of the DeepSeek LLM, so I have added two entries: * "deepseek-alt", which matches llama.cpp's chat_apply_template, and * "deepseek", which matches tokenizer_config.json. This impacts the assistant suffix and reverse-prompt entries. Because of this, the other entries which I added previously need to be looked into at a later time. However, as the default logic should be picking the EOS from the model file, I assume the reverse-prompt being out of sync may not matter beyond a point. --- common/chaton.hpp | 3 +++ examples/chaton_meta.json | 40 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/common/chaton.hpp b/common/chaton.hpp index be9957ff9..222c8657f 100644 --- a/common/chaton.hpp +++ b/common/chaton.hpp @@ -49,6 +49,9 @@ * in-prefix, in-suffix and antiprompt of main. * These always adds any role specific prefix and suffix around the passed message. 
* + * Sample chaton_meta.json includes template info for + * * llama2, llama3, gemma, chatml, zephyr, deepseek + * */ #include diff --git a/examples/chaton_meta.json b/examples/chaton_meta.json index 05ffcc8fb..4f6bcfc30 100644 --- a/examples/chaton_meta.json +++ b/examples/chaton_meta.json @@ -99,6 +99,46 @@ }, "reverse-prompt": "", "systemuser-1st-user-has-prefix": true + }, + "deepseek-alt": { + "global": { + "begin": "", + "end": "" + }, + "system": { + "prefix": "", + "suffix": "\n" + }, + "user": { + "prefix": "### Instruction:\n", + "suffix": "\n" + }, + "assistant": { + "prefix": "### Response:\n", + "suffix": "\n<|EOT|>\n" + }, + "reverse-prompt": "<|EOT|>", + "systemuser-1st-user-has-prefix": true + }, + "deepseek": { + "global": { + "begin": "", + "end": "" + }, + "system": { + "prefix": "", + "suffix": "\n\n" + }, + "user": { + "prefix": "User: ", + "suffix": "\n\n" + }, + "assistant": { + "prefix": "Assistant: ", + "suffix": " <|end▁of▁sentence|>\n" + }, + "reverse-prompt": "<|end▁of▁sentence|>", + "systemuser-1st-user-has-prefix": true } }