From 67b38f5849ac5b116ed869ac8499aace0435457a Mon Sep 17 00:00:00 2001
From: fbuciuni90
Date: Thu, 6 Feb 2025 16:02:00 +0000
Subject: [PATCH 1/9] Supporting Velvet model

---
 convert_hf_to_gguf.py        |  3 +++
 convert_hf_to_gguf_update.py |  1 +
 include/llama.h              |  1 +
 src/llama-chat.cpp           | 27 ++++++++++++++++++++++++++-
 src/llama-chat.h             |  1 +
 5 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 018a2a588..9da7963c4 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -699,6 +699,9 @@ class Model:
         if chkhsh == "b3f499bb4255f8ca19fccd664443283318f2fd2414d5e0b040fbdd0cc195d6c5":
             # ref: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
             res = "deepseek-r1-qwen"
+        if chkhsh == "a3df2b8943e01cfd7d68c9f8446b294f3d8706d1d6853df65df7fda5d4fcb19f":
+            # ref: https://huggingface.co/Almawave/Velvet-14B
+            res = "velvet"

         if res is None:
             logger.warning("\n")

diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index cea34413f..241d04557 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -109,6 +109,7 @@ models = [
     {"name": "megrez", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"},
     {"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"},
     {"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
+    {"name": "velvet", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Almawave/Velvet-14B"}
 ]

diff --git a/include/llama.h b/include/llama.h
index 61907ed40..a1fbd213a 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -105,6 +105,7 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_CHAMELEON     = 26,
         LLAMA_VOCAB_PRE_TYPE_MINERVA       = 27,
         LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28,
+        LLAMA_VOCAB_PRE_TYPE_VELVET        = 29
     };

 enum llama_rope_type {

diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp
index 028a64794..0cae2bb10 100644
--- a/src/llama-chat.cpp
+++ b/src/llama-chat.cpp
@@ -58,6 +58,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "granite",           LLM_CHAT_TEMPLATE_GRANITE           },
     { "gigachat",          LLM_CHAT_TEMPLATE_GIGACHAT          },
     { "megrez",            LLM_CHAT_TEMPLATE_MEGREZ            },
+    { "velvet",            LLM_CHAT_TEMPLATE_VELVET            },
 };

@@ -167,6 +168,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_GIGACHAT;
     } else if (tmpl_contains("<|role_start|>")) {
         return LLM_CHAT_TEMPLATE_MEGREZ;
+    } else if (tmpl_contains("<instruction>")) {
+        return LLM_CHAT_TEMPLATE_VELVET;
     }
     return LLM_CHAT_TEMPLATE_UNKNOWN;
 }

@@ -566,10 +569,32 @@ int32_t llm_chat_apply_template(
         if (add_ass) {
             ss << "<|role_start|>assistant<|role_end|>";
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_VELVET) {
+        // Velvet template
+        std::string leading_space = "";
+        std::string trailing_space = "";
+        bool trim_assistant_message = true;
+        bool is_inside_turn = false;
+        for (auto message : chat) {
+            if (!is_inside_turn) {
+                ss << leading_space << "<instruction>" << trailing_space;
+                is_inside_turn = true;
+            }
+            std::string role(message->role);
+            std::string content(message->content);
+            if (role == "system") {
+                ss << content << "\n\n";
+            } else if (role == "user") {
+                ss << content << leading_space << "</instruction>";
+            } else {
+                ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
+                is_inside_turn = false;
+            }
+        }
     } else {
         // template not supported
         return -1;
-    }
+    }
     dest = ss.str();
     return dest.size();
 }

diff --git a/src/llama-chat.h b/src/llama-chat.h
index 2f6a0e3e2..e91f09f2c 100644
--- a/src/llama-chat.h
+++ b/src/llama-chat.h
@@ -39,6 +39,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_GIGACHAT,
     LLM_CHAT_TEMPLATE_MEGREZ,
     LLM_CHAT_TEMPLATE_UNKNOWN,
+    LLM_CHAT_TEMPLATE_VELVET
 };

 struct llama_chat_message;
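The core of PATCH 1/9 is the rendering loop in llm_chat_apply_template: it opens an <instruction> block whenever a new turn starts, streams system and user text into the open block, and lets the assistant reply close the turn with the </s> marker. Below is a minimal standalone sketch of that loop, assuming <instruction>/</instruction>/</s> are Velvet's turn delimiters as used in the patch; Msg and render_velvet are illustrative stand-ins for llama_chat_message and the code in llama-chat.cpp, and the leading_space/trailing_space knobs (both empty in the patch) and the trim_assistant_message switch are dropped for brevity:

    #include <iostream>
    #include <string>
    #include <vector>

    struct Msg { std::string role, content; };   // stand-in for llama_chat_message

    static std::string render_velvet(const std::vector<Msg> & chat) {
        std::string out;
        bool is_inside_turn = false;
        for (const auto & m : chat) {
            if (!is_inside_turn) {
                out += "<instruction>";               // a new turn opens an instruction block
                is_inside_turn = true;
            }
            if (m.role == "system") {
                out += m.content + "\n\n";            // system text flows into the open block
            } else if (m.role == "user") {
                out += m.content + "</instruction>";  // user text closes the block
            } else {
                out += m.content + "</s>";            // assistant reply ends the turn
                is_inside_turn = false;
            }
        }
        return out;
    }

    int main() {
        std::cout << render_velvet({{"system", "You are a helpful assistant"},
                                    {"user", "Hello"},
                                    {"assistant", "Hi there"}}) << "\n";
        // prints: <instruction>You are a helpful assistant\n\nHello</instruction>Hi there</s>
    }

Note that this first version emits the system message at its original position and trims assistant messages (trim_assistant_message == true); both behaviours are revised in PATCH 8/9 to match the reference Jinja template.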
From 07e1d0a14caad16e152ffb1e086da6334670a770 Mon Sep 17 00:00:00 2001
From: Francesco Buciuni
Date: Thu, 6 Feb 2025 17:38:30 +0100
Subject: [PATCH 2/9] Update convert_hf_to_gguf.py

Co-authored-by: Xuan-Son Nguyen
---
 convert_hf_to_gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 9da7963c4..b5a4a4aaa 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -701,7 +701,7 @@ class Model:
             res = "deepseek-r1-qwen"
         if chkhsh == "a3df2b8943e01cfd7d68c9f8446b294f3d8706d1d6853df65df7fda5d4fcb19f":
             # ref: https://huggingface.co/Almawave/Velvet-14B
-            res = "velvet"
+            res = "velvet"

         if res is None:
             logger.warning("\n")

From 99be555369fdf8a08ab81a44beccc53ea06fd7c7 Mon Sep 17 00:00:00 2001
From: Francesco Buciuni
Date: Thu, 6 Feb 2025 17:38:58 +0100
Subject: [PATCH 3/9] Update convert_hf_to_gguf.py

Co-authored-by: Xuan-Son Nguyen

From 3df9d221edb988b809bb34288560a592464f01d7 Mon Sep 17 00:00:00 2001
From: Francesco Buciuni
Date: Thu, 6 Feb 2025 17:39:47 +0100
Subject: [PATCH 4/9] Update include/llama.h

Co-authored-by: Xuan-Son Nguyen
---
 include/llama.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/llama.h b/include/llama.h
index a1fbd213a..f08a9ced3 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -105,7 +105,7 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_CHAMELEON     = 26,
         LLAMA_VOCAB_PRE_TYPE_MINERVA       = 27,
         LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28,
-        LLAMA_VOCAB_PRE_TYPE_VELVET        = 29
+        LLAMA_VOCAB_PRE_TYPE_VELVET        = 29,
     };

 enum llama_rope_type {

From 52b0bb3731c1055971460b14398e7cbc308670be Mon Sep 17 00:00:00 2001
From: Francesco Buciuni
Date: Thu, 6 Feb 2025 17:44:45 +0100
Subject: [PATCH 5/9] Update src/llama-chat.cpp

Co-authored-by: Xuan-Son Nguyen
---
 src/llama-chat.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp
index 0cae2bb10..8e60c1e8e 100644
--- a/src/llama-chat.cpp
+++ b/src/llama-chat.cpp
@@ -594,7 +594,7 @@ int32_t llm_chat_apply_template(
     } else {
         // template not supported
         return -1;
-    }
+    }
     dest = ss.str();
     return dest.size();
 }

From 9d86a0442dcecf84c845b7e0a3b3b549f217b703 Mon Sep 17 00:00:00 2001
From: fbuciuni90
Date: Fri, 7 Feb 2025 08:12:02 +0000
Subject: [PATCH 6/9] removing whitespace in src/llama-chat.cpp

---
 src/llama-chat.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp
index 8e60c1e8e..876c90691 100644
--- a/src/llama-chat.cpp
+++ b/src/llama-chat.cpp
@@ -169,7 +169,7 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
     } else if (tmpl_contains("<|role_start|>")) {
         return LLM_CHAT_TEMPLATE_MEGREZ;
     } else if (tmpl_contains("<instruction>")) {
-        return LLM_CHAT_TEMPLATE_VELVET;
+        return LLM_CHAT_TEMPLATE_VELVET;
     }
     return LLM_CHAT_TEMPLATE_UNKNOWN;
 }

From 66e6d10b61d190935dee074b5515d3494270cb71 Mon Sep 17 00:00:00 2001
From: "f.buciuni"
Date: Fri, 7 Feb 2025 19:53:16 +0100
Subject: [PATCH 7/9] fixing position of LLM_CHAT_TEMPLATE_VELVET in enum

---
 src/llama-chat.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama-chat.h b/src/llama-chat.h
index e91f09f2c..0fe4b8e22 100644
--- a/src/llama-chat.h
+++ b/src/llama-chat.h
@@ -38,8 +38,8 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_GRANITE,
     LLM_CHAT_TEMPLATE_GIGACHAT,
     LLM_CHAT_TEMPLATE_MEGREZ,
+    LLM_CHAT_TEMPLATE_VELVET,
     LLM_CHAT_TEMPLATE_UNKNOWN,
-    LLM_CHAT_TEMPLATE_VELVET
 };

 struct llama_chat_message;
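With the vocab enum, the template map entry, the detection rule, and the enum-position fix all in place, the new template is reachable through the public API in include/llama.h. A usage sketch, assuming the model-free six-argument llama_chat_apply_template(tmpl, chat, n_msg, add_ass, buf, length) signature that llama.h exposes at this point in the series; error handling is kept minimal:

    #include "llama.h"

    #include <cstdio>
    #include <vector>

    int main() {
        const llama_chat_message chat[] = {
            {"system", "You are a helpful assistant"},
            {"user",   "Hello"},
        };
        std::vector<char> buf(1024);
        // "velvet" resolves through LLM_CHAT_TEMPLATES to LLM_CHAT_TEMPLATE_VELVET;
        // a raw Jinja string containing "<instruction>" would be routed the same
        // way by llm_chat_detect_template
        const int32_t n = llama_chat_apply_template("velvet", chat, 2, /*add_ass=*/false,
                                                    buf.data(), (int32_t) buf.size());
        if (n < 0) {
            fprintf(stderr, "velvet template not supported\n");
            return 1;
        }
        printf("%.*s\n", n, buf.data());
        return 0;
    }

Note that the Velvet branch defines no add_ass handling of its own (it appends no generation prompt), so the flag is passed as false here.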
From 39795570dbe90a05f98520c3fd0b7d783464ab11 Mon Sep 17 00:00:00 2001
From: "f.buciuni"
Date: Fri, 7 Feb 2025 19:54:55 +0100
Subject: [PATCH 8/9] updating velvet chat template

---
 src/llama-chat.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp
index 876c90691..af0539bd4 100644
--- a/src/llama-chat.cpp
+++ b/src/llama-chat.cpp
@@ -573,8 +573,11 @@ int32_t llm_chat_apply_template(
         // Velvet template
         std::string leading_space = "";
         std::string trailing_space = "";
-        bool trim_assistant_message = true;
+        bool trim_assistant_message = false;
         bool is_inside_turn = false;
+        std::string system_message = "";
+        std::string last_message(chat.back()->content);
+        ss << "<s>";
         for (auto message : chat) {
             if (!is_inside_turn) {
                 ss << leading_space << "<instruction>" << trailing_space;
@@ -583,9 +586,9 @@ int32_t llm_chat_apply_template(
             std::string role(message->role);
             std::string content(message->content);
             if (role == "system") {
-                ss << content << "\n\n";
+                system_message = content + "\n\n";
             } else if (role == "user") {
-                ss << content << leading_space << "</instruction>";
+                ss << (content==last_message ? system_message : "") << content << leading_space << "</instruction>";
             } else {
                 ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
                 is_inside_turn = false;
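PATCH 8/9 brings the C++ path in line with the reference Jinja template in three ways: the <s> BOS token is emitted up front, assistant messages are no longer trimmed, and the system prompt is buffered and injected immediately before the last user message rather than at the top of the conversation. A condensed sketch of the new hoisting behaviour (Msg, render, and the variable names are illustrative, not part of the patch):

    #include <iostream>
    #include <string>
    #include <vector>

    struct Msg { std::string role, content; };

    static std::string render(const std::vector<Msg> & chat) {
        std::string out = "<s>";                      // BOS now emitted by the formatter
        std::string system_message;
        const std::string last(chat.back().content);
        bool is_inside_turn = false;
        for (const auto & m : chat) {
            if (!is_inside_turn) { out += "<instruction>"; is_inside_turn = true; }
            if (m.role == "system") {
                system_message = m.content + "\n\n";  // buffered instead of emitted
            } else if (m.role == "user") {
                // the system prompt lands only in front of the final user message
                out += (m.content == last ? system_message : "") + m.content + "</instruction>";
            } else {
                out += m.content + "</s>";
                is_inside_turn = false;
            }
        }
        return out;
    }

    int main() {
        std::cout << render({{"system", "Be brief."}, {"user", "Hi"},
                             {"assistant", "Hello!"}, {"user", "Why?"}}) << "\n";
        // prints: <s><instruction>Hi</instruction>Hello!</s><instruction>Be brief.\n\nWhy?</instruction>
    }

Two subtleties follow from the comparison: it is by content, so an earlier user message whose text happens to equal the final one would also receive the system prompt; and it keys off the last message of any role, while the Jinja template keys off user_messages[-1], so the two agree whenever the conversation ends with a user turn, as in the test added next.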
From 0a8995a375f897807efef169bb6d89209b962409 Mon Sep 17 00:00:00 2001
From: "f.buciuni"
Date: Fri, 7 Feb 2025 19:56:23 +0100
Subject: [PATCH 9/9] adding test case for velvet chat template

---
 tests/test-chat-template.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index e0314ae1d..d970beb57 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -262,6 +262,14 @@ int main(void) {
            /* .bos_token= */ "",
            /* .eos_token= */ "",
        },
+       {
+           /* .name= */ "velvet",
+           /* .template_str= */ "{%- if messages[0][\"role\"] == \"system\" %}\n    {%- set system_message = messages[0][\"content\"] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n    {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n        {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n            {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n        {%- endif %}\n        {%- set ns.index = ns.index + 1 %}\n    {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if message[\"role\"] == \"user\" %}\n        {%- if message == user_messages[-1] and system_message is defined and tools is not none %}\n            {{- \"<instruction>\" + system_message + \"\\n\\n\" + \"[\" }}\n            {%- for tool in tools %}\n{{ tool }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- else %}\n                    {{- \"]\" }}\n                {%- endif %}\n            {%- endfor %}\n            {{- \"\" + \"\\n\\n\" + message[\"content\"] + \"</instruction>\" }}\n        {%- elif message == user_messages[-1] and system_message is defined and tools is none %}\n            {{- \"<instruction>\" + system_message + \"\\n\\n\" + message[\"content\"] + \"</instruction>\" }}\n        {%- else %}\n            {{- \"<instruction>\" + message[\"content\"] + \"</instruction>\" }}\n        {%- endif %}\n    {%- elif (message.tool_calls is defined and message.tool_calls is not none) %}\n        {{- \"[\" }}\n        {%- for tool_call in message.tool_calls %}\n{{ tool_call }}\n            {%- if not loop.last %}\n                {{- \", \" }}\n            {%- else %}\n                {{- \"]\" }}\n            {%- endif %}\n        {%- endfor %} {{- \"\" + eos_token }}\n    {%- elif message[\"role\"] == \"assistant\" %}\n        {{- message[\"content\"] + eos_token}}\n    {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n        {%- if message.content is defined and message.content.content is defined %}\n            {%- set content = message.content.content %}\n        {%- else %}\n            {%- set content = message.content %}\n        {%- endif %}\n        {{- '{\"content\": ' + content|string }}\n        {{- '}' }}\n    {%- else %}\n        {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n    {%- endif %}\n{%- endfor %}\n",
+           /* .expected_output= */ "<s><instruction>Hello</instruction>Hi there</s><instruction>Who are you</instruction>   I am an assistant   </s><instruction>You are a helpful assistant\n\nAnother question</instruction>",
+           /* .expected_output_jinja= */ "<s><instruction>Hello</instruction>Hi there</s><instruction>Who are you</instruction>   I am an assistant   </s><instruction>You are a helpful assistant\n\nAnother question</instruction>",
+           /* .bos_token= */ "<s>",
+           /* .eos_token= */ "</s>",
+       },
    };

    std::vector<char> formatted_chat(1024);
    int32_t res;
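The test passes .template_str verbatim, so it exercises detection as well as rendering: llm_chat_detect_template looks for the <instruction> marker introduced in PATCH 1/9 and maps the string to LLM_CHAT_TEMPLATE_VELVET before the Velvet branch formats the conversation. Detection is plain substring search; a self-contained sketch of the idea, with tmpl_contains as a local stand-in for the helper in llama-chat.cpp:

    #include <iostream>
    #include <string>

    // stand-in for the tmpl_contains helper used by llm_chat_detect_template
    static bool tmpl_contains(const std::string & tmpl, const std::string & needle) {
        return tmpl.find(needle) != std::string::npos;
    }

    int main() {
        const std::string jinja = "{{- bos_token }}{%- for message in loop_messages %}"
                                  "<instruction>...</instruction>{%- endfor %}";
        // the else-if chain in llm_chat_detect_template runs in order, so the
        // marker must not appear in templates checked earlier in the chain
        std::cout << (tmpl_contains(jinja, "<instruction>")
                          ? "LLM_CHAT_TEMPLATE_VELVET"
                          : "LLM_CHAT_TEMPLATE_UNKNOWN")
                  << "\n";
        return 0;
    }

Because the C++ formatter and the Jinja rendering are expected to agree for this conversation, .expected_output and .expected_output_jinja are identical strings in the Velvet entry.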