fix codestyle
This commit is contained in:
parent
bbe1926fac
commit
d07f0a90c3
3 changed files with 30 additions and 31 deletions
|
@ -87,35 +87,35 @@ class Keys:
|
|||
TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
|
||||
|
||||
class Tokenizer:
|
||||
MODEL = "tokenizer.ggml.model"
|
||||
PRE = "tokenizer.ggml.pre"
|
||||
LIST = "tokenizer.ggml.tokens"
|
||||
TOKEN_TYPE = "tokenizer.ggml.token_type"
|
||||
TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count" # for BERT-style token types
|
||||
SCORES = "tokenizer.ggml.scores"
|
||||
MERGES = "tokenizer.ggml.merges"
|
||||
BOS_ID = "tokenizer.ggml.bos_token_id"
|
||||
EOS_ID = "tokenizer.ggml.eos_token_id"
|
||||
UNK_ID = "tokenizer.ggml.unknown_token_id"
|
||||
SEP_ID = "tokenizer.ggml.seperator_token_id"
|
||||
PAD_ID = "tokenizer.ggml.padding_token_id"
|
||||
CLS_ID = "tokenizer.ggml.cls_token_id"
|
||||
MASK_ID = "tokenizer.ggml.mask_token_id"
|
||||
ADD_BOS = "tokenizer.ggml.add_bos_token"
|
||||
ADD_EOS = "tokenizer.ggml.add_eos_token"
|
||||
ADD_PREFIX = "tokenizer.ggml.add_space_prefix"
|
||||
REMOVE_EXTRA_WS = "tokenizer.ggml.remove_extra_whitespaces"
|
||||
MODEL = "tokenizer.ggml.model"
|
||||
PRE = "tokenizer.ggml.pre"
|
||||
LIST = "tokenizer.ggml.tokens"
|
||||
TOKEN_TYPE = "tokenizer.ggml.token_type"
|
||||
TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count" # for BERT-style token types
|
||||
SCORES = "tokenizer.ggml.scores"
|
||||
MERGES = "tokenizer.ggml.merges"
|
||||
BOS_ID = "tokenizer.ggml.bos_token_id"
|
||||
EOS_ID = "tokenizer.ggml.eos_token_id"
|
||||
UNK_ID = "tokenizer.ggml.unknown_token_id"
|
||||
SEP_ID = "tokenizer.ggml.seperator_token_id"
|
||||
PAD_ID = "tokenizer.ggml.padding_token_id"
|
||||
CLS_ID = "tokenizer.ggml.cls_token_id"
|
||||
MASK_ID = "tokenizer.ggml.mask_token_id"
|
||||
ADD_BOS = "tokenizer.ggml.add_bos_token"
|
||||
ADD_EOS = "tokenizer.ggml.add_eos_token"
|
||||
ADD_PREFIX = "tokenizer.ggml.add_space_prefix"
|
||||
REMOVE_EXTRA_WS = "tokenizer.ggml.remove_extra_whitespaces"
|
||||
PRECOMPILED_CHARSMAP = "tokenizer.ggml.precompiled_charsmap"
|
||||
HF_JSON = "tokenizer.huggingface.json"
|
||||
RWKV = "tokenizer.rwkv.world"
|
||||
CHAT_TEMPLATE = "tokenizer.chat_template"
|
||||
CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
|
||||
CHAT_TEMPLATES = "tokenizer.chat_templates"
|
||||
HF_JSON = "tokenizer.huggingface.json"
|
||||
RWKV = "tokenizer.rwkv.world"
|
||||
CHAT_TEMPLATE = "tokenizer.chat_template"
|
||||
CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
|
||||
CHAT_TEMPLATES = "tokenizer.chat_templates"
|
||||
# FIM/Infill special tokens constants
|
||||
PREFIX_ID = "tokenizer.ggml.prefix_token_id"
|
||||
SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
|
||||
MIDDLE_ID = "tokenizer.ggml.middle_token_id"
|
||||
EOT_ID = "tokenizer.ggml.eot_token_id"
|
||||
PREFIX_ID = "tokenizer.ggml.prefix_token_id"
|
||||
SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
|
||||
MIDDLE_ID = "tokenizer.ggml.middle_token_id"
|
||||
EOT_ID = "tokenizer.ggml.eot_token_id"
|
||||
|
||||
#
|
||||
# recommended mapping of model tensor names for storage in gguf
|
||||
|
|
|
@ -5222,7 +5222,6 @@ static void llm_load_vocab(
|
|||
vocab.special_eot_id = 107;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
vocab.linefeed_id = llama_byte_to_token(vocab, '\n');
|
||||
} catch (const std::exception & e) {
|
||||
|
@ -19829,7 +19828,7 @@ static int32_t llama_chat_apply_template_internal(
|
|||
if (add_ass) {
|
||||
ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
|
||||
}
|
||||
} else if (tmpl.find("chatglm3") != std::string::npos || tmpl.find("[gMASK]sop") != std::string::npos) {
|
||||
} else if (tmpl == "chatglm3" || tmpl_contains("[gMASK]sop")) {
|
||||
// chatglm3-6b
|
||||
ss << "[gMASK]" << "sop";
|
||||
for (auto message : chat) {
|
||||
|
@ -19839,7 +19838,7 @@ static int32_t llama_chat_apply_template_internal(
|
|||
if (add_ass) {
|
||||
ss << "<|assistant|>";
|
||||
}
|
||||
} else if (tmpl.find("chatglm4") != std::string::npos || tmpl.find("[gMASK]<sop>") != std::string::npos) {
|
||||
} else if (tmpl == "chaglm4" || tmpl_contains("[gMASK]<sop>")) {
|
||||
ss << "[gMASK]" << "<sop>";
|
||||
for (auto message : chat) {
|
||||
std::string role(message->role);
|
||||
|
|
|
@ -61,7 +61,7 @@ int main(void) {
|
|||
// ChatGLM3
|
||||
"{% for message in messages %}{% if loop.first %}[gMASK]sop<|{{ message['role'] }}|>\n {{ message['content'] }}{% else %}<|{{ message['role'] }}|>\n {{ message['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
|
||||
// ChatGLM4
|
||||
"chatglm4",
|
||||
u8"[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
|
||||
};
|
||||
std::vector<std::string> expected_output = {
|
||||
// teknium/OpenHermes-2.5-Mistral-7B
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue