Simplified logic and UI
* removed "custom" template * fixed reading template, prefix and suffix from payload * removed `chat_template` from UI
This commit is contained in:
parent
a0e27c1cd0
commit
73f435fdfc
3 changed files with 24 additions and 53 deletions
|
@ -119,36 +119,6 @@
|
|||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<!-- Templates -->
|
||||
<div class="dropdown dropdown-end dropdown-bottom">
|
||||
<div tabindex="0" role="button" class="btn m-1">
|
||||
Templates
|
||||
<svg width="12px" height="12px" class="inline-block h-2 w-2 fill-current opacity-60" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 2048 2048">
|
||||
<path d="M1799 349l242 241-1017 1017L7 590l242-241 775 775 775-775z"></path>
|
||||
</svg>
|
||||
</div>
|
||||
<ul tabindex="0" class="dropdown-content bg-base-300 rounded-box z-[1] w-52 p-2 shadow-2xl h-80 overflow-y-auto">
|
||||
<li>
|
||||
<button
|
||||
class="btn btn-sm btn-block w-full btn-ghost justify-start"
|
||||
:class="{ 'btn-active': config.chat_template === 'chatml' }"
|
||||
@click="config.chat_template = 'chatml'">
|
||||
auto
|
||||
</button>
|
||||
</li>
|
||||
<li v-for="tmpl in templates">
|
||||
<input
|
||||
type="radio"
|
||||
name="tmpl-dropdown"
|
||||
class="theme-controller btn btn-sm btn-block w-full btn-ghost justify-start"
|
||||
:aria-label="tmpl"
|
||||
:value="tmpl"
|
||||
:checked="config.chat_template === tmpl"
|
||||
@click="setSelectedTemplate(tmpl)" />
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
@ -319,7 +289,6 @@
|
|||
// Note: in order not to introduce breaking changes, please keep the same data type (number, string, etc) if you want to change the default value. Do not use null or undefined for default value.
|
||||
apiKey: '',
|
||||
systemMessage: 'You are a helpful assistant.',
|
||||
chat_template: 'chatml',
|
||||
input_prefix: '',
|
||||
input_suffix: '',
|
||||
// make sure these default values are in sync with `common.h`
|
||||
|
@ -347,7 +316,6 @@
|
|||
const CONFIG_INFO = {
|
||||
apiKey: 'Set the API Key if you are using --api-key option for the server.',
|
||||
systemMessage: 'The starting message that defines how model should behave.',
|
||||
chat_template: 'The fromat used for messages.',
|
||||
input_prefix: 'Prefix for user messages in custom chat templates.',
|
||||
input_suffix: 'Suffix for user messages in custom chat templates.',
|
||||
samplers: 'The order at which samplers are applied, in simplified way. Default is "dkypmxt": dry->top_k->typ_p->top_p->min_p->xtc->temperature',
|
||||
|
@ -375,7 +343,6 @@
|
|||
const CONFIG_NUMERIC_KEYS = Object.entries(CONFIG_DEFAULT).filter(e => isNumeric(e[1])).map(e => e[0]);
|
||||
// list of themes supported by daisyui
|
||||
const THEMES = ['light', 'dark', 'cupcake', 'bumblebee', 'emerald', 'corporate', 'synthwave', 'retro', 'cyberpunk', 'valentine', 'halloween', 'garden', 'forest', 'aqua', 'lofi', 'pastel', 'fantasy', 'wireframe', 'black', 'luxury', 'dracula', 'cmyk', 'autumn', 'business', 'acid', 'lemonade', 'night', 'coffee', 'winter', 'dim', 'nord', 'sunset'];
|
||||
const CHAT_TEMPLATES = ['chatml', 'llama2', 'mistral', 'phi3', 'zephyr', 'monarch', 'gemma', 'gemma2', 'orion', 'openchat', 'vicuna', 'vicuna-orca', 'deepseek', 'command-r', 'llama3', 'chatglm3', 'chatglm4', 'minicpm', 'deepseek2', 'exaone3', 'rwkv-world', 'granite', 'custom'];
|
||||
|
||||
// markdown support
|
||||
const VueMarkdown = defineComponent(
|
||||
|
@ -522,7 +489,6 @@
|
|||
editingMsg: null,
|
||||
// const
|
||||
themes: THEMES,
|
||||
templates: CHAT_TEMPLATES,
|
||||
configDefault: {...CONFIG_DEFAULT},
|
||||
configInfo: {...CONFIG_INFO},
|
||||
}
|
||||
|
@ -542,9 +508,6 @@
|
|||
this.selectedTheme = theme;
|
||||
StorageUtils.setTheme(theme);
|
||||
},
|
||||
setSelectedTemplate(template) {
|
||||
this.config.chat_template = template;
|
||||
},
|
||||
newConversation() {
|
||||
if (this.isGenerating) return;
|
||||
this.viewingConvId = StorageUtils.getNewConvId();
|
||||
|
@ -604,7 +567,6 @@
|
|||
stream: true,
|
||||
cache_prompt: true,
|
||||
samplers: this.config.samplers,
|
||||
chat_template: this.config.chat_template,
|
||||
input_prefix: this.config.input_prefix,
|
||||
input_suffix: this.config.input_suffix,
|
||||
temperature: this.config.temperature,
|
||||
|
|
|
@ -798,6 +798,7 @@ struct server_context {
|
|||
slot.oaicompat = false;
|
||||
slot.oaicompat_model = "";
|
||||
}
|
||||
std::string default_empty = "";
|
||||
|
||||
slot.params.stream = json_value(data, "stream", false);
|
||||
slot.params.cache_prompt = json_value(data, "cache_prompt", false);
|
||||
|
@ -1147,9 +1148,6 @@ struct server_context {
|
|||
{"model", params.model_alias},
|
||||
{"seed", slot.sparams.seed},
|
||||
{"seed_cur", slot.smpl ? common_sampler_get_seed(slot.smpl) : 0},
|
||||
{"chat_template", params.chat_template},
|
||||
{"input_prefix", params.input_prefix},
|
||||
{"input_suffix", params.input_suffix},
|
||||
{"temperature", slot.sparams.temp},
|
||||
{"dynatemp_range", slot.sparams.dynatemp_range},
|
||||
{"dynatemp_exponent", slot.sparams.dynatemp_exponent},
|
||||
|
@ -3221,22 +3219,18 @@ int main(int argc, char ** argv) {
|
|||
|
||||
LOG_INF("%s: model loaded\n", __func__);
|
||||
|
||||
// if a standard chat template is not chosen, check prefix and suffix to switch to custom template
|
||||
// if a standard chat template is not chosen, check prefix and suffix to switch to custom formatting
|
||||
// otherwise use the one that comes with the model (if any)
|
||||
// if a standard chat template is chosen, warn about prefix and suffix not being used
|
||||
if (params.chat_template.empty()) {
|
||||
if (!params.input_prefix.empty() || !params.input_suffix.empty()) {
|
||||
LOG_WRN("%s: Prefix and suffix are used instead of a chat template. This may cause the model to output suboptimal responses\n", __func__);
|
||||
params.chat_template = "custom";
|
||||
LOG_WRN("%s: Prefix and suffix will be used for a custom chat template. This may cause the model to output suboptimal responses\n", __func__);
|
||||
} else if (!ctx_server.validate_model_chat_template()) {
|
||||
LOG_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n", __func__);
|
||||
params.chat_template = "chatml";
|
||||
}
|
||||
} else if (params.chat_template != "custom" &&
|
||||
(!params.input_prefix.empty() || !params.input_suffix.empty())) {
|
||||
LOG_WRN("%s: Prefix and suffix are defined, but will not be used because a standard chat template is chosen.\n", __func__);
|
||||
} else {
|
||||
LOG_WRN("%s: Custom chat template is chosen. This may cause the model to output suboptimal responses\n", __func__);
|
||||
} else if (!params.input_prefix.empty() || !params.input_suffix.empty()) {
|
||||
LOG_WRN("%s: Prefix and suffix are defined, but will not be used because a chat template '%s' is chosen.\n", __func__, params.chat_template.c_str());
|
||||
}
|
||||
|
||||
// print sample chat example to make it clear which template is used
|
||||
|
|
|
@ -304,6 +304,8 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
|
|||
std::vector<common_chat_msg> chat;
|
||||
std::string formatted_chat;
|
||||
|
||||
bool is_custom = !prefix.empty() || !suffix.empty();
|
||||
|
||||
for (size_t i = 0; i < messages.size(); ++i) {
|
||||
const auto & curr_msg = messages[i];
|
||||
|
||||
|
@ -326,7 +328,7 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
|
|||
throw std::runtime_error("Missing 'content' (ref: https://github.com/ggerganov/llama.cpp/issues/8367)");
|
||||
}
|
||||
|
||||
if (tmpl == "custom") {
|
||||
if (is_custom) {
|
||||
// simple format using prefix and suffix
|
||||
if (role == "user") formatted_chat += prefix + content + suffix;
|
||||
else formatted_chat += content;
|
||||
|
@ -335,7 +337,7 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
|
|||
}
|
||||
}
|
||||
|
||||
if (tmpl != "custom") formatted_chat = common_chat_apply_template(model, tmpl, chat, true);
|
||||
if (!is_custom) formatted_chat = common_chat_apply_template(model, tmpl, chat, true);
|
||||
LOG_WRN("formatted_chat using '%s': '%s'\n", tmpl.c_str(), formatted_chat.c_str());
|
||||
|
||||
return formatted_chat;
|
||||
|
@ -351,7 +353,7 @@ inline std::string format_chat_example(const struct llama_model * model, const s
|
|||
|
||||
std::string formatted_example;
|
||||
|
||||
if (tmpl == "custom") {
|
||||
if (!prefix.empty() || !suffix.empty()) {
|
||||
for (auto message : msgs) {
|
||||
if (message.role == "user") formatted_example += prefix + message.content + suffix;
|
||||
else formatted_example += message.content;
|
||||
|
@ -634,7 +636,20 @@ static json oaicompat_completion_params_parse(
|
|||
llama_params["__oaicompat"] = true;
|
||||
|
||||
// Apply chat template to the list of messages
|
||||
llama_params["prompt"] = format_chat(model, chat_template, input_prefix, input_suffix, body.at("messages"));
|
||||
std::string chat_tmpl = chat_template;
|
||||
std::string prefix = "";
|
||||
std::string suffix = "";
|
||||
|
||||
// if template is sent in data, ignore prefix and suffix
|
||||
if (body.contains("chat_template")) {
|
||||
chat_tmpl = body.at("chat_template").get<std::string>();
|
||||
LOG_WRN("\nUsing '%s' template, prefix and suffix are ignored.\n", chat_tmpl.c_str());
|
||||
} else {
|
||||
prefix = (body.contains("input_prefix") ? body.at("input_prefix").get<std::string>() : input_prefix);
|
||||
suffix = (body.contains("input_suffix") ? body.at("input_suffix").get<std::string>() : input_suffix);
|
||||
}
|
||||
|
||||
llama_params["prompt"] = format_chat(model, chat_tmpl, prefix, suffix, body.at("messages"));
|
||||
|
||||
// Handle "stop" field
|
||||
if (body.contains("stop") && body.at("stop").is_string()) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue