server : simplify model chat template validation
parent fef64c587d
commit b1b3ba886e
1 changed file with 10 additions and 14 deletions
@@ -298,7 +298,6 @@ struct server_metrics {
     uint64_t n_tokens_predicted = 0;
     uint64_t t_tokens_generation = 0;
 
-
     void on_prompt_eval(const server_slot &slot) {
         n_prompt_tokens_processed_total += slot.n_prompt_tokens_processed;
         n_prompt_tokens_processed += slot.n_prompt_tokens_processed;
@@ -366,8 +365,7 @@ struct llama_server_context {
         params = params_;
 
         std::tie(model, ctx) = llama_init_from_gpt_params(params);
-        if (model == nullptr)
-        {
+        if (model == nullptr) {
             LOG_ERROR("unable to load model", {{"model", params.model}});
             return false;
         }
@@ -379,15 +377,12 @@ struct llama_server_context {
         return true;
     }
 
-    void validate_model_chat_template(server_params & sparams) {
+    bool validate_model_chat_template() const {
         llama_chat_message chat[] = {{"user", "test"}};
 
-        std::vector<char> buf(1);
-        int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
-        if (res < 0) {
-            LOG_ERROR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
-            sparams.chat_template = "chatml";
-        }
+        const int res = llama_chat_apply_template(model, nullptr, chat, 1, true, nullptr, 0);
+
+        return res > 0;
     }
 
     void initialize() {
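For reference, the simplified check relies on llama_chat_apply_template's length-query behavior: called with a null output buffer and zero length it writes nothing, returning the number of bytes the formatted prompt would need, or a negative value when the model's built-in template is not recognized. The sketch below restates the new member function as a free function; the name model_has_usable_chat_template is illustrative and not part of the patch.

    // Minimal sketch of the probe used above, assuming llama.h's
    // llama_chat_apply_template(model, tmpl, chat, n_msg, add_ass, buf, length).
    #include "llama.h"

    static bool model_has_usable_chat_template(const llama_model * model) {
        // one dummy message is enough to exercise the model's built-in template
        llama_chat_message chat[] = {{"user", "test"}};

        // buf == nullptr, length == 0: nothing is written; the call returns the
        // length the formatted prompt would require, or a negative value if the
        // template embedded in the model is not supported by llama.cpp
        const int32_t res = llama_chat_apply_template(model, nullptr, chat, 1, true, nullptr, 0);

        return res > 0;
    }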
@@ -478,9 +473,7 @@ struct llama_server_context {
                     prompt_tokens.push_back(p.template get<llama_token>());
                 }
             }
-        }
-        else
-        {
+        } else {
             auto s = json_prompt.template get<std::string>();
             prompt_tokens = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
         }
@@ -2572,7 +2565,10 @@ int main(int argc, char ** argv) {
     const auto model_meta = llama.model_meta();
 
     if (sparams.chat_template.empty()) { // custom chat template is not supplied
-        llama.validate_model_chat_template(sparams);
+        if (!llama.validate_model_chat_template()) {
+            LOG_ERROR("The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
+            sparams.chat_template = "chatml";
+        }
     }
 
     // Middleware for API key validation
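Design note: validate_model_chat_template() is now a const query with no side effects. It no longer takes server_params by reference, and the fallback policy (logging the warning and switching sparams.chat_template to "chatml") moves to the call site in main(), next to the check that no custom template was supplied.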