server: tokenize endpoint no longer adds BOS

This commit is contained in:
Xiao-Yong Jin 2023-07-26 17:42:20 -05:00
parent 53c2db1685
commit bb3770b3e6

View file

@@ -258,8 +258,10 @@ struct llama_server_context
return true; return true;
} }
std::vector<llama_token> tokenizePrompt(json json_prompt) std::vector<llama_token> tokenize(json json_prompt, bool add_bos)
{ {
// If `add_bos` is true, we only add BOS, when json_prompt is a string,
// or the first element of the json_prompt array is a string.
std::vector<llama_token> prompt_tokens; std::vector<llama_token> prompt_tokens;
if (json_prompt.is_array()) if (json_prompt.is_array())
@@ -274,7 +276,7 @@ struct llama_server_context
if (first) if (first)
{ {
s.insert(0, 1, ' '); // add a space if it's the first s.insert(0, 1, ' '); // add a space if it's the first
p = ::llama_tokenize(ctx, s, true); // also add BOS p = ::llama_tokenize(ctx, s, add_bos);
first = false; first = false;
} }
else else
@@ -297,7 +299,7 @@ struct llama_server_context
{ {
auto s = json_prompt.template get<std::string>(); auto s = json_prompt.template get<std::string>();
s.insert(0, 1, ' '); // always add a first space s.insert(0, 1, ' '); // always add a first space
prompt_tokens = ::llama_tokenize(ctx, s, true); prompt_tokens = ::llama_tokenize(ctx, s, add_bos);
} }
return prompt_tokens; return prompt_tokens;
@@ -305,7 +307,7 @@ struct llama_server_context
void loadPrompt() void loadPrompt()
{ {
auto prompt_tokens = tokenizePrompt(prompt); auto prompt_tokens = tokenize(prompt, true); // always add BOS
num_prompt_tokens = prompt_tokens.size(); num_prompt_tokens = prompt_tokens.size();
@@ -1327,7 +1329,7 @@ int main(int argc, char **argv)
std::vector<llama_token> tokens; std::vector<llama_token> tokens;
if (body.count("content") != 0) if (body.count("content") != 0)
{ {
tokens = llama.tokenizePrompt(body["content"]); tokens = llama.tokenize(body["content"], false);
} }
const json data = format_tokenizer_response(tokens); const json data = format_tokenizer_response(tokens);
return res.set_content(data.dump(), "application/json"); }); return res.set_content(data.dump(), "application/json"); });