server : smart slot selection using Longest Common Prefix (#7728)
* server : Smart selection of available slot using Longest Common Substring * add usage * remove trailing whitespaces * Use Longest Common Prefix (LCP) instead of LCS * Rename argument
This commit is contained in:
parent
da799b4189
commit
7a16ce7db2
4 changed files with 140 additions and 17 deletions
|
@ -1491,6 +1491,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
|
|||
params.chat_template = argv[i];
|
||||
return true;
|
||||
}
|
||||
if (arg == "--slot-prompt-similarity" || arg == "-sps") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
return true;
|
||||
}
|
||||
params.slot_prompt_similarity = std::stof(argv[i]);
|
||||
return true;
|
||||
}
|
||||
if (arg == "-pps") {
|
||||
params.is_pp_shared = true;
|
||||
return true;
|
||||
|
@ -1913,6 +1921,8 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
|
|||
"set custom jinja chat template (default: template taken from model's metadata)\n"
|
||||
"only commonly used templates are accepted:\n"
|
||||
"https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template" });
|
||||
options.push_back({ "server", "-sps, --slot-prompt-similarity SIMILARITY",
|
||||
"how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f, 0.0 = disabled)\n", params.slot_prompt_similarity });
|
||||
|
||||
#ifndef LOG_DISABLE_LOGS
|
||||
options.push_back({ "logging" });
|
||||
|
|
|
@ -203,6 +203,8 @@ struct gpt_params {
|
|||
|
||||
std::string slot_save_path;
|
||||
|
||||
float slot_prompt_similarity = 0.5f;
|
||||
|
||||
// batched-bench params
|
||||
bool is_pp_shared = false;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue