From a8842fdf56dc725b69c19332d46dc8bbf612069e Mon Sep 17 00:00:00 2001
From: sasha0552
Date: Fri, 7 Jun 2024 14:27:29 +0000
Subject: [PATCH] Rename argument

---
Notes (this section is ignored when the patch is applied):

* Renames the server option `--lcp-similarity` to `--slot-prompt-similarity`
  and adds the short alias `-sps`. The branch that matched the old spelling
  is replaced, so `--lcp-similarity` is no longer matched by it.
* Renames the backing field `lcp_similarity` to `slot_prompt_similarity` in
  both `gpt_params` and `server_context`.
* Not a pure rename: the `gpt_params` default changes from 0.0f to 0.5f.
  `server_context` still initialises its member to 0.0f, but `main()` copies
  the `gpt_params` value over it, and slot selection only skips the
  similarity search when the value is exactly 0.0f. The search is therefore
  enabled by default after this patch; pass `-sps 0.0` to disable it.
* NOTE(review): leading whitespace in the hunks below was reconstructed; if
  a context line fails to match, try `git apply --ignore-whitespace`.

 common/common.cpp          |  8 ++++----
 common/common.h            |  2 +-
 examples/server/server.cpp | 12 ++++++------
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 65448c918..c829fc792 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1460,12 +1460,12 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.chat_template = argv[i];
         return true;
     }
-    if (arg == "--lcp-similarity") {
+    if (arg == "--slot-prompt-similarity" || arg == "-sps") {
         if (++i >= argc) {
             invalid_param = true;
             return true;
         }
-        params.lcp_similarity = std::stof(argv[i]);
+        params.slot_prompt_similarity = std::stof(argv[i]);
         return true;
     }
     if (arg == "-pps") {
@@ -1839,8 +1839,8 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
         "set custom jinja chat template (default: template taken from model's metadata)\n"
         "only commonly used templates are accepted:\n"
         "https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template" });
-    options.push_back({ "server", " --lcp-similarity SIMILARITY",
-        "how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f)\n", params.lcp_similarity });
+    options.push_back({ "server", "-sps, --slot-prompt-similarity SIMILARITY",
+        "how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f, 0.0 = disabled)\n", params.slot_prompt_similarity });
 
 #ifndef LOG_DISABLE_LOGS
     options.push_back({ "logging" });
diff --git a/common/common.h b/common/common.h
index 0a8a9c073..a093b05c4 100644
--- a/common/common.h
+++ b/common/common.h
@@ -202,7 +202,7 @@ struct gpt_params {
 
     std::string slot_save_path;
 
-    float lcp_similarity = 0.0f;
+    float slot_prompt_similarity = 0.5f;
 
     // batched-bench params
     bool is_pp_shared = false;
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 802c660c7..0f3d03bc6 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -647,8 +647,8 @@ struct server_context {
 
     server_metrics metrics;
 
-    // Longest Common Prefix similarity for slot selection
-    float lcp_similarity = 0.0f;
+    // Necessary similarity of prompt for slot selection
+    float slot_prompt_similarity = 0.0f;
 
     ~server_context() {
         if (ctx) {
@@ -812,7 +812,7 @@ struct server_context {
         server_slot * ret = nullptr;
 
         // find the slot that has at least n% prompt similarity
-        if (ret == nullptr && lcp_similarity != 0.0f && !prompt.empty()) {
+        if (ret == nullptr && slot_prompt_similarity != 0.0f && !prompt.empty()) {
             int max_lcp_len = 0;
             float similarity = 0;
 
@@ -840,7 +840,7 @@ struct server_context {
                 similarity = static_cast<float>(lcp_len) / slot_prompt_len;
 
                 // select the current slot if the criteria match
-                if (lcp_len > max_lcp_len && similarity > lcp_similarity) {
+                if (lcp_len > max_lcp_len && similarity > slot_prompt_similarity) {
                     max_lcp_len = lcp_len;
                     ret = &slot;
                 }
@@ -2568,8 +2568,8 @@ int main(int argc, char ** argv) {
         log_data["api_key"] = "api_key: " + std::to_string(params.api_keys.size()) + " keys loaded";
     }
 
-    // Longest Common Prefix similarity for slot selection
-    ctx_server.lcp_similarity = params.lcp_similarity;
+    // Necessary similarity of prompt for slot selection
+    ctx_server.slot_prompt_similarity = params.slot_prompt_similarity;
 
     // load the model
     if (!ctx_server.load_model(params)) {