From aa4277cf973ce8857c1fb114dc255885bfb7fc9e Mon Sep 17 00:00:00 2001 From: sasha0552 Date: Fri, 1 Nov 2024 17:52:07 +0000 Subject: [PATCH] server : fix slot selection by lru, migrate lcs to `size_t` --- examples/server/server.cpp | 1 + examples/server/utils.hpp | 14 +++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 54cdb4b72..23ae949a7 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -247,6 +247,7 @@ struct server_slot { if (is_processing()) { SLT_INF(*this, "stop processing: n_past = %d, truncated = %d\n", n_past, truncated); + t_last_used = ggml_time_us(); t_token_generation = (ggml_time_us() - t_start_generation) / 1e3; state = SLOT_STATE_IDLE; callback_on_release(id); diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index 871a17a4f..c47ed3e47 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -453,20 +453,20 @@ static size_t longest_common_subsequence(const llama_tokens & a, const llama_tok } // get the lengths of the input sequences - int a_len = a.size(); - int b_len = b.size(); + size_t a_len = a.size(); + size_t b_len = b.size(); // initialize the maximum length of the longest common subsequence (LCS) - int max_length = 0; + size_t max_length = 0; // use two rows instead of a 2D matrix to optimize space - std::vector<int> prev_row(b_len + 1, 0); - std::vector<int> curr_row(b_len + 1, 0); + std::vector<size_t> prev_row(b_len + 1, 0); + std::vector<size_t> curr_row(b_len + 1, 0); // iterate through the elements of a - for (int i = 1; i <= a_len; i++) { + for (size_t i = 1; i <= a_len; i++) { // iterate through the elements of b - for (int j = 1; j <= b_len; j++) { + for (size_t j = 1; j <= b_len; j++) { // if elements at the current positions match if (a[i - 1] == b[j - 1]) { // if it's the first element of either sequences, set LCS length to 1