From bddb1efc1f50db7c3d3513027313596c44ddb2a0 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Tue, 4 Feb 2025 14:10:30 +0100 Subject: [PATCH] common : change longest common subsequence to substring [no ci] This commit updates the comments in the code to reflect that the function common_lcs() is actually computing the length of the longest common substring between two sequences (consecutive elements), not the longest common subsequence. The motivation for this change is to clarify the intent of the function. --- common/common.cpp | 2 +- common/common.h | 2 +- examples/server/server.cpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index edba6fb4b..c91ea29c7 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1652,7 +1652,7 @@ size_t common_lcs(const llama_tokens & a, const llama_tokens & b) { size_t a_len = a.size(); size_t b_len = b.size(); - // initialize the maximum length of the longest common subsequence (LCS) + // initialize the maximum length of the longest common substring (LCS) size_t max_length = 0; // use two rows instead of a 2D matrix to optimize space diff --git a/common/common.h b/common/common.h index b208d0c7e..554faa3c6 100644 --- a/common/common.h +++ b/common/common.h @@ -562,7 +562,7 @@ void common_batch_add( // longest common prefix size_t common_lcp(const llama_tokens & a, const llama_tokens & b); -// longet common subsequence +// longest common substring size_t common_lcs(const llama_tokens & a, const llama_tokens & b); // diff --git a/examples/server/server.cpp b/examples/server/server.cpp index e0acc4705..07a6ed0e4 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2008,10 +2008,10 @@ struct server_context { continue; } - // length of the Longest Common Subsequence between the current slot's prompt and the input prompt + // length of the Longest Common Substring between the current slot's prompt and the input prompt int cur_lcs_len = 
common_lcs(slot.cache_tokens, task.prompt_tokens); - // fraction of the common subsequence length compared to the current slot's prompt length + // fraction of the common substring length compared to the current slot's prompt length float cur_similarity = static_cast(cur_lcs_len) / static_cast(slot.cache_tokens.size()); // select the current slot if the criteria match