llama : infill sampling handle very long tokens (#9924)

* llama : infill sampling handle very long tokens ggml-ci
* cont : better indices ggml-ci
2024-10-17 22:32:47 +03:00 · 2024-10-17 22:32:47 +03:00 · 99bd4ac28c
commit 99bd4ac28c
parent 3752217ed5
4 changed files with 35 additions and 43 deletions
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1858,23 +1858,6 @@ int32_t llama_token_to_piece_impl(const struct llama_vocab & vocab, llama_token
    return 0;
 }

-bool llama_token_is_prefix_impl(
-        const struct llama_vocab & vocab,
-                     llama_token   token0,
-                     llama_token   token1) {
-    char text_buf_0[128];
-    char text_buf_1[128];
-
-    const int32_t len0 = llama_token_to_piece_impl(vocab, token0, text_buf_0, sizeof(text_buf_0) - 1, 0, false);
-    const int32_t len1 = llama_token_to_piece_impl(vocab, token1, text_buf_1, sizeof(text_buf_1) - 1, 0, false);
-
-    if (len0 <= 0 || len1 <= 0) {
-        return false;
-    }
-
-    return len0 <= len1 && memcmp(text_buf_0, text_buf_1, len0) == 0;
-}
-
 int32_t llama_detokenize_impl(
        const struct llama_vocab & vocab,
               const llama_token * tokens,