From 043f29877539d1f851a94aad788021d7c1a0b3be Mon Sep 17 00:00:00 2001 From: Joan Martinez Date: Tue, 7 May 2024 18:14:33 +0200 Subject: [PATCH] fix: fix infinite recursion --- examples/server/tests/features/embeddings.feature | 9 +-------- unicode.cpp | 6 +++++- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/examples/server/tests/features/embeddings.feature b/examples/server/tests/features/embeddings.feature index 7057fc5aa..bbb386db5 100644 --- a/examples/server/tests/features/embeddings.feature +++ b/examples/server/tests/features/embeddings.feature @@ -16,17 +16,10 @@ Feature: llama.cpp server Then the server is starting Then the server is healthy - Scenario: Embedding - When embeddings are computed for: - """ - What is the capital of Bulgaria ? - """ - Then embeddings are generated - Scenario: Tokenize / Detokenize complex When tokenizing: """ - España is a èspciâl café über naïve résumé cañón élite cañas Barça 例子 東京 こんにちは 你好 中国 + España is your's mine's l'heure èspciâl café über naïve résumé cañón élite cañas Barça 例子 東京 こんにちは 你好 中国 """ Then tokens can be detokenize and is equivalent False diff --git a/unicode.cpp b/unicode.cpp index d141cc1cc..341cac025 100644 --- a/unicode.cpp +++ b/unicode.cpp @@ -496,7 +496,11 @@ std::vector canonical_decomposition_cpts(std::vector & cpts, if (it.first != it.second) { uint offset = 0; for (auto jt = it.first; jt != it.second; jt++) { - cpts.emplace(cpts.begin() + i + offset, jt->second); + if (offset == 0) { + cpts[i] = jt->second; + } else { + cpts.emplace(cpts.begin() + i + offset, jt->second); + } offset++; } const auto & inner_result = canonical_decomposition_cpts(cpts, i);