From dd3021933232573bfdde2cb249c22ab332d353f3 Mon Sep 17 00:00:00 2001
From: anon
Date: Wed, 31 May 2023 10:40:42 -0300
Subject: [PATCH] buffer incomplete multi-byte characters

---
 examples/server/server.cpp | 37 +++++++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index fbfcc6b7f..b78992a13 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -842,16 +842,49 @@ int main(int argc, char **argv)
                             "application/json");
         } else {
             const auto chunked_content_provider = [&](size_t, DataSink &sink) {
+                size_t sent_count = 0;
+                int32_t multibyte_pending = 0;
+
                 while (llama.has_next_token) {
                     std::string token_text = llama.doCompletion();
+                    if (multibyte_pending > 0) {
+                        multibyte_pending -= token_text.size();
+                    } else if (token_text.size() == 1) {
+                        const char c = token_text[0];
+                        // 2-byte characters: 110xxxxx 10xxxxxx
+                        if ((c & 0xE0) == 0xC0) {
+                            multibyte_pending = 1;
+                        // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx
+                        } else if ((c & 0xF0) == 0xE0) {
+                            multibyte_pending = 2;
+                        // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+                        } else if ((c & 0xF8) == 0xF0) {
+                            multibyte_pending = 3;
+                        } else {
+                            multibyte_pending = 0;
+                        }
+                    }
+
+                    if (multibyte_pending > 0) {
+                        if (!llama.has_next_token) {
+                            llama.has_next_token = true;
+                            llama.n_remain++;
+                        }
+                        continue;
+                    }
+
+                    const size_t pos = std::min(sent_count, llama.generated_text.size());
+                    std::string to_send = llama.generated_text.substr(pos);
+                    sent_count += to_send.size();
+
                     json data;
                     if (llama.has_next_token) {
-                        data = {{"content", token_text}, {"stop", false}};
+                        data = {{"content", to_send}, {"stop", false}};
                     } else {
                         // Generation is done, send extra information.
                         data = {
-                            {"content", token_text},
+                            {"content", to_send},
                             {"stop", true},
                             {"model", llama.params.model_alias},
                             {"tokens_predicted", llama.num_tokens_predicted},
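
The idea behind the patch: a tokenizer can emit a single byte of a multi-byte
UTF-8 character per token, so the server must hold back output until all
continuation bytes have arrived, then flush everything not yet sent. Below is
a standalone sketch of that buffering logic, separate from the patch itself;
the helper name utf8_pending_bytes and the simulated token stream are made up
for illustration, and the patch's DataSink plumbing is replaced by printf.

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

// Number of continuation bytes expected after a UTF-8 lead byte,
// using the same bit masks as the patch; -1 marks a stray
// continuation byte arriving on its own.
static int utf8_pending_bytes(unsigned char c) {
    if ((c & 0x80) == 0x00) return 0; // ASCII: 0xxxxxxx
    if ((c & 0xE0) == 0xC0) return 1; // 2-byte: 110xxxxx
    if ((c & 0xF0) == 0xE0) return 2; // 3-byte: 1110xxxx
    if ((c & 0xF8) == 0xF0) return 3; // 4-byte: 11110xxx
    return -1;                        // continuation: 10xxxxxx
}

int main() {
    // Simulated token stream: "é" (0xC3 0xA9) split across two tokens.
    std::vector<std::string> tokens = {"caf", "\xC3", "\xA9", "!"};

    std::string generated;   // everything generated so far
    size_t sent_count = 0;   // bytes already flushed to the client
    int multibyte_pending = 0;

    for (const std::string &token_text : tokens) {
        generated += token_text;

        if (multibyte_pending > 0) {
            // Still waiting on continuation bytes of the current character.
            multibyte_pending -= (int) token_text.size();
        } else if (token_text.size() == 1) {
            multibyte_pending =
                std::max(utf8_pending_bytes((unsigned char) token_text[0]), 0);
        }

        if (multibyte_pending > 0) {
            continue; // hold back the partial character
        }

        // Flush only complete characters, exactly once each.
        std::string to_send = generated.substr(sent_count);
        sent_count += to_send.size();
        std::printf("chunk: %s\n", to_send.c_str());
    }
    return 0;
}

Running the sketch prints "caf", then "é" as one complete chunk once its
second byte arrives, then "!"; without the buffering, the 0xC3 byte would be
sent alone and render as a replacement character on the client.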