buffer incomplete multi-byte characters

2023-05-31 10:40:42 -03:00 · 2023-05-31 10:40:42 -03:00 · dd30219332
commit dd30219332
parent 27911d6d68
1 changed file with 35 additions and 2 deletions
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -842,16 +842,49 @@ int main(int argc, char **argv)
              "application/json");
      } else {
          const auto chunked_content_provider = [&](size_t, DataSink &sink) {
+              size_t sent_count = 0;
+              int32_t multibyte_pending = 0;
+
              while (llama.has_next_token) {
                  std::string token_text = llama.doCompletion();

+                  if (multibyte_pending > 0) {
+                      multibyte_pending -= token_text.size();
+                  } else if (token_text.size() == 1) {
+                      const char c = token_text[0];
+                      // 2-byte characters: 110xxxxx 10xxxxxx
+                      if ((c & 0xE0) == 0xC0) {
+                          multibyte_pending = 1;
+                      // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx
+                      } else if ((c & 0xF0) == 0xE0) {
+                          multibyte_pending = 2;
+                      // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+                      } else if ((c & 0xF8) == 0xF0) {
+                          multibyte_pending = 3;
+                      } else {
+                          multibyte_pending = 0;
+                      }
+                  }
+
+                  if (multibyte_pending > 0) {
+                      if (!llama.has_next_token) {
+                          llama.has_next_token = true;
+                          llama.n_remain++;
+                      }
+                      continue;
+                  }
+
+                  const size_t pos = std::min(sent_count, llama.generated_text.size());
+                  std::string to_send = llama.generated_text.substr(pos);
+                  sent_count += to_send.size();
+
                  json data;
                  if (llama.has_next_token) {
-                      data = {{"content", token_text}, {"stop", false}};
+                      data = {{"content", to_send}, {"stop", false}};
                  } else {
                      // Generation is done, send extra information.
                      data = {
-                          {"content", token_text},
+                          {"content", to_send},
                          {"stop", true},
                          {"model", llama.params.model_alias},
                          {"tokens_predicted", llama.num_tokens_predicted},