Fix race condition by locking concat_output string

Writer thread was appending to concat_output global string without a lock, while another thread could be reading the string invoked by HTTP API. Appending to std::string is not an atomic operation. Worst case would be if string was reallocated while being read. Fix it by locking the access in writer and reader with a mutex.
2023-09-01 07:08:50 +02:00 · 2023-09-01 07:08:50 +02:00 · 30588617fb
commit 30588617fb
parent b6914ebd04
1 changed files with 11 additions and 1 deletions
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@ -8,6 +8,7 @@
 //Python will ALWAYS provide the memory, we just write to it.

 #include <time.h>
+#include <mutex>
 #include "model_adapter.h"
 #include "otherarch.h"

@ -85,7 +86,9 @@ static std::vector<int> banned_token_ids;
 static std::vector<llama_token_data> top_picks;
 static int remaining_tokens = 0;
 static int stopper_unused_tokens = 0;
+static std::mutex concat_output_mtx;
 static std::string concat_output = "";
+static std::string concat_output_reader_copy = "";

 inline bool IsNanCheck(float f)
 {
@ -1039,12 +1042,17 @@ int gpttype_token_count(const std::string & input)

 const std::string & gpttype_get_pending_output()
 {
-    return concat_output;
+    concat_output_mtx.lock();
+    concat_output_reader_copy = concat_output;
+    concat_output_mtx.unlock();
+    return concat_output_reader_copy;
 }

 generation_outputs gpttype_generate(const generation_inputs inputs, generation_outputs &output)
 {
+    concat_output_mtx.lock();
    concat_output = "";
+    concat_output_mtx.unlock();
    last_stop_reason = stop_reason::OUT_OF_TOKENS;
    stop_sequence.clear();
    for(int x=0;x<stop_token_max;++x)
@ -1570,7 +1578,9 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
                {
                    generated_tokens.push_back(tokenizedstr);
                }
+                concat_output_mtx.lock();
                concat_output += tokenizedstr;
+                concat_output_mtx.unlock();
            }

            if (startedsampling && debugmode!=-1)