From 30588617fba54b2cef9a204b4f80da8b4e266735 Mon Sep 17 00:00:00 2001
From: Elbios <3lbios@gmail.com>
Date: Fri, 1 Sep 2023 07:08:50 +0200
Subject: [PATCH] Fix race condition by locking concat_output string

The writer thread was appending to the global concat_output string without a lock, while another thread, invoked by the HTTP API, could be reading the same string.
Appending to a std::string is not an atomic operation. In the worst case, the string's buffer could be reallocated while it is being read.
Fix it by guarding access in both the writer and the reader with a mutex; the reader returns a copy taken under the lock.
---
 gpttype_adapter.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 7cf1824f9..b87f67d07 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -8,6 +8,7 @@
 //Python will ALWAYS provide the memory, we just write to it.
 
 #include <time.h>
+#include <mutex>
 #include "model_adapter.h"
 #include "otherarch.h"
 
@@ -85,7 +86,9 @@ static std::vector<int> banned_token_ids;
 static std::vector<llama_token_data> top_picks;
 static int remaining_tokens = 0;
 static int stopper_unused_tokens = 0;
+static std::mutex concat_output_mtx;
 static std::string concat_output = "";
+static std::string concat_output_reader_copy = "";
 
 inline bool IsNanCheck(float f)
 {
@@ -1039,12 +1042,17 @@ int gpttype_token_count(const std::string & input)
 
 const std::string & gpttype_get_pending_output()
 {
-    return concat_output;
+    // Snapshot under an RAII guard so the mutex is released even if the copy throws.
+    std::lock_guard<std::mutex> guard(concat_output_mtx);
+    concat_output_reader_copy = concat_output;
+    return concat_output_reader_copy;
 }
 
 generation_outputs gpttype_generate(const generation_inputs inputs, generation_outputs &output)
 {
+    concat_output_mtx.lock();
     concat_output = "";
+    concat_output_mtx.unlock();
     last_stop_reason = stop_reason::OUT_OF_TOKENS;
     stop_sequence.clear();
     for(int x=0;x<stop_token_max;++x)
@@ -1570,7 +1578,9 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
                 {
                     generated_tokens.push_back(tokenizedstr);
                 }
+                concat_output_mtx.lock();
                 concat_output += tokenizedstr;
+                concat_output_mtx.unlock();
             }
 
             if (startedsampling && debugmode!=-1)