From 30588617fba54b2cef9a204b4f80da8b4e266735 Mon Sep 17 00:00:00 2001 From: Elbios <3lbios@gmail.com> Date: Fri, 1 Sep 2023 07:08:50 +0200 Subject: [PATCH] Fix race condition by locking concat_output string The writer thread was appending to the global concat_output string without a lock, while another thread, invoked by the HTTP API, could be reading that string. Appending to a std::string is not an atomic operation. In the worst case, the string is reallocated while it is being read. Fix this by guarding both the writer's and the reader's access with a mutex. --- gpttype_adapter.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 7cf1824f9..b87f67d07 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -8,6 +8,7 @@ //Python will ALWAYS provide the memory, we just write to it. #include +#include <mutex> #include "model_adapter.h" #include "otherarch.h" @@ -85,7 +86,9 @@ static std::vector banned_token_ids; static std::vector top_picks; static int remaining_tokens = 0; static int stopper_unused_tokens = 0; +static std::mutex concat_output_mtx; static std::string concat_output = ""; +static std::string concat_output_reader_copy = ""; inline bool IsNanCheck(float f) { @@ -1039,12 +1042,17 @@ int gpttype_token_count(const std::string & input) const std::string & gpttype_get_pending_output() { - return concat_output; + concat_output_mtx.lock(); + concat_output_reader_copy = concat_output; + concat_output_mtx.unlock(); + return concat_output_reader_copy; } generation_outputs gpttype_generate(const generation_inputs inputs, generation_outputs &output) { + concat_output_mtx.lock(); concat_output = ""; + concat_output_mtx.unlock(); last_stop_reason = stop_reason::OUT_OF_TOKENS; stop_sequence.clear(); for(int x=0;x