// Patch summary (gpttype_adapter.cpp): guards the pending-output string with a mutex.
//
// Hunk 1 (-8,6 +8,7): adds one #include line next to the existing includes.
//   NOTE(review): the header names are missing from the bare `#include` lines in this text;
//   presumably the added one is <mutex>, since std::mutex is introduced below - confirm
//   against the original commit.
// Hunk 2 (-85,7 +86,9): adds two file-scope statics:
//   - concat_output_mtx          : std::mutex locked around the concat_output accesses below.
//   - concat_output_reader_copy  : second std::string that receives a copy of concat_output.
// Hunk 3 (-1039,12 +1042,17):
//   - gpttype_get_pending_output(): instead of returning concat_output directly, it locks
//     concat_output_mtx, copies concat_output into concat_output_reader_copy, unlocks, and
//     returns a reference to the copy.
//   - gpttype_generate(): the reset `concat_output = "";` is now wrapped in lock()/unlock().
//
// NOTE(review): lock()/unlock() are called by hand; if the std::string assignment between
//   them throws, the mutex stays locked. A scoped guard (std::lock_guard) would release it
//   on every exit path - consider before merging.
// NOTE(review): the returned reference points at the shared static copy, which is rewritten
//   on every call; presumably there is only a single reader at a time - TODO confirm.
// NOTE(review): other writers of concat_output are not visible in these hunks; verify that
//   they also take concat_output_mtx.
// NOTE(review): this text appears to be flattened onto one line and the last hunk line is
//   cut off (`for(int x=0;x`); template arguments after std::vector also look stripped.
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 7cf1824f9..b87f67d07 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -8,6 +8,7 @@ //Python will ALWAYS provide the memory, we just write to it. #include +#include #include "model_adapter.h" #include "otherarch.h" @@ -85,7 +86,9 @@ static std::vector banned_token_ids; static std::vector top_picks; static int remaining_tokens = 0; static int stopper_unused_tokens = 0; +static std::mutex concat_output_mtx; static std::string concat_output = ""; +static std::string concat_output_reader_copy = ""; inline bool IsNanCheck(float f) { @@ -1039,12 +1042,17 @@ int gpttype_token_count(const std::string & input) const std::string & gpttype_get_pending_output() { - return concat_output; + concat_output_mtx.lock(); + concat_output_reader_copy = concat_output; + concat_output_mtx.unlock(); + return concat_output_reader_copy; } generation_outputs gpttype_generate(const generation_inputs inputs, generation_outputs &output) { + concat_output_mtx.lock(); concat_output = ""; + concat_output_mtx.unlock(); last_stop_reason = stop_reason::OUT_OF_TOKENS; stop_sequence.clear(); for(int x=0;x