From a8673be66e1ef5a4952ead68910446875f0fba08 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 7 Sep 2023 10:48:53 +0800
Subject: [PATCH] tidy up the new sse stream code

---
 koboldcpp.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index f9028ee89..f1b1a70b7 100755
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -434,11 +434,12 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
 
         incomplete_token_buffer = bytearray()
         while not handle.has_finished():
-            while current_token < handle.get_stream_count():
+            streamcount = handle.get_stream_count()
+            while current_token < streamcount:
                 token = handle.new_token(current_token)
 
                 if token is None: # Token isnt ready yet, received nullpointer
-                    continue
+                    break
 
                 current_token += 1
 
@@ -451,7 +452,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
                     event_str = json.dumps(event_data)
                     await self.send_sse_event("message", event_str)
 
-            await asyncio.sleep(0.1)
+            await asyncio.sleep(0.02) #this should keep things responsive
 
         # flush buffers, sleep a bit to make sure all data sent, and then force close the connection
         self.wfile.flush()
@@ -1803,4 +1804,4 @@ if __name__ == '__main__':
     parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0)
     parser.add_argument("--tensor_split", help="For CUDA with ALL GPU set only, ratio to split tensors across multiple GPUs, space-separated list of proportions, e.g. 7 3", metavar=('[Ratios]'), type=float, nargs='+')
 
-    main(parser.parse_args(),start_server=True)
+    main(parser.parse_args(),start_server=True)
\ No newline at end of file
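
For context on the polling pattern this patch settles on, below is a small self-contained sketch (not koboldcpp code): the handle methods has_finished(), get_stream_count() and new_token() are mocked by a hypothetical FakeStreamHandle, while drain_stream() mirrors the patched loop by snapshotting the stream count once per pass, breaking (rather than continuing) when a slot is not yet populated, and sleeping 0.02s between passes. The producer task, send_event callback and the final drain are harness details for illustration only.

import asyncio
import json

# Hypothetical stand-in for the ctypes handle in koboldcpp.py; it only
# mimics the three methods the patched loop calls.
class FakeStreamHandle:
    def __init__(self, tokens):
        self._tokens = tokens
        self._ready = 0  # how many tokens the "generator" has produced so far

    def has_finished(self):
        return self._ready >= len(self._tokens)

    def get_stream_count(self):
        return self._ready

    def new_token(self, i):
        # Returns None for a slot that is not populated yet, like the
        # nullpointer case in the real handle.
        return self._tokens[i] if i < self._ready else None

    def produce_one(self):
        if self._ready < len(self._tokens):
            self._ready += 1


async def drain_stream(handle, send_event):
    """Consumer loop mirroring the patched logic."""
    current_token = 0
    while not handle.has_finished():
        streamcount = handle.get_stream_count()  # snapshot once per pass
        while current_token < streamcount:
            token = handle.new_token(current_token)
            if token is None:
                # Not ready yet: break back to the outer loop instead of
                # 'continue', which would spin on the same index forever.
                break
            current_token += 1
            await send_event("message", json.dumps({"token": token}))
        await asyncio.sleep(0.02)  # short sleep keeps the handler responsive

    # Toy-harness detail (not part of the patch): deliver anything produced
    # just before has_finished() flipped to True.
    for i in range(current_token, handle.get_stream_count()):
        await send_event("message", json.dumps({"token": handle.new_token(i)}))


async def main():
    handle = FakeStreamHandle(["Hel", "lo", ",", " wor", "ld"])

    async def send_event(kind, payload):
        print(f"event: {kind}\ndata: {payload}\n")

    async def produce():
        for _ in range(5):
            await asyncio.sleep(0.05)
            handle.produce_one()

    await asyncio.gather(produce(), drain_stream(handle, send_event))

asyncio.run(main())

The two behavioural changes in the diff are what the sketch exercises: break on a null token hands control back to the outer loop (the old continue re-tested the same index without ever reaching the sleep), and the shorter 0.02s sleep tightens the latency between a token becoming available and its SSE event being sent.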