Merge remote-tracking branch 'kalo/patch-1' into concedo_experimental

2023-09-07 10:43:19 +08:00 · 2023-09-07 10:43:19 +08:00 · 49089371d1
commit 49089371d1
parent a0aa620718 c8cc7f1a19
1 changed file with 3 additions and 3 deletions
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -434,7 +434,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
        incomplete_token_buffer = bytearray()
        while not handle.has_finished():
-            if current_token < handle.get_stream_count():
+            while current_token < handle.get_stream_count():
                token = handle.new_token(current_token)
                if token is None: # Token isnt ready yet, received nullpointer
@@ -451,7 +451,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
                    event_str = json.dumps(event_data)
                    await self.send_sse_event("message", event_str)
-            await asyncio.sleep(0)
+            await asyncio.sleep(0.1)
        # flush buffers, sleep a bit to make sure all data sent, and then force close the connection
        self.wfile.flush()
@@ -1803,4 +1803,4 @@ if __name__ == '__main__':
    parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0)
    parser.add_argument("--tensor_split", help="For CUDA with ALL GPU set only, ratio to split tensors across multiple GPUs, space-separated list of proportions, e.g. 7 3", metavar=('[Ratios]'), type=float, nargs='+')
-    main(parser.parse_args(),start_server=True)
+    main(parser.parse_args(),start_server=True)