Proposed streaming improvements

kalomaze 2023-09-05 16:46:30 -05:00 committed by GitHub
parent b6914ebd04
commit c8cc7f1a19

@@ -428,7 +428,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
         incomplete_token_buffer = bytearray()
         while not handle.has_finished():
-            if current_token < handle.get_stream_count():
+            while current_token < handle.get_stream_count():
                 token = handle.new_token(current_token)
                 if token is None: # Token isnt ready yet, received nullpointer
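
The hunk above swaps the if statement for a while loop, so each polling pass drains every token the backend has already produced instead of emitting at most one token per pass. A minimal sketch of that drain pattern, assuming the handle.get_stream_count() / handle.new_token() interface visible in the diff context and a hypothetical async emit() callback:

    # Sketch only: handle's methods mirror the diff context; emit() is a
    # hypothetical coroutine that forwards one token to the client.
    async def drain_pending_tokens(handle, current_token, emit):
        # Forward every token generated since the last poll, not just one.
        while current_token < handle.get_stream_count():
            token = handle.new_token(current_token)
            if token is None:  # token not ready yet (backend returned a null pointer)
                break
            current_token += 1
            await emit(token)
        return current_token
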
@@ -445,7 +445,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
                     event_str = json.dumps(event_data)
                     await self.send_sse_event("message", event_str)
-            await asyncio.sleep(0)
+            await asyncio.sleep(0.1)
         # flush buffers, sleep a bit to make sure all data sent, and then force close the connection
         self.wfile.flush()
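
Together with the while loop above, changing asyncio.sleep(0) to asyncio.sleep(0.1) turns a busy poll into a 100 ms polling interval: sleep(0) only yields to the event loop for a single pass, so the coroutine spins as fast as the loop allows, whereas sleep(0.1) parks it long enough for other tasks to run. Because each wake-up now drains the whole backlog, the longer interval should not slow token delivery. A rough sketch of the combined loop, where send_sse_event and the event payload shape are assumptions rather than code taken from the file:

    import asyncio, json

    # Sketch of the resulting SSE polling loop under the assumptions above.
    async def stream_tokens(handle, send_sse_event):
        current_token = 0
        while not handle.has_finished():
            # Drain everything generated since the last wake-up.
            while current_token < handle.get_stream_count():
                token = handle.new_token(current_token)
                if token is None:
                    break
                current_token += 1
                event_str = json.dumps({"token": token})  # illustrative payload
                await send_sse_event("message", event_str)
            # Pause ~100 ms between polls instead of a bare yield; the inner
            # loop has already caught up, so this does not delay tokens.
            await asyncio.sleep(0.1)
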
@@ -1796,4 +1796,4 @@ if __name__ == '__main__':
     parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0)
     parser.add_argument("--tensor_split", help="For CUDA with ALL GPU set only, ratio to split tensors across multiple GPUs, space-separated list of proportions, e.g. 7 3", metavar=('[Ratios]'), type=float, nargs='+')
-    main(parser.parse_args(),start_server=True)
+    main(parser.parse_args(),start_server=True)