Better error handling; try to avoid a segfault in SillyTavern
This commit is contained in:
parent
c05d195583
commit
2378a29bde
1 changed file with 42 additions and 36 deletions
78
koboldcpp.py
78
koboldcpp.py
|
@ -568,43 +568,47 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
current_token = 0
|
current_token = 0
|
||||||
incomplete_token_buffer = bytearray()
|
incomplete_token_buffer = bytearray()
|
||||||
await asyncio.sleep(0.05) #anti race condition, prevent check from overtaking generate
|
await asyncio.sleep(0.05) #anti race condition, prevent check from overtaking generate
|
||||||
while True:
|
try:
|
||||||
streamDone = handle.has_finished() #exit next loop on done
|
while True:
|
||||||
tokenStr = ""
|
streamDone = handle.has_finished() #exit next loop on done
|
||||||
streamcount = handle.get_stream_count()
|
|
||||||
while current_token < streamcount:
|
|
||||||
token = handle.new_token(current_token)
|
|
||||||
|
|
||||||
if token is None: # Token isnt ready yet, received nullpointer
|
|
||||||
break
|
|
||||||
|
|
||||||
current_token += 1
|
|
||||||
newbyte = ctypes.string_at(token)
|
|
||||||
incomplete_token_buffer += bytearray(newbyte)
|
|
||||||
tokenSeg = incomplete_token_buffer.decode("UTF-8","ignore")
|
|
||||||
if tokenSeg!="":
|
|
||||||
incomplete_token_buffer.clear()
|
|
||||||
tokenStr += tokenSeg
|
|
||||||
|
|
||||||
if tokenStr!="":
|
|
||||||
if api_format == 4: # if oai chat, set format to expected openai streaming response
|
|
||||||
event_str = json.dumps({"id":"koboldcpp","object":"chat.completion.chunk","created":1,"model":friendlymodelname,"choices":[{"index":0,"finish_reason":"length","delta":{'role':'assistant','content':tokenStr}}]})
|
|
||||||
await self.send_oai_sse_event(event_str)
|
|
||||||
elif api_format == 3: # non chat completions
|
|
||||||
event_str = json.dumps({"id":"koboldcpp","object":"text_completion","created":1,"model":friendlymodelname,"choices":[{"index":0,"finish_reason":"length","text":tokenStr}]})
|
|
||||||
await self.send_oai_sse_event(event_str)
|
|
||||||
else:
|
|
||||||
event_str = json.dumps({"token": tokenStr})
|
|
||||||
await self.send_kai_sse_event(event_str)
|
|
||||||
tokenStr = ""
|
tokenStr = ""
|
||||||
|
streamcount = handle.get_stream_count()
|
||||||
|
while current_token < streamcount:
|
||||||
|
token = handle.new_token(current_token)
|
||||||
|
|
||||||
else:
|
if token is None: # Token isnt ready yet, received nullpointer
|
||||||
await asyncio.sleep(0.02) #this should keep things responsive
|
break
|
||||||
|
|
||||||
if streamDone:
|
current_token += 1
|
||||||
if api_format == 4: # if oai chat, send last [DONE] message consistent with openai format
|
newbyte = ctypes.string_at(token)
|
||||||
await self.send_oai_sse_event('[DONE]')
|
incomplete_token_buffer += bytearray(newbyte)
|
||||||
break
|
tokenSeg = incomplete_token_buffer.decode("UTF-8","ignore")
|
||||||
|
if tokenSeg!="":
|
||||||
|
incomplete_token_buffer.clear()
|
||||||
|
tokenStr += tokenSeg
|
||||||
|
|
||||||
|
if tokenStr!="":
|
||||||
|
if api_format == 4: # if oai chat, set format to expected openai streaming response
|
||||||
|
event_str = json.dumps({"id":"koboldcpp","object":"chat.completion.chunk","created":1,"model":friendlymodelname,"choices":[{"index":0,"finish_reason":"length","delta":{'role':'assistant','content':tokenStr}}]})
|
||||||
|
await self.send_oai_sse_event(event_str)
|
||||||
|
elif api_format == 3: # non chat completions
|
||||||
|
event_str = json.dumps({"id":"koboldcpp","object":"text_completion","created":1,"model":friendlymodelname,"choices":[{"index":0,"finish_reason":"length","text":tokenStr}]})
|
||||||
|
await self.send_oai_sse_event(event_str)
|
||||||
|
else:
|
||||||
|
event_str = json.dumps({"token": tokenStr})
|
||||||
|
await self.send_kai_sse_event(event_str)
|
||||||
|
tokenStr = ""
|
||||||
|
|
||||||
|
else:
|
||||||
|
await asyncio.sleep(0.02) #this should keep things responsive
|
||||||
|
|
||||||
|
if streamDone:
|
||||||
|
if api_format == 4: # if oai chat, send last [DONE] message consistent with openai format
|
||||||
|
await self.send_oai_sse_event('[DONE]')
|
||||||
|
break
|
||||||
|
except Exception as ex:
|
||||||
|
print("SSE streaming was interrupted due to an exception")
|
||||||
|
print(ex)
|
||||||
|
|
||||||
# flush buffers, sleep a bit to make sure all data sent, and then force close the connection
|
# flush buffers, sleep a bit to make sure all data sent, and then force close the connection
|
||||||
self.wfile.flush()
|
self.wfile.flush()
|
||||||
|
@ -629,7 +633,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
except (BrokenPipeError, ConnectionAbortedError) as cae: # attempt to abort if connection lost
|
except (BrokenPipeError, ConnectionAbortedError) as cae: # attempt to abort if connection lost
|
||||||
print(cae)
|
print(cae)
|
||||||
handle.abort_generate()
|
handle.abort_generate()
|
||||||
time.sleep(0.1) #short delay
|
time.sleep(0.2) #short delay
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
|
|
||||||
|
@ -938,8 +942,10 @@ Enter Prompt:<br>
|
||||||
self.send_header('content-length', str(len(genresp)))
|
self.send_header('content-length', str(len(genresp)))
|
||||||
self.end_headers(content_type='application/json')
|
self.end_headers(content_type='application/json')
|
||||||
self.wfile.write(genresp)
|
self.wfile.write(genresp)
|
||||||
except:
|
except Exception as ex:
|
||||||
print("Generate: The response could not be sent, maybe connection was terminated?")
|
print("Generate: The response could not be sent, maybe connection was terminated?")
|
||||||
|
handle.abort_generate()
|
||||||
|
time.sleep(0.2) #short delay
|
||||||
return
|
return
|
||||||
finally:
|
finally:
|
||||||
modelbusy.release()
|
modelbusy.release()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue