server : implement cancellable request (#11285)

* server : implement cancellable request

* fix typo

* httplib 0.18.5

* fix i underflow
This commit is contained in:
Xuan Son Nguyen 2025-01-18 14:12:05 +01:00 committed by GitHub
parent f26c874179
commit f30f099228
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 1396 additions and 431 deletions

View file

@ -1,4 +1,5 @@
import pytest
import requests
import time
from openai import OpenAI
from utils import *
@ -405,3 +406,23 @@ def test_n_probs_post_sampling():
assert "bytes" in prob and type(prob["bytes"]) == list
# because the test model usually outputs tokens with either 100% or 0% probability, we need to check all the top_probs
assert any(prob["prob"] == 1.0 for prob in tok["top_probs"])
def test_cancel_request():
    """Check that the slot is released after the client abandons a request.

    Configures the server with ``n_slots = 1`` and ``server_slots = True``
    (presumably what exposes the ``/slots`` endpoint -- confirm against the
    server preset), sends a ``/completion`` request with a 0.1 s client-side
    timeout so the client gives up early, then queries ``/slots`` and asserts
    that the first slot reports it is no longer processing.
    """
    global server
    server.n_ctx = 4096
    server.n_predict = -1
    server.n_slots = 1
    server.server_slots = True
    server.start()
    # send a request that will take a long time, but cancel it before it finishes
    try:
        server.make_request("POST", "/completion", data={
            "prompt": "I believe the meaning of life is",
        }, timeout=0.1)
    except requests.exceptions.ReadTimeout:
        # expected: the client-side timeout is what cancels the request.
        # A request that happens to finish in time is tolerated on purpose;
        # the slot must be idle afterwards in either case.
        pass
    # make sure the slot is free
    time.sleep(1)  # wait for HTTP_POLLING_SECONDS
    res = server.make_request("GET", "/slots")
    slot = res.body[0]
    # Identity check instead of `== False`: the field must be a real boolean,
    # and the message shows the slot state when the check fails.
    assert slot["is_processing"] is False, f"slot still busy after cancel: {slot}"