Updated the API routing paths for the models and completions endpoints and fixed a bug with thread settings.
This commit is contained in:
parent
dffc6bee74
commit
0c47e79537
2 changed files with 8 additions and 4 deletions
|
@@ -788,6 +788,8 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
llama_ctx_params.rope_freq_base = rope_freq_base;
|
||||
llama_ctx_params.rope_freq_scale = rope_freq_scale;
|
||||
llama_ctx_params.n_batch = blasbatchsize;
|
||||
llama_ctx_params.n_threads = n_threads;
|
||||
llama_ctx_params.n_threads_batch = n_blasthreads;
|
||||
|
||||
#if defined(GGML_USE_CUBLAS)
|
||||
bool ts_all_zero = true;
|
||||
|
@@ -1365,7 +1367,9 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
|||
params.n_batch = bbs; //received reports of 1024 and above crashing on some models
|
||||
if(!ggml_cpu_has_gpublas())
|
||||
{
|
||||
params.n_threads = 1; //do not limit here anymore.
|
||||
//does not limit here for gguf anymore. this is kept for older models.
|
||||
//new models will override threads inside decode fn.
|
||||
params.n_threads = 1;
|
||||
params.n_threads_batch = 1;
|
||||
}
|
||||
else
|
||||
|
|
|
@@ -367,7 +367,7 @@ maxhordelen = 256
|
|||
modelbusy = threading.Lock()
|
||||
requestsinqueue = 0
|
||||
defaultport = 5001
|
||||
KcppVersion = "1.45.1"
|
||||
KcppVersion = "1.45.2"
|
||||
showdebug = True
|
||||
showsamplerwarning = True
|
||||
showmaxctxwarning = True
|
||||
|
@@ -583,7 +583,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
pendtxtStr = ctypes.string_at(pendtxt).decode("UTF-8","ignore")
|
||||
response_body = (json.dumps({"results": [{"text": pendtxtStr}]}).encode())
|
||||
|
||||
elif self.path.endswith('/api/extra/oai/v1/models'):
|
||||
elif self.path.endswith('/v1/models') or self.path.endswith('/models'):
|
||||
response_body = (json.dumps({"object":"list","data":[{"id":"koboldcpp","object":"model","created":1,"owned_by":"koboldcpp","permission":[],"root":"koboldcpp"}]}).encode())
|
||||
|
||||
elif self.path.endswith(('/api')) or self.path.endswith(('/api/v1')):
|
||||
|
@@ -684,7 +684,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
api_format = 2
|
||||
kai_sse_stream_flag = True
|
||||
|
||||
if self.path.endswith('/api/extra/oai/v1/completions'):
|
||||
if self.path.endswith('/v1/completions') or self.path.endswith('/completions'):
|
||||
api_format = 3
|
||||
|
||||
if api_format>0:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue