From 0c47e7953768296c9756b914661ebe2b88d2e2fe Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Mon, 2 Oct 2023 11:05:19 +0800 Subject: [PATCH] updated the API routing path and fixed a bug with threads --- gpttype_adapter.cpp | 6 +++++- koboldcpp.py | 6 +++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 39820712f..68db83448 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -788,6 +788,8 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in llama_ctx_params.rope_freq_base = rope_freq_base; llama_ctx_params.rope_freq_scale = rope_freq_scale; llama_ctx_params.n_batch = blasbatchsize; + llama_ctx_params.n_threads = n_threads; + llama_ctx_params.n_threads_batch = n_blasthreads; #if defined(GGML_USE_CUBLAS) bool ts_all_zero = true; @@ -1365,7 +1367,9 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o params.n_batch = bbs; //received reports of 1024 and above crashing on some models if(!ggml_cpu_has_gpublas()) { - params.n_threads = 1; //do not limit here anymore. + //does not limit here for gguf anymore. this is kept for older models. + //new models will override threads inside decode fn. + params.n_threads = 1; params.n_threads_batch = 1; } else diff --git a/koboldcpp.py b/koboldcpp.py index 744359025..900be3c10 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -367,7 +367,7 @@ maxhordelen = 256 modelbusy = threading.Lock() requestsinqueue = 0 defaultport = 5001 -KcppVersion = "1.45.1" +KcppVersion = "1.45.2" showdebug = True showsamplerwarning = True showmaxctxwarning = True @@ -583,7 +583,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler): pendtxtStr = ctypes.string_at(pendtxt).decode("UTF-8","ignore") response_body = (json.dumps({"results": [{"text": pendtxtStr}]}).encode()) - elif self.path.endswith('/api/extra/oai/v1/models'): + elif self.path.endswith('/v1/models') or self.path.endswith('/models'): response_body = (json.dumps({"object":"list","data":[{"id":"koboldcpp","object":"model","created":1,"owned_by":"koboldcpp","permission":[],"root":"koboldcpp"}]}).encode()) elif self.path.endswith(('/api')) or self.path.endswith(('/api/v1')): @@ -684,7 +684,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler): api_format = 2 kai_sse_stream_flag = True - if self.path.endswith('/api/extra/oai/v1/completions'): + if self.path.endswith('/v1/completions') or self.path.endswith('/completions'): api_format = 3 if api_format>0: