diff --git a/expose.h b/expose.h index 535e11374..fdef1e252 100644 --- a/expose.h +++ b/expose.h @@ -1,7 +1,7 @@ #pragma once -const int stop_token_max = 10; -const int ban_token_max = 10; +const int stop_token_max = 16; +const int ban_token_max = 16; const int tensor_split_max = 16; // match kobold's sampler list and order enum samplers diff --git a/koboldcpp.py b/koboldcpp.py index 789d81a40..720cd251d 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -11,9 +11,9 @@ import argparse import json, sys, http.server, time, asyncio, socket, threading from concurrent.futures import ThreadPoolExecutor -stop_token_max = 10 sampler_order_max = 7 -ban_token_max = 10 +stop_token_max = 16 +ban_token_max = 16 tensor_split_max = 16 class load_model_inputs(ctypes.Structure): @@ -749,7 +749,7 @@ def RunServerMultiThreaded(addr, port, embedded_kailite = None): exitcounter = 999 self.httpd.server_close() - numThreads = 10 + numThreads = 12 threadArr = [] for i in range(numThreads): threadArr.append(Thread(i))