diff --git a/expose.h b/expose.h
index 535e11374..fdef1e252 100644
--- a/expose.h
+++ b/expose.h
@@ -1,7 +1,7 @@
 #pragma once
 
-const int stop_token_max = 10;
-const int ban_token_max = 10;
+const int stop_token_max = 16;
+const int ban_token_max = 16;
 const int tensor_split_max = 16;
 // match kobold's sampler list and order
 enum samplers
diff --git a/koboldcpp.py b/koboldcpp.py
index 789d81a40..720cd251d 100755
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -11,9 +11,9 @@ import argparse
 import json, sys, http.server, time, asyncio, socket, threading
 from concurrent.futures import ThreadPoolExecutor
 
-stop_token_max = 10
 sampler_order_max = 7
-ban_token_max = 10
+stop_token_max = 16
+ban_token_max = 16
 tensor_split_max = 16
 
 class load_model_inputs(ctypes.Structure):
@@ -749,7 +749,7 @@ def RunServerMultiThreaded(addr, port, embedded_kailite = None):
             exitcounter = 999
             self.httpd.server_close()
 
-    numThreads = 10
+    numThreads = 12
     threadArr = []
     for i in range(numThreads):
         threadArr.append(Thread(i))