Bump stop/ban token limits from 10 to 16 and server thread count from 10 to 12

2023-09-27 01:26:00 +08:00 · 2023-09-27 01:26:00 +08:00 · eb86cd4027
commit eb86cd4027
parent 8bf6f7f8b0
2 changed files with 5 additions and 5 deletions
--- a/expose.h
+++ b/expose.h
@@ -1,7 +1,7 @@
 #pragma once

-const int stop_token_max = 10;
-const int ban_token_max = 10;
+const int stop_token_max = 16;
+const int ban_token_max = 16;
 const int tensor_split_max = 16;
 // match kobold's sampler list and order
 enum samplers
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -11,9 +11,9 @@ import argparse
 import json, sys, http.server, time, asyncio, socket, threading
 from concurrent.futures import ThreadPoolExecutor

-stop_token_max = 10
 sampler_order_max = 7
-ban_token_max = 10
+stop_token_max = 16
+ban_token_max = 16
 tensor_split_max = 16

 class load_model_inputs(ctypes.Structure):
@@ -749,7 +749,7 @@ def RunServerMultiThreaded(addr, port, embedded_kailite = None):
            exitcounter = 999
            self.httpd.server_close()

-    numThreads = 10
+    numThreads = 12
    threadArr = []
    for i in range(numThreads):
        threadArr.append(Thread(i))