bump token limits
This commit is contained in:
parent
8bf6f7f8b0
commit
eb86cd4027
2 changed files with 5 additions and 5 deletions
4
expose.h
4
expose.h
|
@ -1,7 +1,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
const int stop_token_max = 10;
|
const int stop_token_max = 16;
|
||||||
const int ban_token_max = 10;
|
const int ban_token_max = 16;
|
||||||
const int tensor_split_max = 16;
|
const int tensor_split_max = 16;
|
||||||
// match kobold's sampler list and order
|
// match kobold's sampler list and order
|
||||||
enum samplers
|
enum samplers
|
||||||
|
|
|
@ -11,9 +11,9 @@ import argparse
|
||||||
import json, sys, http.server, time, asyncio, socket, threading
|
import json, sys, http.server, time, asyncio, socket, threading
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
stop_token_max = 10
|
|
||||||
sampler_order_max = 7
|
sampler_order_max = 7
|
||||||
ban_token_max = 10
|
stop_token_max = 16
|
||||||
|
ban_token_max = 16
|
||||||
tensor_split_max = 16
|
tensor_split_max = 16
|
||||||
|
|
||||||
class load_model_inputs(ctypes.Structure):
|
class load_model_inputs(ctypes.Structure):
|
||||||
|
@ -749,7 +749,7 @@ def RunServerMultiThreaded(addr, port, embedded_kailite = None):
|
||||||
exitcounter = 999
|
exitcounter = 999
|
||||||
self.httpd.server_close()
|
self.httpd.server_close()
|
||||||
|
|
||||||
numThreads = 10
|
numThreads = 12
|
||||||
threadArr = []
|
threadArr = []
|
||||||
for i in range(numThreads):
|
for i in range(numThreads):
|
||||||
threadArr.append(Thread(i))
|
threadArr.append(Thread(i))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue