Added an `--onready` argument to execute a shell command after model loading is done
This commit is contained in:
parent
6667fdcec8
commit
74384cfbb5
1 changed files with 11 additions and 1 deletions
12
koboldcpp.py
12
koboldcpp.py
|
@ -335,7 +335,7 @@ maxhordectx = 1024
|
||||||
maxhordelen = 256
|
maxhordelen = 256
|
||||||
modelbusy = threading.Lock()
|
modelbusy = threading.Lock()
|
||||||
defaultport = 5001
|
defaultport = 5001
|
||||||
KcppVersion = "1.43"
|
KcppVersion = "1.44"
|
||||||
showdebug = True
|
showdebug = True
|
||||||
showsamplerwarning = True
|
showsamplerwarning = True
|
||||||
showmaxctxwarning = True
|
showmaxctxwarning = True
|
||||||
|
@ -1757,6 +1757,15 @@ def main(launch_args,start_server=True):
|
||||||
horde_thread.daemon = True
|
horde_thread.daemon = True
|
||||||
horde_thread.start()
|
horde_thread.start()
|
||||||
|
|
||||||
|
#if post-ready script specified, execute it
|
||||||
|
if args.onready:
|
||||||
|
def onready_subprocess():
|
||||||
|
import subprocess
|
||||||
|
print("Starting Post-Load subprocess...")
|
||||||
|
subprocess.Popen(args.onready[0], shell=True)
|
||||||
|
timer_thread = threading.Timer(1, onready_subprocess) #1 second delay
|
||||||
|
timer_thread.start()
|
||||||
|
|
||||||
if start_server:
|
if start_server:
|
||||||
print(f"Please connect to custom endpoint at {epurl}")
|
print(f"Please connect to custom endpoint at {epurl}")
|
||||||
asyncio.run(RunServerMultiThreaded(args.host, args.port, embedded_kailite))
|
asyncio.run(RunServerMultiThreaded(args.host, args.port, embedded_kailite))
|
||||||
|
@ -1808,5 +1817,6 @@ if __name__ == '__main__':
|
||||||
compatgroup.add_argument("--usecublas", help="Use CuBLAS/hipBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs.", nargs='*',metavar=('[lowvram|normal] [main GPU ID] [mmq]'), choices=['normal', 'lowvram', '0', '1', '2', '3', 'mmq'])
|
compatgroup.add_argument("--usecublas", help="Use CuBLAS/hipBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs.", nargs='*',metavar=('[lowvram|normal] [main GPU ID] [mmq]'), choices=['normal', 'lowvram', '0', '1', '2', '3', 'mmq'])
|
||||||
parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0)
|
parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0)
|
||||||
parser.add_argument("--tensor_split", help="For CUDA with ALL GPU set only, ratio to split tensors across multiple GPUs, space-separated list of proportions, e.g. 7 3", metavar=('[Ratios]'), type=float, nargs='+')
|
parser.add_argument("--tensor_split", help="For CUDA with ALL GPU set only, ratio to split tensors across multiple GPUs, space-separated list of proportions, e.g. 7 3", metavar=('[Ratios]'), type=float, nargs='+')
|
||||||
|
parser.add_argument("--onready", help="An optional shell command to execute after the model has been loaded.", type=str, default="",nargs=1)
|
||||||
|
|
||||||
main(parser.parse_args(),start_server=True)
|
main(parser.parse_args(),start_server=True)
|
Loading…
Add table
Add a link
Reference in a new issue