Breaking Change: Remove deprecated commands
parent 5d3e142145
commit d10470a1e3

5 changed files with 49 additions and 136 deletions
class.py (5 changed lines)

@@ -268,9 +268,8 @@ class model_backend(InferenceModel):
         if not kcpp_backend_loaded:
             kcppargs = KcppArgsObject(model=self.kcpp_filename, model_param=self.kcpp_filename,
             port=5001, port_param=5001, host='', launch=False, lora=None, threads=self.kcpp_threads, blasthreads=self.kcpp_threads,
-            psutil_set_threads=False, highpriority=False, contextsize=self.kcpp_ctxsize,
-            blasbatchsize=self.kcpp_blasbatchsize, ropeconfig=[self.kcpp_ropescale, self.kcpp_ropebase], stream=False, smartcontext=self.kcpp_smartcontext,
-            unbantokens=False, bantokens=None, usemirostat=None, forceversion=0, nommap=self.kcpp_nommap,
+            highpriority=False, contextsize=self.kcpp_ctxsize, blasbatchsize=self.kcpp_blasbatchsize, ropeconfig=[self.kcpp_ropescale, self.kcpp_ropebase],
+            smartcontext=self.kcpp_smartcontext, bantokens=None, forceversion=0, nommap=self.kcpp_nommap,
             usemlock=False, noavx2=self.kcpp_noavx2, debugmode=self.kcpp_debugmode, skiplauncher=True, hordeconfig=None, noblas=self.kcpp_noblas,
             useclblast=self.kcpp_useclblast, usecublas=self.kcpp_usecublas, gpulayers=self.kcpp_gpulayers, tensor_split=self.kcpp_tensor_split, config=None,
             onready='', multiuser=False, foreground=False)
colab.ipynb (78 changed lines)

@@ -1,29 +1,10 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "private_outputs": true,
-      "provenance": [],
-      "gpuType": "T4",
-      "authorship_tag": "",
-      "include_colab_link": true
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    },
-    "accelerator": "GPU"
-  },
   "cells": [
     {
       "cell_type": "markdown",
       "metadata": {
-        "id": "view-in-github",
-        "colab_type": "text"
+        "colab_type": "text",
+        "id": "view-in-github"
       },
       "source": []
     },

@@ -36,24 +17,43 @@
       },
       "outputs": [],
       "source": [
-        "#@title <b>v-- Enter your model below and then click this to start Koboldcpp</b>\n",
-        "\n",
-        "Model = \"https://huggingface.co/TheBloke/Airoboros-L2-13B-2.2-GGUF/resolve/main/airoboros-l2-13b-2.2.Q4_K_M.gguf\" #@param [\"\"]{allow-input: true}\n",
-        "Layers = 43 #@param [43]{allow-input: true}\n",
-        "\n",
-        "%cd /content\n",
-        "!git clone https://github.com/LostRuins/koboldcpp\n",
-        "%cd /content/koboldcpp\n",
-        "!make LLAMA_CUBLAS=1\n",
-        "\n",
-        "!wget $Model -O model.ggml\n",
-        "!wget -c https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64\n",
-        "!chmod +x cloudflared-linux-amd64\n",
-        "!nohup ./cloudflared-linux-amd64 tunnel --url http://localhost:5001 &\n",
-        "!sleep 10\n",
-        "!cat nohup.out\n",
-        "!python koboldcpp.py model.ggml --stream --usecublas 0 mmq --gpulayers $Layers --hordeconfig concedo\n"
+        "#@title <b>v-- Enter your model below and then click this to start Koboldcpp</b>\r\n",
+        "\r\n",
+        "Model = \"https://huggingface.co/TheBloke/Airoboros-L2-13B-2.2-GGUF/resolve/main/airoboros-l2-13b-2.2.Q4_K_M.gguf\" #@param [\"\"]{allow-input: true}\r\n",
+        "Layers = 43 #@param [43]{allow-input: true}\r\n",
+        "\r\n",
+        "%cd /content\r\n",
+        "!git clone https://github.com/LostRuins/koboldcpp\r\n",
+        "%cd /content/koboldcpp\r\n",
+        "!make LLAMA_CUBLAS=1\r\n",
+        "\r\n",
+        "!wget $Model -O model.ggml\r\n",
+        "!wget -c https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64\r\n",
+        "!chmod +x cloudflared-linux-amd64\r\n",
+        "!nohup ./cloudflared-linux-amd64 tunnel --url http://localhost:5001 &\r\n",
+        "!sleep 10\r\n",
+        "!cat nohup.out\r\n",
+        "!python koboldcpp.py model.ggml --usecublas 0 mmq --gpulayers $Layers --hordeconfig concedo\r\n"
       ]
     }
-  ]
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "authorship_tag": "",
+      "gpuType": "T4",
+      "include_colab_link": true,
+      "private_outputs": true,
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
 }
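Note on the notebook: most of its 78 changed lines are mechanical (the source strings are re-saved with \r\n line endings and the notebook metadata moves from the top of the file to below the cells). The one functional change is the launch line, which drops the deprecated --stream flag.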
expose.h (1 changed line)

@@ -38,7 +38,6 @@ struct load_model_inputs
     const bool use_mmap;
     const bool use_mlock;
     const bool use_smartcontext;
-    const bool unban_tokens;
     const int clblast_info = 0;
     const int cublas_info = 0;
     const int blasbatchsize = 512;
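The removal above is only safe because the ctypes mirror of this struct in koboldcpp.py drops the same field in the same commit (see the koboldcpp.py section below): ctypes computes field offsets from the declared order, so a leftover field would shift every later field. A minimal sketch of the invariant, abridged to just the fields visible in this diff:

import ctypes

# Abridged: must list the same fields, in the same order, as
# `struct load_model_inputs` in expose.h. "unban_tokens" is removed
# from both sides at once, keeping the offsets aligned.
class load_model_inputs(ctypes.Structure):
    _fields_ = [("use_mmap", ctypes.c_bool),
                ("use_mlock", ctypes.c_bool),
                ("use_smartcontext", ctypes.c_bool),
                # ("unban_tokens", ctypes.c_bool),  # dropped in lockstep with expose.h
                ("clblast_info", ctypes.c_int),
                ("cublas_info", ctypes.c_int),
                ("blasbatchsize", ctypes.c_int)]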
gpttype_adapter.cpp (6 changed lines)

@@ -78,7 +78,6 @@ static int n_threads = 4;
 static int n_blasthreads = 4;
 static int n_batch = 8;
 static bool useSmartContext = false;
-static bool unbanTokens = false;
 static int blasbatchsize = 512;
 static int debugmode = 0; //-1 = hide all, 0 = normal, 1 = showall
 static std::string modelname;

@@ -556,7 +555,6 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     modelname = params.model = inputs.model_filename;
     useSmartContext = inputs.use_smartcontext;
     debugmode = inputs.debugmode;
-    unbanTokens = inputs.unban_tokens;
     blasbatchsize = inputs.blasbatchsize;
     if(blasbatchsize<=0)
     {

@@ -1656,7 +1654,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
         lowestLogit = LowestLogit(logits);
     }

-    if (!unbanTokens && !inputs.unban_tokens_rt)
+    if (!inputs.unban_tokens_rt)
     {
         // set the logit of the eos token to very low to avoid sampling it
         logitsPtr[eosID] = lowestLogit;

@@ -1721,7 +1719,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
         printf("]\n");
     }

-    if((unbanTokens||inputs.unban_tokens_rt) && id==eosID)
+    if(inputs.unban_tokens_rt && id==eosID)
     {
         stopper_unused_tokens = remaining_tokens;
         printf("\n(EOS token triggered!)");
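With the load-time unbanTokens global gone, EOS banning is decided entirely per request: koboldcpp.py sets inputs.unban_tokens_rt = not use_default_badwordsids from the generate payload (see the hunks below). A minimal client-side sketch, assuming the default port and the standard KoboldAI generate route:

import json
import urllib.request

# With use_default_badwordsids set to False the server sets unban_tokens_rt,
# skips forcing the EOS logit to the lowest value, and generation can stop
# naturally at an EOS token.
payload = {
    "prompt": "Once upon a time",
    "max_length": 64,
    "use_default_badwordsids": False,
}
req = urllib.request.Request(
    "http://localhost:5001/api/v1/generate",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read())["results"][0]["text"])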
koboldcpp.py (95 changed lines)

@@ -34,7 +34,6 @@ class load_model_inputs(ctypes.Structure):
                 ("use_mmap", ctypes.c_bool),
                 ("use_mlock", ctypes.c_bool),
                 ("use_smartcontext", ctypes.c_bool),
-                ("unban_tokens", ctypes.c_bool),
                 ("clblast_info", ctypes.c_int),
                 ("cublas_info", ctypes.c_int),
                 ("blasbatchsize", ctypes.c_int),

@@ -224,7 +223,6 @@ def load_model(model_filename):
     if len(args.lora) > 1:
         inputs.lora_base = args.lora[1].encode("UTF-8")
     inputs.use_smartcontext = args.smartcontext
-    inputs.unban_tokens = args.unbantokens
     inputs.blasbatchsize = args.blasbatchsize
     inputs.forceversion = args.forceversion
     inputs.gpulayers = args.gpulayers

@@ -307,11 +305,7 @@ def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_
     inputs.grammar = grammar.encode("UTF-8")
     inputs.grammar_retain_state = grammar_retain_state
     inputs.unban_tokens_rt = not use_default_badwordsids
-    if args.usemirostat and args.usemirostat[0]>0:
-        inputs.mirostat = int(args.usemirostat[0])
-        inputs.mirostat_tau = float(args.usemirostat[1])
-        inputs.mirostat_eta = float(args.usemirostat[2])
-    elif mirostat in (1, 2):
+    if mirostat in (1, 2):
         inputs.mirostat = mirostat
         inputs.mirostat_tau = mirostat_tau
         inputs.mirostat_eta = mirostat_eta

@@ -367,7 +361,7 @@ maxhordelen = 256
 modelbusy = threading.Lock()
 requestsinqueue = 0
 defaultport = 5001
-KcppVersion = "1.45.2"
+KcppVersion = "1.46"
 showdebug = True
 showsamplerwarning = True
 showmaxctxwarning = True

@@ -529,17 +523,6 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
         force_json = False

         if self.path in ["", "/?"] or self.path.startswith(('/?','?')): #it's possible for the root url to have ?params without /
-            if args.stream and not "streaming=1" in self.path:
-                self.path = self.path.replace("streaming=0","")
-                if self.path.startswith(('/?','?')):
-                    self.path += "&streaming=1"
-                else:
-                    self.path = self.path + "?streaming=1"
-                self.send_response(302)
-                self.send_header("Location", self.path)
-                self.end_headers()
-                print("Force redirect to streaming mode, as --stream is set.")
-                return None

             if self.embedded_kailite is None:
                 response_body = (f"Embedded Kobold Lite is not found.<br>You will have to connect via the main KoboldAI client, or <a href='https://lite.koboldai.net?local=1&port={self.port}'>use this URL</a> to connect.").encode()

@@ -954,28 +937,18 @@ def show_new_gui():
     launchbrowser = ctk.IntVar(value=1)
     highpriority = ctk.IntVar()
     disablemmap = ctk.IntVar()
-    psutil = ctk.IntVar()
     usemlock = ctk.IntVar()
     debugmode = ctk.IntVar()
     keepforeground = ctk.IntVar()

     lowvram_var = ctk.IntVar()
     mmq_var = ctk.IntVar(value=1)

     blas_threads_var = ctk.StringVar()
     blas_size_var = ctk.IntVar()
     version_var =ctk.StringVar(value="0")
-
-    stream = ctk.IntVar()
     smartcontext = ctk.IntVar()
-    unbantokens = ctk.IntVar()
-    usemirostat = ctk.IntVar()
-    mirostat_var = ctk.StringVar(value="2")
-    mirostat_tau = ctk.StringVar(value="5.0")
-    mirostat_eta = ctk.StringVar(value="0.1")
-
     context_var = ctk.IntVar()
-
     customrope_var = ctk.IntVar()
     customrope_scale = ctk.StringVar(value="1.0")
     customrope_base = ctk.StringVar(value="10000")

@@ -1066,7 +1039,7 @@ def show_new_gui():
     makeslider(quick_tab, "BLAS Batch Size:", blasbatchsize_text, blas_size_var, 0, 7, 12, set=5)

     # quick boxes
-    quick_boxes = {"Launch Browser": launchbrowser , "High Priority" : highpriority, "Streaming Mode":stream, "Use SmartContext":smartcontext, "Unban Tokens":unbantokens, "Disable MMAP":disablemmap,}
+    quick_boxes = {"Launch Browser": launchbrowser , "High Priority" : highpriority, "Use SmartContext":smartcontext, "Disable MMAP":disablemmap,}
     for idx, name, in enumerate(quick_boxes):
         makecheckbox(quick_tab, name, quick_boxes[name], int(idx/2) +20, idx%2)
     # context size

@@ -1099,7 +1072,7 @@ def show_new_gui():
     makelabelentry(hardware_tab, "Threads:" , threads_var, 8, 50)

     # hardware checkboxes
-    hardware_boxes = {"Launch Browser": launchbrowser , "High Priority" : highpriority, "Disable MMAP":disablemmap, "Use mlock":usemlock, "PSUtil Set Threads":psutil, "Debug Mode":debugmode, "Keep Foreground":keepforeground}
+    hardware_boxes = {"Launch Browser": launchbrowser , "High Priority" : highpriority, "Disable MMAP":disablemmap, "Use mlock":usemlock, "Debug Mode":debugmode, "Keep Foreground":keepforeground}

     for idx, name, in enumerate(hardware_boxes):
         makecheckbox(hardware_tab, name, hardware_boxes[name], int(idx/2) +30, idx%2)

@@ -1117,24 +1090,10 @@ def show_new_gui():
     # Tokens Tab
     tokens_tab = tabcontent["Tokens"]
     # tokens checkboxes
-    token_boxes = {"Streaming Mode":stream, "Use SmartContext":smartcontext, "Unban Tokens":unbantokens}
+    token_boxes = {"Use SmartContext":smartcontext}
     for idx, name, in enumerate(token_boxes):
         makecheckbox(tokens_tab, name, token_boxes[name], idx + 1)

-    mirostat_entry, mirostate_label = makelabelentry(tokens_tab, "Mirostat:", mirostat_var)
-    mirostat_tau_entry, mirostat_tau_label = makelabelentry(tokens_tab, "Mirostat Tau:", mirostat_tau)
-    mirostat_eta_entry, mirostat_eta_label = makelabelentry(tokens_tab, "Mirostat Eta:", mirostat_eta)
-    def togglemiro(a,b,c):
-        items = [mirostate_label, mirostat_entry, mirostat_tau_label, mirostat_tau_entry, mirostat_eta_label, mirostat_eta_entry]
-        for idx, item in enumerate(items):
-            if usemirostat.get() == 1:
-                item.grid(row=11 + int(idx/2), column=idx%2, padx=8, stick="nw")
-            else:
-                item.grid_forget()
-
-
-    makecheckbox(tokens_tab, "Use Mirostat", row=10, variable=usemirostat, command=togglemiro)
-    togglemiro(1,1,1)

     # context size
     makeslider(tokens_tab, "Context Size:",contextsize_text, context_var, 0, len(contextsize_text)-1, 20, set=2)

@@ -1217,10 +1176,7 @@ def show_new_gui():
         args.launch = launchbrowser.get()==1
         args.highpriority = highpriority.get()==1
         args.nommap = disablemmap.get()==1
-        args.psutil_set_threads = psutil.get()==1
-        args.stream = stream.get()==1
         args.smartcontext = smartcontext.get()==1
-        args.unbantokens = unbantokens.get()==1
         args.foreground = keepforeground.get()==1

         gpuchoiceidx = 0

@@ -1251,7 +1207,6 @@ def show_new_gui():
         args.blasbatchsize = int(blasbatchsize_values[int(blas_size_var.get())])
         args.forceversion = 0 if version_var.get()=="" else int(version_var.get())

-        args.usemirostat = [int(mirostat_var.get()), float(mirostat_tau.get()), float(mirostat_eta.get())] if usemirostat.get()==1 else None
         args.contextsize = int(contextsize_text[context_var.get()])

         if customrope_var.get()==1:

@@ -1277,10 +1232,7 @@ def show_new_gui():
         launchbrowser.set(1 if "launch" in dict and dict["launch"] else 0)
         highpriority.set(1 if "highpriority" in dict and dict["highpriority"] else 0)
         disablemmap.set(1 if "nommap" in dict and dict["nommap"] else 0)
-        psutil.set(1 if "psutil_set_threads" in dict and dict["psutil_set_threads"] else 0)
-        stream.set(1 if "stream" in dict and dict["stream"] else 0)
         smartcontext.set(1 if "smartcontext" in dict and dict["smartcontext"] else 0)
-        unbantokens.set(1 if "unbantokens" in dict and dict["unbantokens"] else 0)
         keepforeground.set(1 if "foreground" in dict and dict["foreground"] else 0)
         if "useclblast" in dict and dict["useclblast"]:
             if clblast_option is not None:

@@ -1331,12 +1283,6 @@ def show_new_gui():
         if "forceversion" in dict and dict["forceversion"]:
             version_var.set(str(dict["forceversion"]))

-        if "usemirostat" in dict and dict["usemirostat"] and len(dict["usemirostat"])>1:
-            usemirostat.set(0 if str(dict["usemirostat"][0])=="0" else 1)
-            mirostat_var.set(str(dict["usemirostat"][0]))
-            mirostat_tau.set(str(dict["usemirostat"][1]))
-            mirostat_eta.set(str(dict["usemirostat"][2]))
-
         if "model_param" in dict and dict["model_param"]:
             model_var.set(dict["model_param"])

@@ -1496,18 +1442,14 @@ def show_old_gui():
     frameC.grid(row=4,column=0,pady=4)
     onDropdownChange(None)

-    stream = tk.IntVar()
     smartcontext = tk.IntVar()
     launchbrowser = tk.IntVar(value=1)
-    unbantokens = tk.IntVar()
     highpriority = tk.IntVar()
     disablemmap = tk.IntVar()
     frameD = tk.Frame(root)
-    tk.Checkbutton(frameD, text='Streaming Mode',variable=stream, onvalue=1, offvalue=0).grid(row=0,column=0)
     tk.Checkbutton(frameD, text='Use SmartContext',variable=smartcontext, onvalue=1, offvalue=0).grid(row=0,column=1)
     tk.Checkbutton(frameD, text='High Priority',variable=highpriority, onvalue=1, offvalue=0).grid(row=1,column=0)
     tk.Checkbutton(frameD, text='Disable MMAP',variable=disablemmap, onvalue=1, offvalue=0).grid(row=1,column=1)
-    tk.Checkbutton(frameD, text='Unban Tokens',variable=unbantokens, onvalue=1, offvalue=0).grid(row=2,column=0)
     tk.Checkbutton(frameD, text='Launch Browser',variable=launchbrowser, onvalue=1, offvalue=0).grid(row=2,column=1)
     frameD.grid(row=5,column=0,pady=4)

@@ -1526,11 +1468,8 @@ def show_old_gui():
         #load all the vars
         args.threads = int(threads_var.get())
         args.gpulayers = int(gpu_layers_var.get())
-
-        args.stream = (stream.get()==1)
         args.smartcontext = (smartcontext.get()==1)
         args.launch = (launchbrowser.get()==1)
-        args.unbantokens = (unbantokens.get()==1)
         args.highpriority = (highpriority.get()==1)
         args.nommap = (disablemmap.get()==1)
         selrunchoice = runchoice.get()

@@ -1899,11 +1838,6 @@ def main(launch_args,start_server=True):
         else:
             args.lora[1] = os.path.abspath(args.lora[1])

-    if args.psutil_set_threads:
-        import psutil
-        args.threads = psutil.cpu_count(logical=False)
-        print("Overriding thread count, using " + str(args.threads) + " threads instead.")
-
     if not args.blasthreads or args.blasthreads <= 0:
         args.blasthreads = args.threads

@@ -1955,17 +1889,6 @@ def main(launch_args,start_server=True):
             timer_thread = threading.Timer(1, onready_subprocess) #1 second delay
             timer_thread.start()

-    # show deprecation warnings
-    if args.unbantokens:
-        print("WARNING: --unbantokens is DEPRECATED and will be removed soon! EOS unbans should now be set via the generate API.")
-    if args.usemirostat:
-        print("WARNING: --usemirostat is DEPRECATED and will be removed soon! Mirostat values should now be set via the generate API.")
-    if args.stream:
-        print("WARNING: --stream is DEPRECATED and will be removed soon! This was a Kobold Lite only parameter, which is now a proper setting toggle inside Lite.")
-    if args.psutil_set_threads:
-        print("WARNING: --psutil_set_threads is DEPRECATED and will be removed soon! This parameter was generally unhelpful and unnecessary, as the defaults were usually sufficient")
-
-
     if start_server:
         print(f"Please connect to custom endpoint at {epurl}")
         asyncio.run(RunServerMultiThreaded(args.host, args.port, embedded_kailite))

@@ -2012,13 +1935,7 @@ if __name__ == '__main__':
     parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0)
     parser.add_argument("--tensor_split", help="For CUDA with ALL GPU set only, ratio to split tensors across multiple GPUs, space-separated list of proportions, e.g. 7 3", metavar=('[Ratios]'), type=float, nargs='+')
     parser.add_argument("--onready", help="An optional shell command to execute after the model has been loaded.", type=str, default="",nargs=1)
-    parser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them. Polled-streaming is disabled while multiple requests are in queue.", action='store_true')
+    parser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", action='store_true')
    parser.add_argument("--foreground", help="Windows only. Sends the terminal to the foreground every time a new prompt is generated. This helps avoid some idle slowdown issues.", action='store_true')

-    #deprecated
-    parser.add_argument("--psutil_set_threads", help="--psutil_set_threads is DEPRECATED and will be removed soon! This parameter was generally unhelpful and unnecessary, as the defaults were usually sufficient.", action='store_true')
-    parser.add_argument("--stream", help="--stream is DEPRECATED and will be removed soon! This was a Kobold Lite only parameter, which is now a proper setting toggle inside Lite.", action='store_true')
-    parser.add_argument("--unbantokens", help="--unbantokens is DEPRECATED and will be removed soon! EOS unbans should now be set via the generate API", action='store_true')
-    parser.add_argument("--usemirostat", help="--usemirostat is DEPRECATED and will be removed soon! Mirostat values should now be set via the generate API",metavar=('[type]', '[tau]', '[eta]'), type=float, nargs=3)

     main(parser.parse_args(),start_server=True)
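Taken together, nothing is lost with the four removed flags, as the deleted deprecation warnings spell out: --stream becomes a toggle inside Kobold Lite, --psutil_set_threads is covered by passing --threads explicitly, and --unbantokens / --usemirostat move into the generate API. A hedged sketch of the last two, reusing the request pattern from the earlier example (field names as they appear in the generate() hunk above):

# Per-request replacement for the old `--usemirostat 2 5.0 0.1 --unbantokens`
# launch flags; send this payload to /api/v1/generate as before.
payload = {
    "prompt": "The moon hung low over the valley",
    "max_length": 80,
    "mirostat": 2,             # 1 or 2 selects the mirostat version
    "mirostat_tau": 5.0,
    "mirostat_eta": 0.1,
    "use_default_badwordsids": False,  # False gives the old --unbantokens behaviour
}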