use filename as default model name

parent a0c1ba7747
commit da8a09ba10

2 changed files with 22 additions and 9 deletions
@@ -77,6 +77,7 @@ For more information, be sure to run the program with the `--help` flag.
 ## Notes
 - Generation delay scales linearly with original prompt length. If OpenBLAS is enabled then prompt ingestion becomes about 2-3x faster. This is automatic on windows, but will require linking on OSX and Linux. CLBlast speeds this up even further, and `--gpulayers` + `--useclblast` or `--usecublas` more so.
 - I have heard of someone claiming a false AV positive report. The exe is a simple pyinstaller bundle that includes the necessary python scripts and dlls to run. If this still concerns you, you might wish to rebuild everything from source code using the makefile, and you can rebuild the exe yourself with pyinstaller by using `make_pyinstaller.bat`
+- API documentation available at `/api` and https://lite.koboldai.net/koboldcpp_api
 - Supported GGML models (Includes backward compatibility for older versions/legacy GGML models, though some newer features might be unavailable):
 - LLAMA and LLAMA2 (LLaMA / Alpaca / GPT4All / Vicuna / Koala / Pygmalion 7B / Metharme 7B / WizardLM and many more)
 - GPT-2 / Cerebras
koboldcpp.py (30 changed lines)
@@ -662,6 +662,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
         if self.path.endswith('/api/extra/abort'):
             if requestsinqueue==0:
                 ag = handle.abort_generate()
+                time.sleep(0.3) #short delay before replying
                 self.send_response(200)
                 self.end_headers()
                 self.wfile.write(json.dumps({"success": ("true" if ag else "false")}).encode())
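The abort route above replies with a small JSON body indicating whether a generation was actually cancelled. A minimal client-side sketch (assuming the route is reached via POST and the server is listening on localhost:5001; both details are assumptions here, not part of this diff):

```python
# Hypothetical client for the /api/extra/abort route shown above.
# Host, port and the use of POST are assumptions for illustration only.
import json
import urllib.request

req = urllib.request.Request("http://localhost:5001/api/extra/abort",
                             data=b"", method="POST")
with urllib.request.urlopen(req) as resp:
    result = json.loads(resp.read())
print(result)  # e.g. {"success": "true"} if a generation was aborted
```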
@@ -702,7 +703,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
                 }}).encode())
                 return
         if reqblocking:
-            requestsinqueue = (requestsinqueue - 1) if requestsinqueue>0 else 0
+            requestsinqueue = (requestsinqueue - 1) if requestsinqueue > 0 else 0

         try:
             sse_stream_flag = False
@@ -727,7 +728,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
             api_format = 4
             force_json = True

-        if api_format>0:
+        if api_format > 0:
             genparams = None
             try:
                 genparams = json.loads(body)
@@ -755,7 +756,6 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
                     self.wfile.write(json.dumps(gen).encode())
                 except:
                     print("Generate: The response could not be sent, maybe connection was terminated?")

                 return
             finally:
                 modelbusy.release()
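The `finally: modelbusy.release()` above guarantees the model is freed even when sending the reply fails mid-connection. A minimal sketch of that acquire/release pattern, using a plain `threading.Lock` as a stand-in (the actual lock object and request plumbing in koboldcpp.py are not shown in this diff):

```python
# Illustrative acquire/try/finally/release pattern, mirroring the hunk above.
# `modelbusy` is a stand-in threading.Lock, not the real koboldcpp object.
import threading

modelbusy = threading.Lock()

def run_generation(payload):
    # placeholder for the real generation call
    return {"results": [{"text": "..."}]}

def handle_generate(payload):
    if not modelbusy.acquire(blocking=False):
        return {"error": "model is busy"}  # hypothetical busy reply
    try:
        return run_generation(payload)
    finally:
        modelbusy.release()  # always release, even if the reply could not be sent

print(handle_generate({"prompt": "hello"}))
```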
@@ -950,10 +950,10 @@ def show_new_gui():
         return entry, label


-    def makefileentry(parent, text, searchtext, var, row=0, width=250):
+    def makefileentry(parent, text, searchtext, var, row=0, width=250, filetypes=[]):
         makelabel(parent, text, row)
         def getfilename(var, text):
-            var.set(askopenfilename(title=text))
+            var.set(askopenfilename(title=text,filetypes=filetypes))
         entry = ctk.CTkEntry(parent, width, textvariable=var)
         entry.grid(row=row+1, column=0, padx=8, stick="nw")
         button = ctk.CTkButton(parent, 50, text="Browse", command= lambda a=var,b=searchtext:getfilename(a,b))
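The new `filetypes` argument is passed straight through to Tk's `askopenfilename`, so the Browse dialog only offers matching files by default. A standalone sketch of the same filter outside the GUI helper (the hidden root window is just scaffolding for the example):

```python
# Standalone sketch of the file filter added above: askopenfilename takes a
# list of (label, pattern) tuples and returns "" if the dialog is cancelled.
import tkinter as tk
from tkinter.filedialog import askopenfilename

root = tk.Tk()
root.withdraw()  # no main window needed just to open the dialog
path = askopenfilename(title="Select GGML Model File",
                       filetypes=[("GGML Model Files", "*.gguf;*.bin;*.ggml")])
print(path)
```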
@@ -1106,7 +1106,7 @@ def show_new_gui():
     makeslider(quick_tab, "Context Size:", contextsize_text, context_var, 0, len(contextsize_text)-1, 30, set=2)

     # load model
-    makefileentry(quick_tab, "Model:", "Select GGML Model File", model_var, 40, 170)
+    makefileentry(quick_tab, "Model:", "Select GGML Model File", model_var, 40, 170,filetypes=[("GGML Model Files", "*.gguf;*.bin;*.ggml")])

     # Hardware Tab
     hardware_tab = tabcontent["Hardware"]
@@ -1173,7 +1173,7 @@ def show_new_gui():
     # Model Tab
     model_tab = tabcontent["Model"]

-    makefileentry(model_tab, "Model:", "Select GGML Model File", model_var, 1)
+    makefileentry(model_tab, "Model:", "Select GGML Model File", model_var, 1, filetypes=[("GGML Model Files", "*.gguf;*.bin;*.ggml")])
     makefileentry(model_tab, "Lora:", "Select Lora File",lora_var, 3)
     makefileentry(model_tab, "Lora Base:", "Select Lora Base File", lora_base_var, 5)

@@ -1646,8 +1646,14 @@ def loadconfigfile(filename):
     for key, value in config.items():
         setattr(args, key, value)

+def sanitize_string(input_string):
+    # alphanumeric characters, dots, dashes, and underscores
+    import re
+    sanitized_string = re.sub( r'[^\w\d\.\-_]', '', input_string)
+    return sanitized_string
+
 def main(launch_args,start_server=True):
-    global args
+    global args, friendlymodelname
     args = launch_args
     embedded_kailite = None
     embedded_kcpp_docs = None
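The regex above keeps only word characters, digits, dots, dashes and underscores, so spaces, slashes and other punctuation are simply dropped from the name. A worked example with the same pattern:

```python
# Worked example of sanitize_string as added above (same regex).
import re

def sanitize_string(input_string):
    return re.sub(r'[^\w\d\.\-_]', '', input_string)

print(sanitize_string("llama-2 7b (chat).Q4_K_M"))  # -> llama-27bchat.Q4_K_M
```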
@@ -1678,8 +1684,14 @@ def main(launch_args,start_server=True):
         time.sleep(3)
         sys.exit(2)

+    # sanitize and replace the default vanity name. remember me....
+    if args.model_param!="":
+        newmdldisplayname = os.path.basename(args.model_param)
+        newmdldisplayname = os.path.splitext(newmdldisplayname)[0]
+        friendlymodelname = "koboldcpp/" + sanitize_string(newmdldisplayname)
+
     if args.hordeconfig and args.hordeconfig[0]!="":
-        global friendlymodelname, maxhordelen, maxhordectx, showdebug
+        global maxhordelen, maxhordectx, showdebug
         friendlymodelname = args.hordeconfig[0]
         if not friendlymodelname.startswith("koboldcpp/"):
             friendlymodelname = "koboldcpp/" + friendlymodelname
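Taken together, the default model name is now derived from the model file's name (basename, extension stripped, sanitized, prefixed with `koboldcpp/`), and `--hordeconfig` can still override it afterwards. A small sketch of the same transformation with a made-up path:

```python
# Sketch of the new default-name derivation: basename -> strip extension ->
# sanitize -> prefix with "koboldcpp/". The path below is invented for illustration.
import os
import re

def sanitize_string(input_string):
    return re.sub(r'[^\w\d\.\-_]', '', input_string)

model_param = "/models/my llama2 7b.Q4_K_M.gguf"
name = os.path.splitext(os.path.basename(model_param))[0]
print("koboldcpp/" + sanitize_string(name))  # -> koboldcpp/myllama27b.Q4_K_M
```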