use filename as default model name

Concedo 2023-10-05 22:24:20 +08:00
parent a0c1ba7747
commit da8a09ba10
2 changed files with 22 additions and 9 deletions

View file

@@ -77,6 +77,7 @@ For more information, be sure to run the program with the `--help` flag.
## Notes
- Generation delay scales linearly with original prompt length. If OpenBLAS is enabled then prompt ingestion becomes about 2-3x faster. This is automatic on windows, but will require linking on OSX and Linux. CLBlast speeds this up even further, and `--gpulayers` + `--useclblast` or `--usecublas` more so.
- I have heard of someone claiming a false AV positive report. The exe is a simple pyinstaller bundle that includes the necessary python scripts and dlls to run. If this still concerns you, you might wish to rebuild everything from source code using the makefile, and you can rebuild the exe yourself with pyinstaller by using `make_pyinstaller.bat`
+- API documentation available at `/api` and https://lite.koboldai.net/koboldcpp_api
- Supported GGML models (Includes backward compatibility for older versions/legacy GGML models, though some newer features might be unavailable):
  - LLAMA and LLAMA2 (LLaMA / Alpaca / GPT4All / Vicuna / Koala / Pygmalion 7B / Metharme 7B / WizardLM and many more)
  - GPT-2 / Cerebras

View file

@@ -662,6 +662,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
if self.path.endswith('/api/extra/abort'):
    if requestsinqueue==0:
        ag = handle.abort_generate()
+        time.sleep(0.3) #short delay before replying
        self.send_response(200)
        self.end_headers()
        self.wfile.write(json.dumps({"success": ("true" if ag else "false")}).encode())
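For context, the route above now pauses briefly before acknowledging the abort and then returns a small JSON flag. A minimal client-side sketch of hitting that endpoint, assuming a koboldcpp server is already running locally (the host and port below are assumptions, not taken from this commit):

```python
# Hypothetical client for the /api/extra/abort route shown above.
import json
import urllib.request

req = urllib.request.Request(
    "http://localhost:5001/api/extra/abort",  # assumed default host/port
    data=json.dumps({}).encode("utf-8"),      # empty JSON body
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    # the server replies after the short delay with {"success": "true"} or {"success": "false"}
    print(json.loads(resp.read().decode("utf-8")))
```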
@@ -702,7 +703,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
    }}).encode())
    return
if reqblocking:
-    requestsinqueue = (requestsinqueue - 1) if requestsinqueue>0 else 0
+    requestsinqueue = (requestsinqueue - 1) if requestsinqueue > 0 else 0
try:
    sse_stream_flag = False
@@ -727,7 +728,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
    api_format = 4
    force_json = True
-if api_format>0:
+if api_format > 0:
    genparams = None
    try:
        genparams = json.loads(body)
@@ -755,7 +756,6 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
    self.wfile.write(json.dumps(gen).encode())
except:
    print("Generate: The response could not be sent, maybe connection was terminated?")
    return
finally:
    modelbusy.release()
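The hunk above is the tail end of the generate handler, where the finished result is written back to the client and the busy lock is released. For reference, a hedged sketch of what a caller of that endpoint can look like, using the commonly seen KoboldAI-style payload (field names are assumptions, not verified against this commit):

```python
# Hypothetical caller of the /api/v1/generate endpoint; payload fields are assumptions.
import json
import urllib.request

payload = {"prompt": "Once upon a time", "max_length": 50}
req = urllib.request.Request(
    "http://localhost:5001/api/v1/generate",   # assumed default host/port
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    # typically returns a structure like {"results": [{"text": "..."}]}
    print(json.loads(resp.read().decode("utf-8")))
```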
@@ -950,10 +950,10 @@ def show_new_gui():
    return entry, label

-def makefileentry(parent, text, searchtext, var, row=0, width=250):
+def makefileentry(parent, text, searchtext, var, row=0, width=250, filetypes=[]):
    makelabel(parent, text, row)
    def getfilename(var, text):
-        var.set(askopenfilename(title=text))
+        var.set(askopenfilename(title=text,filetypes=filetypes))
    entry = ctk.CTkEntry(parent, width, textvariable=var)
    entry.grid(row=row+1, column=0, padx=8, stick="nw")
    button = ctk.CTkButton(parent, 50, text="Browse", command= lambda a=var,b=searchtext:getfilename(a,b))
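The new `filetypes` parameter is forwarded unchanged to Tk's file dialog, which is what lets the model pickers below filter for model files. A small standalone sketch of that passthrough, outside the koboldcpp GUI (purely illustrative):

```python
# Standalone illustration of the filetypes passthrough; not part of the commit.
import tkinter as tk
from tkinter.filedialog import askopenfilename

root = tk.Tk()
root.withdraw()  # hide the empty main window; we only want the dialog

# Same (label, pattern) format the commit passes for model files.
model_filetypes = [("GGML Model Files", "*.gguf;*.bin;*.ggml")]
path = askopenfilename(title="Select GGML Model File", filetypes=model_filetypes)
print(path or "no file selected")
```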
@@ -1106,7 +1106,7 @@ def show_new_gui():
makeslider(quick_tab, "Context Size:", contextsize_text, context_var, 0, len(contextsize_text)-1, 30, set=2)

# load model
-makefileentry(quick_tab, "Model:", "Select GGML Model File", model_var, 40, 170)
+makefileentry(quick_tab, "Model:", "Select GGML Model File", model_var, 40, 170,filetypes=[("GGML Model Files", "*.gguf;*.bin;*.ggml")])

# Hardware Tab
hardware_tab = tabcontent["Hardware"]
@@ -1173,7 +1173,7 @@ def show_new_gui():
# Model Tab
model_tab = tabcontent["Model"]

-makefileentry(model_tab, "Model:", "Select GGML Model File", model_var, 1)
+makefileentry(model_tab, "Model:", "Select GGML Model File", model_var, 1, filetypes=[("GGML Model Files", "*.gguf;*.bin;*.ggml")])
makefileentry(model_tab, "Lora:", "Select Lora File",lora_var, 3)
makefileentry(model_tab, "Lora Base:", "Select Lora Base File", lora_base_var, 5)
@@ -1646,8 +1646,14 @@ def loadconfigfile(filename):
    for key, value in config.items():
        setattr(args, key, value)

+def sanitize_string(input_string):
+    # alphanumeric characters, dots, dashes, and underscores
+    import re
+    sanitized_string = re.sub( r'[^\w\d\.\-_]', '', input_string)
+    return sanitized_string
+
def main(launch_args,start_server=True):
-    global args
+    global args, friendlymodelname
    args = launch_args
    embedded_kailite = None
    embedded_kcpp_docs = None
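The new `sanitize_string` helper strips everything except word characters, digits, dots, dashes, and underscores, so the derived display name stays safe to use as an identifier. A quick illustration of its effect (standalone copy of the function, shown only for demonstration):

```python
import re

# Standalone copy of the helper above, for illustration only.
def sanitize_string(input_string):
    # keep word characters, digits, dots, dashes, underscores; drop the rest
    return re.sub(r'[^\w\d\.\-_]', '', input_string)

print(sanitize_string("My Model (Q4_K_M) v1.1"))  # -> MyModelQ4_K_Mv1.1
print(sanitize_string("llama-2-7b.Q4_K_M"))       # unchanged: llama-2-7b.Q4_K_M
```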
@@ -1678,8 +1684,14 @@ def main(launch_args,start_server=True):
        time.sleep(3)
        sys.exit(2)

+    # sanitize and replace the default vanity name. remember me....
+    if args.model_param!="":
+        newmdldisplayname = os.path.basename(args.model_param)
+        newmdldisplayname = os.path.splitext(newmdldisplayname)[0]
+        friendlymodelname = "koboldcpp/" + sanitize_string(newmdldisplayname)
+
    if args.hordeconfig and args.hordeconfig[0]!="":
-        global friendlymodelname, maxhordelen, maxhordectx, showdebug
+        global maxhordelen, maxhordectx, showdebug
        friendlymodelname = args.hordeconfig[0]
        if not friendlymodelname.startswith("koboldcpp/"):
            friendlymodelname = "koboldcpp/" + friendlymodelname