From 32cf02487e19649a2413ec696fb59c3a1b28a3c0 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sat, 23 Sep 2023 23:32:00 +0800 Subject: [PATCH] colab use mmq, update lite and ver --- colab.ipynb | 4 ++-- klite.embd | 6 ++++-- koboldcpp.py | 5 ++++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/colab.ipynb b/colab.ipynb index 81f6b0dd1..da9bef59a 100644 --- a/colab.ipynb +++ b/colab.ipynb @@ -39,7 +39,7 @@ "#@title v-- Enter your model below and then click this to start Koboldcpp\n", "\n", "Model = \"https://huggingface.co/TheBloke/Airoboros-L2-13B-2.2-GGUF/resolve/main/airoboros-l2-13b-2.2.Q4_K_M.gguf\" #@param [\"\"]{allow-input: true}\n", - "Layers = 40 #@param [40]{allow-input: true}\n", + "Layers = 43 #@param [43]{allow-input: true}\n", "\n", "%cd /content\n", "!git clone https://github.com/LostRuins/koboldcpp\n", @@ -52,7 +52,7 @@ "!nohup ./cloudflared-linux-amd64 tunnel --url http://localhost:5001 &\n", "!sleep 10\n", "!cat nohup.out\n", - "!python koboldcpp.py model.ggml --stream --usecublas 0 --gpulayers $Layers --hordeconfig concedo\n" + "!python koboldcpp.py model.ggml --stream --usecublas 0 mmq --gpulayers $Layers --hordeconfig concedo\n" ] } ] diff --git a/klite.embd b/klite.embd index 9a4a31f6a..1294ddf28 100644 --- a/klite.embd +++ b/klite.embd @@ -9183,6 +9183,7 @@ Current version: 68 newbodystr = newbodystr.replace(new RegExp(`${contextDict[`${role}Open`]}([^]*?)${contextDict.closeTag}`, 'g'), (match, p) => { let replacedText = match.replace(/<[^>]*>/g, (htmlPart) => { internalHTMLparts.push(htmlPart); return ``; }); replacedText = replacedText.replace(/\*(\S[^*]+\S)\*/g, wrapperSpan(styleRole, 'action')); // Apply the actions style to *actions*. + replacedText = replacedText.replace(/“(.*?)”/g, wrapperSpan(styleRole, 'speech')); // Apply the speech style to "speech". replacedText = replacedText.replace(/"(.*?)"/g, wrapperSpan(styleRole, 'speech')); // Apply the speech style to "speech". return replacedText; }); @@ -9199,8 +9200,9 @@ Current version: 68 } function wrapperSpan(role, type) { let fontStyle = type=='action'?'italic':'normal'; - let injectQuotes = type=='speech'?'"':''; - let textCol = as[`${type}_tcolor_${role}`]; return `${injectQuotes}$1${injectQuotes}`; + let injectQuotes1 = type=='speech'?'“':''; + let injectQuotes2 = type=='speech'?'”':''; + let textCol = as[`${type}_tcolor_${role}`]; return `${injectQuotes1}$1${injectQuotes2}`; } function image(role) { if (!as[`${role}_portrait`] || as.border_style == 'None' || role == 'sys') { return ''; } diff --git a/koboldcpp.py b/koboldcpp.py index 67fa95441..4fb6d7410 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -354,7 +354,7 @@ maxhordelen = 256 modelbusy = threading.Lock() requestsinqueue = 0 defaultport = 5001 -KcppVersion = "1.44.2" +KcppVersion = "1.45" showdebug = True showsamplerwarning = True showmaxctxwarning = True @@ -556,6 +556,9 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler): lastc = handle.get_last_token_count() stopreason = handle.get_last_stop_reason() response_body = (json.dumps({"last_process":lastp,"last_eval":laste,"last_token_count":lastc, "stop_reason":stopreason, "queue":requestsinqueue, "idle":(0 if modelbusy.locked() else 1)}).encode()) + elif self.path.endswith(('/api')) or self.path.endswith(('/api/v1')): + response_body = (json.dumps({"result":"KoboldCpp partial API reference can be found at https://link.concedo.workers.dev/koboldapi"}).encode()) + if response_body is None: self.send_response(404)