colab use mmq, update lite and ver

This commit is contained in:
Concedo 2023-09-23 23:32:00 +08:00
parent 60098a176b
commit 32cf02487e
3 changed files with 10 additions and 5 deletions

View file

@@ -39,7 +39,7 @@
"#@title <b>v-- Enter your model below and then click this to start Koboldcpp</b>\n", "#@title <b>v-- Enter your model below and then click this to start Koboldcpp</b>\n",
"\n", "\n",
"Model = \"https://huggingface.co/TheBloke/Airoboros-L2-13B-2.2-GGUF/resolve/main/airoboros-l2-13b-2.2.Q4_K_M.gguf\" #@param [\"\"]{allow-input: true}\n", "Model = \"https://huggingface.co/TheBloke/Airoboros-L2-13B-2.2-GGUF/resolve/main/airoboros-l2-13b-2.2.Q4_K_M.gguf\" #@param [\"\"]{allow-input: true}\n",
"Layers = 40 #@param [40]{allow-input: true}\n", "Layers = 43 #@param [43]{allow-input: true}\n",
"\n", "\n",
"%cd /content\n", "%cd /content\n",
"!git clone https://github.com/LostRuins/koboldcpp\n", "!git clone https://github.com/LostRuins/koboldcpp\n",
@@ -52,7 +52,7 @@
"!nohup ./cloudflared-linux-amd64 tunnel --url http://localhost:5001 &\n", "!nohup ./cloudflared-linux-amd64 tunnel --url http://localhost:5001 &\n",
"!sleep 10\n", "!sleep 10\n",
"!cat nohup.out\n", "!cat nohup.out\n",
"!python koboldcpp.py model.ggml --stream --usecublas 0 --gpulayers $Layers --hordeconfig concedo\n" "!python koboldcpp.py model.ggml --stream --usecublas 0 mmq --gpulayers $Layers --hordeconfig concedo\n"
] ]
} }
] ]

View file

@@ -9183,6 +9183,7 @@ Current version: 68
newbodystr = newbodystr.replace(new RegExp(`${contextDict[`${role}Open`]}([^]*?)${contextDict.closeTag}`, 'g'), (match, p) => { newbodystr = newbodystr.replace(new RegExp(`${contextDict[`${role}Open`]}([^]*?)${contextDict.closeTag}`, 'g'), (match, p) => {
let replacedText = match.replace(/<[^>]*>/g, (htmlPart) => { internalHTMLparts.push(htmlPart); return `<internal_html_${internalHTMLparts.length - 1}>`; }); let replacedText = match.replace(/<[^>]*>/g, (htmlPart) => { internalHTMLparts.push(htmlPart); return `<internal_html_${internalHTMLparts.length - 1}>`; });
replacedText = replacedText.replace(/\*(\S[^*]+\S)\*/g, wrapperSpan(styleRole, 'action')); // Apply the actions style to *actions*. replacedText = replacedText.replace(/\*(\S[^*]+\S)\*/g, wrapperSpan(styleRole, 'action')); // Apply the actions style to *actions*.
replacedText = replacedText.replace(/“(.*?)”/g, wrapperSpan(styleRole, 'speech')); // Apply the speech style to "speech".
replacedText = replacedText.replace(/&quot;(.*?)&quot;/g, wrapperSpan(styleRole, 'speech')); // Apply the speech style to "speech". replacedText = replacedText.replace(/&quot;(.*?)&quot;/g, wrapperSpan(styleRole, 'speech')); // Apply the speech style to "speech".
return replacedText; return replacedText;
}); });
@@ -9199,8 +9200,9 @@ Current version: 68
} }
function wrapperSpan(role, type) { function wrapperSpan(role, type) {
let fontStyle = type=='action'?'italic':'normal'; let fontStyle = type=='action'?'italic':'normal';
let injectQuotes = type=='speech'?'&quot;':''; let injectQuotes1 = type=='speech'?'“':'';
let textCol = as[`${type}_tcolor_${role}`]; return `<span style='color: ${textCol}; font-style: ${fontStyle}; font-weight: normal'>${injectQuotes}$1${injectQuotes}</span>`; let injectQuotes2 = type=='speech'?'”':'';
let textCol = as[`${type}_tcolor_${role}`]; return `<span style='color: ${textCol}; font-style: ${fontStyle}; font-weight: normal'>${injectQuotes1}$1${injectQuotes2}</span>`;
} }
function image(role) { function image(role) {
if (!as[`${role}_portrait`] || as.border_style == 'None' || role == 'sys') { return ''; } if (!as[`${role}_portrait`] || as.border_style == 'None' || role == 'sys') { return ''; }

View file

@@ -354,7 +354,7 @@ maxhordelen = 256
modelbusy = threading.Lock() modelbusy = threading.Lock()
requestsinqueue = 0 requestsinqueue = 0
defaultport = 5001 defaultport = 5001
KcppVersion = "1.44.2" KcppVersion = "1.45"
showdebug = True showdebug = True
showsamplerwarning = True showsamplerwarning = True
showmaxctxwarning = True showmaxctxwarning = True
@@ -556,6 +556,9 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
lastc = handle.get_last_token_count() lastc = handle.get_last_token_count()
stopreason = handle.get_last_stop_reason() stopreason = handle.get_last_stop_reason()
response_body = (json.dumps({"last_process":lastp,"last_eval":laste,"last_token_count":lastc, "stop_reason":stopreason, "queue":requestsinqueue, "idle":(0 if modelbusy.locked() else 1)}).encode()) response_body = (json.dumps({"last_process":lastp,"last_eval":laste,"last_token_count":lastc, "stop_reason":stopreason, "queue":requestsinqueue, "idle":(0 if modelbusy.locked() else 1)}).encode())
elif self.path.endswith(('/api')) or self.path.endswith(('/api/v1')):
response_body = (json.dumps({"result":"KoboldCpp partial API reference can be found at https://link.concedo.workers.dev/koboldapi"}).encode())
if response_body is None: if response_body is None:
self.send_response(404) self.send_response(404)