Colab: use mmq; update Lite and bump version to 1.45
This commit is contained in:
parent
60098a176b
commit
32cf02487e
3 changed files with 10 additions and 5 deletions
|
@ -39,7 +39,7 @@
|
||||||
"#@title <b>v-- Enter your model below and then click this to start Koboldcpp</b>\n",
|
"#@title <b>v-- Enter your model below and then click this to start Koboldcpp</b>\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Model = \"https://huggingface.co/TheBloke/Airoboros-L2-13B-2.2-GGUF/resolve/main/airoboros-l2-13b-2.2.Q4_K_M.gguf\" #@param [\"\"]{allow-input: true}\n",
|
"Model = \"https://huggingface.co/TheBloke/Airoboros-L2-13B-2.2-GGUF/resolve/main/airoboros-l2-13b-2.2.Q4_K_M.gguf\" #@param [\"\"]{allow-input: true}\n",
|
||||||
"Layers = 40 #@param [40]{allow-input: true}\n",
|
"Layers = 43 #@param [43]{allow-input: true}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"%cd /content\n",
|
"%cd /content\n",
|
||||||
"!git clone https://github.com/LostRuins/koboldcpp\n",
|
"!git clone https://github.com/LostRuins/koboldcpp\n",
|
||||||
|
@ -52,7 +52,7 @@
|
||||||
"!nohup ./cloudflared-linux-amd64 tunnel --url http://localhost:5001 &\n",
|
"!nohup ./cloudflared-linux-amd64 tunnel --url http://localhost:5001 &\n",
|
||||||
"!sleep 10\n",
|
"!sleep 10\n",
|
||||||
"!cat nohup.out\n",
|
"!cat nohup.out\n",
|
||||||
"!python koboldcpp.py model.ggml --stream --usecublas 0 --gpulayers $Layers --hordeconfig concedo\n"
|
"!python koboldcpp.py model.ggml --stream --usecublas 0 mmq --gpulayers $Layers --hordeconfig concedo\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -9183,6 +9183,7 @@ Current version: 68
|
||||||
newbodystr = newbodystr.replace(new RegExp(`${contextDict[`${role}Open`]}([^]*?)${contextDict.closeTag}`, 'g'), (match, p) => {
|
newbodystr = newbodystr.replace(new RegExp(`${contextDict[`${role}Open`]}([^]*?)${contextDict.closeTag}`, 'g'), (match, p) => {
|
||||||
let replacedText = match.replace(/<[^>]*>/g, (htmlPart) => { internalHTMLparts.push(htmlPart); return `<internal_html_${internalHTMLparts.length - 1}>`; });
|
let replacedText = match.replace(/<[^>]*>/g, (htmlPart) => { internalHTMLparts.push(htmlPart); return `<internal_html_${internalHTMLparts.length - 1}>`; });
|
||||||
replacedText = replacedText.replace(/\*(\S[^*]+\S)\*/g, wrapperSpan(styleRole, 'action')); // Apply the actions style to *actions*.
|
replacedText = replacedText.replace(/\*(\S[^*]+\S)\*/g, wrapperSpan(styleRole, 'action')); // Apply the actions style to *actions*.
|
||||||
|
replacedText = replacedText.replace(/“(.*?)”/g, wrapperSpan(styleRole, 'speech')); // Apply the speech style to "speech".
|
||||||
replacedText = replacedText.replace(/"(.*?)"/g, wrapperSpan(styleRole, 'speech')); // Apply the speech style to "speech".
|
replacedText = replacedText.replace(/"(.*?)"/g, wrapperSpan(styleRole, 'speech')); // Apply the speech style to "speech".
|
||||||
return replacedText;
|
return replacedText;
|
||||||
});
|
});
|
||||||
|
@ -9199,8 +9200,9 @@ Current version: 68
|
||||||
}
|
}
|
||||||
function wrapperSpan(role, type) {
|
function wrapperSpan(role, type) {
|
||||||
let fontStyle = type=='action'?'italic':'normal';
|
let fontStyle = type=='action'?'italic':'normal';
|
||||||
let injectQuotes = type=='speech'?'"':'';
|
let injectQuotes1 = type=='speech'?'“':'';
|
||||||
let textCol = as[`${type}_tcolor_${role}`]; return `<span style='color: ${textCol}; font-style: ${fontStyle}; font-weight: normal'>${injectQuotes}$1${injectQuotes}</span>`;
|
let injectQuotes2 = type=='speech'?'”':'';
|
||||||
|
let textCol = as[`${type}_tcolor_${role}`]; return `<span style='color: ${textCol}; font-style: ${fontStyle}; font-weight: normal'>${injectQuotes1}$1${injectQuotes2}</span>`;
|
||||||
}
|
}
|
||||||
function image(role) {
|
function image(role) {
|
||||||
if (!as[`${role}_portrait`] || as.border_style == 'None' || role == 'sys') { return ''; }
|
if (!as[`${role}_portrait`] || as.border_style == 'None' || role == 'sys') { return ''; }
|
||||||
|
|
|
@ -354,7 +354,7 @@ maxhordelen = 256
|
||||||
modelbusy = threading.Lock()
|
modelbusy = threading.Lock()
|
||||||
requestsinqueue = 0
|
requestsinqueue = 0
|
||||||
defaultport = 5001
|
defaultport = 5001
|
||||||
KcppVersion = "1.44.2"
|
KcppVersion = "1.45"
|
||||||
showdebug = True
|
showdebug = True
|
||||||
showsamplerwarning = True
|
showsamplerwarning = True
|
||||||
showmaxctxwarning = True
|
showmaxctxwarning = True
|
||||||
|
@ -556,6 +556,9 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
lastc = handle.get_last_token_count()
|
lastc = handle.get_last_token_count()
|
||||||
stopreason = handle.get_last_stop_reason()
|
stopreason = handle.get_last_stop_reason()
|
||||||
response_body = (json.dumps({"last_process":lastp,"last_eval":laste,"last_token_count":lastc, "stop_reason":stopreason, "queue":requestsinqueue, "idle":(0 if modelbusy.locked() else 1)}).encode())
|
response_body = (json.dumps({"last_process":lastp,"last_eval":laste,"last_token_count":lastc, "stop_reason":stopreason, "queue":requestsinqueue, "idle":(0 if modelbusy.locked() else 1)}).encode())
|
||||||
|
elif self.path.endswith(('/api')) or self.path.endswith(('/api/v1')):
|
||||||
|
response_body = (json.dumps({"result":"KoboldCpp partial API reference can be found at https://link.concedo.workers.dev/koboldapi"}).encode())
|
||||||
|
|
||||||
|
|
||||||
if response_body is None:
|
if response_body is None:
|
||||||
self.send_response(404)
|
self.send_response(404)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue