add /completion.js file to make it easy to use the server from js

Tobias Lütke 2023-07-02 15:56:10 -04:00
parent 8e1b04d319
commit dd1df3f31c
6 changed files with 793 additions and 694 deletions
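
The new /completion.js itself is not shown in this excerpt. Reconstructed from the streaming code removed from index.html below and from the call site (await llamaComplete(params, controller.value, callback)), a minimal sketch of what the file might export looks like the following; the real file may differ in its details:

// completion.js (sketch, not the verbatim file added by this commit):
// POST the completion params to /completion, read the SSE stream and
// invoke `callback` for every parsed event until the server signals stop.
export const llamaComplete = async (params, controller, callback) => {
  const response = await fetch("/completion", {
    method: 'POST',
    body: JSON.stringify({ ...params, stream: true }),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream'
    },
    signal: controller.signal,
  });

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  while (true) {
    const result = await reader.read();
    if (result.done) {
      break;
    }

    // SSE events arrive as `key: value` lines; the payload we care about
    // is the JSON carried in the data field
    const text = decoder.decode(result.value);
    const regex = /^(\S+):\s(.*)$/gm;
    for (const match of text.matchAll(regex)) {
      result[match[1]] = match[2];
    }
    result.data = JSON.parse(result.data);

    callback(result);

    // the final event carries stop: true
    if (result.data.stop) {
      break;
    }
  }
};

The diff below then replaces the inline llamaCompletionStream helper in index.html with a call to this function.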

@@ -63,12 +63,13 @@
}
</style>
<script type="module">
import {
html, h, signal, effect, computed, render, useSignal, useEffect, useRef
} from '/index.js';
import { llamaComplete } from '/completion.js';
const transcript = signal([])
const chatStarted = computed(() => transcript.value.length > 0)
@@ -93,79 +94,13 @@
return String(str).replaceAll(/\{\{(.*?)\}\}/g, (_, key) => template(params[key]));
}
const llamaCompletionStream = async (params) => {
controller.value = new AbortController();
const sig = controller.value.signal;
const data = JSON.stringify({
stream: true,
prompt: params.prompt,
n_predict: parseInt(nPredict.value),
temperature: parseFloat(temperature.value),
stop: ["</s>", template("{{bot}}:"), template("{{user}}:")]
});
// we use fetch directly here because the built-in EventSource API does not support POST
const response = await fetch("/completion", {
method: 'POST',
body: data,
headers: {
'Connection': 'keep-alive',
'Content-Type': 'application/json',
'Accept': 'text/event-stream'
},
signal: sig,
});
const reader = response.body.getReader();
const decoder = new TextDecoder();
try {
while (true) {
const result = await reader.read();
if (result.done) {
break;
}
// SSE answers arrive as multiple lines of the form key: value\n, with data always present as a key. In our case we
// mainly care about the data: key, whose value we expect to be JSON
const text = decoder.decode(result.value);
// parse all sse events and add them to result
const regex = /^(\S+):\s(.*)$/gm;
for (const match of text.matchAll(regex)) {
result[match[1]] = match[2]
}
// since we know this is llama.cpp, let's just decode the json in data
result.data = JSON.parse(result.data);
// pass the parsed event to the caller's callback
params.onmessage(result);
// if we got a stop token from the server, we will break here
if (result.data.stop) {
break;
}
}
} catch (e) {
console.error(e);
throw e;
}
finally {
controller.value.abort();
controller.value = null;
}
}
// send message to server
const chat = async (msg) => {
if (controller.value) {
console.log('already running...');
return;
}
controller.value = new AbortController();
transcript.value = [...transcript.value, ['{{user}}', msg]];
const payload = template(chatTemplate.value, {
@@ -176,19 +111,25 @@
let currentMessage = '';
let history = transcript.value;
llamaCompletionStream({
const params = {
prompt: payload,
onmessage: (message) => {
const data = message.data;
currentMessage += data.content;
n_predict: parseInt(nPredict.value),
temperature: parseFloat(temperature.value),
stop: ["</s>", template("{{bot}}:"), template("{{user}}:")],
}
transcript.value = [...history,["{{bot}}", currentMessage]];
await llamaComplete(params, controller.value, (message) => {
const data = message.data;
currentMessage += data.content;
if (data.stop) {
console.log("-->", data, ' response was:', currentMessage, 'transcript state:', transcript.value);
}
transcript.value = [...history,["{{bot}}", currentMessage]];
if (data.stop) {
console.log("-->", data, ' response was:', currentMessage, 'transcript state:', transcript.value);
}
});
})
controller.value = null;
}
function MessageInput() {