get rid of completion.js
This commit is contained in:
parent
1a31d0dc00
commit
bd2f59e50a
5 changed files with 39 additions and 236 deletions
|
@ -15,7 +15,7 @@
|
||||||
<!-- sidebar -->
|
<!-- sidebar -->
|
||||||
<div class="drawer-side h-screen lg:h-screen z-50 lg:max-w-64">
|
<div class="drawer-side h-screen lg:h-screen z-50 lg:max-w-64">
|
||||||
<label for="toggle-drawer" aria-label="close sidebar" class="drawer-overlay"></label>
|
<label for="toggle-drawer" aria-label="close sidebar" class="drawer-overlay"></label>
|
||||||
<div class="flex flex-col bg-base-200 min-h-full max-w-[calc(100vw-2em)] py-4 px-4">
|
<div class="flex flex-col bg-base-200 min-h-full max-w-64 py-4 px-4">
|
||||||
<div class="flex flex-row items-center justify-between mb-4 mt-4">
|
<div class="flex flex-row items-center justify-between mb-4 mt-4">
|
||||||
<h2 class="font-bold ml-4">Conversations</h2>
|
<h2 class="font-bold ml-4">Conversations</h2>
|
||||||
|
|
||||||
|
|
7
examples/server/webui/package-lock.json
generated
7
examples/server/webui/package-lock.json
generated
|
@ -13,6 +13,7 @@
|
||||||
"markdown-it": "^14.1.0",
|
"markdown-it": "^14.1.0",
|
||||||
"postcss": "^8.4.49",
|
"postcss": "^8.4.49",
|
||||||
"tailwindcss": "^3.4.15",
|
"tailwindcss": "^3.4.15",
|
||||||
|
"textlinestream": "^1.1.1",
|
||||||
"vite-plugin-singlefile": "^2.0.3",
|
"vite-plugin-singlefile": "^2.0.3",
|
||||||
"vue": "^3.5.13"
|
"vue": "^3.5.13"
|
||||||
},
|
},
|
||||||
|
@ -2677,6 +2678,12 @@
|
||||||
"node": ">=14.0.0"
|
"node": ">=14.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/textlinestream": {
|
||||||
|
"version": "1.1.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/textlinestream/-/textlinestream-1.1.1.tgz",
|
||||||
|
"integrity": "sha512-iBHbi7BQxrFmwZUQJsT0SjNzlLLsXhvW/kg7EyOMVMBIrlnj/qYofwo1LVLZi+3GbUEo96Iu2eqToI2+lZoAEQ==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/uc.micro": {
|
"node_modules/uc.micro": {
|
||||||
"version": "2.1.0",
|
"version": "2.1.0",
|
||||||
"resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-2.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-2.1.0.tgz",
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
"markdown-it": "^14.1.0",
|
"markdown-it": "^14.1.0",
|
||||||
"postcss": "^8.4.49",
|
"postcss": "^8.4.49",
|
||||||
"tailwindcss": "^3.4.15",
|
"tailwindcss": "^3.4.15",
|
||||||
|
"textlinestream": "^1.1.1",
|
||||||
"vite-plugin-singlefile": "^2.0.3",
|
"vite-plugin-singlefile": "^2.0.3",
|
||||||
"vue": "^3.5.13"
|
"vue": "^3.5.13"
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,225 +0,0 @@
|
||||||
// Request parameters applied to every completion unless the caller overrides them.
// Frozen so a stray write cannot change the defaults seen by later requests;
// llama() spreads this object into a fresh copy for each call.
const paramDefaults = Object.freeze({
  stream: true,
  temperature: 0.2,
});

// Cached generation settings; null until llama() or llamaModelInfo() stores them.
let generation_settings = null;
|
|
||||||
|
|
||||||
/**
 * Error raised when a completion request fails.
 *
 * @param {string} message - Human-readable description of the failure.
 * @param {string} name - Value exposed as `error.name` (e.g. 'ServerError').
 * @param {*} [data] - Optional extra payload, kept on `error.data`.
 */
export class CompletionError extends Error {
  constructor(message, name, data) {
    super(message);
    this.name = name;
    // The third argument used to be accepted and then dropped; keep it so
    // callers can inspect whatever details were attached.
    this.data = data;
  }
}
|
|
||||||
|
|
||||||
// Completes the prompt as a generator. Recommended for most use cases.
//
// Example:
//
//    import { llama } from '/completion.js'
//
//    const request = llama("Tell me a joke", {n_predict: 800})
//    for await (const chunk of request) {
//      document.write(chunk.data.content)
//    }
//
// Parameters:
//   prompt - sent as the `prompt` field of the JSON request body
//   params - request parameters merged over paramDefaults; when params.api_key is
//            set it is also sent as a Bearer token
//   config - { controller, api_url, endpoint }; endpoint defaults to '/completion'
//
// Yields the object returned by reader.read(), augmented with the fields of the
// SSE line that was just parsed (chunk.data holds the decoded JSON payload).
// Returns the concatenation of every data.content seen.
// Throws CompletionError('…', 'ServerError') when the response status is not 200,
// and Error('slot unavailable') when the server reports that in an `error:` line.
// The controller is aborted when the generator finishes, fails, or is abandoned.
export async function* llama(prompt, params = {}, config = {}) {
  const controller = config.controller || new AbortController();
  const api_url = config.api_url?.replace(/\/+$/, '') || "";

  const completionParams = { ...paramDefaults, ...params, prompt };

  const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream',
      ...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
    },
    signal: controller.signal,
  });

  if (response.status !== 200) {
    // Always fail on a non-200 answer. Previously a body without error.message
    // fell through and was parsed as if it were an event stream.
    let message;
    try {
      const body = await response.json();
      message = body?.error?.message;
    } catch (err) {
      message = err.message;
    }
    throw new CompletionError(message || `HTTP ${response.status}`, 'ServerError');
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  // "<field>: <value>". No `g` flag: exec() on a global regex resumes from
  // lastIndex, which made a field line that directly follows another one fail to
  // match. `m` is kept so `$` still matches before a trailing '\r'.
  const sseLine = /^(\S+):\s(.*)$/m;

  let content = "";
  let leftover = ""; // Buffer for partially read lines

  try {
    let cont = true;

    while (cont) {
      const result = await reader.read();
      if (result.done) {
        break;
      }

      // stream: true keeps a multi-byte character that straddles two network
      // chunks intact instead of decoding each half to U+FFFD.
      const text = leftover + decoder.decode(result.value, { stream: true });

      // The element after the final '\n' is either "" (text ended on a line
      // break) or an incomplete line; either way it is carried to the next read.
      const lines = text.split('\n');
      leftover = lines.pop();

      for (const line of lines) {
        const match = sseLine.exec(line);
        if (!match) {
          continue;
        }

        const [, field, value] = match;
        result[field] = value;

        if (field === 'data') {
          if (value === '[DONE]') {
            cont = false;
            break;
          }

          if (value) {
            // since we know this is llama.cpp, let's just decode the json in data
            result.data = JSON.parse(value);
            // Payloads without a `content` field must not append "undefined".
            content += result.data.content ?? "";

            yield result;

            // if we got a stop token from server, we will break here
            if (result.data.stop) {
              if (result.data.generation_settings) {
                generation_settings = result.data.generation_settings;
              }
              cont = false;
              break;
            }
          }
        } else if (field === 'error' && value) {
          let parsed = null;
          try {
            parsed = JSON.parse(value);
          } catch {
            console.error(`llama.cpp error ${value}`);
          }

          if (parsed) {
            result.error = parsed;
            if (parsed.message?.includes('slot unavailable')) {
              // Thrown outside the JSON try/catch so it really reaches upstream
              // callers instead of being logged and swallowed.
              throw new Error('slot unavailable');
            }
            console.error(`llama.cpp error [${parsed.code} - ${parsed.type}]: ${parsed.message}`);
          }
        }
      }
    }
  } catch (e) {
    if (e.name !== 'AbortError') {
      console.error("llama error: ", e);
    }
    throw e;
  } finally {
    controller.abort();
  }

  return content;
}
|
|
||||||
|
|
||||||
// Call llama, return an event target that you can subscribe to
//
// Example:
//
//    import { llamaEventTarget } from '/completion.js'
//
//    const conn = llamaEventTarget(prompt)
//    conn.addEventListener("message", (chunk) => {
//      document.write(chunk.detail.content)
//    })
//
// Events dispatched (all CustomEvent, payload in `detail`):
//   "message"             - chunk.data of every chunk that carries data
//   "generation_settings" - chunk.data.generation_settings, when present
//   "timings"             - chunk.data.timings, when present
//   "done"                - { content } with the concatenated content
//   "error"               - the error thrown by llama(); "done" is not sent then
export const llamaEventTarget = (prompt, params = {}, config = {}) => {
  const eventTarget = new EventTarget();
  const emit = (type, detail) => eventTarget.dispatchEvent(new CustomEvent(type, { detail }));

  (async () => {
    let content = "";
    try {
      for await (const chunk of llama(prompt, params, config)) {
        const data = chunk.data;
        // Everything below reads from data; the settings/timings checks used to
        // sit outside this guard and would throw on a chunk without data.
        if (!data) {
          continue;
        }
        content += data.content ?? "";
        emit("message", data);
        if (data.generation_settings) {
          emit("generation_settings", data.generation_settings);
        }
        if (data.timings) {
          emit("timings", data.timings);
        }
      }
      emit("done", { content });
    } catch (error) {
      // Without this the failure was an unhandled rejection that subscribers
      // could not observe.
      emit("error", error);
    }
  })();

  return eventTarget;
};
|
|
||||||
|
|
||||||
// Call llama, return a promise that resolves to the completed text. This does not support streaming
//
// Example:
//
//     llamaPromise(prompt).then((content) => {
//       document.write(content)
//     })
//
//     or
//
//     const content = await llamaPromise(prompt)
//     document.write(content)
//
// The promise rejects with whatever error llama() throws.
export const llamaPromise = async (prompt, params = {}, config = {}) => {
  // A plain async function replaces the former `new Promise(async ...)` wrapper:
  // the returned promise settles the same way without manual resolve/reject.
  let content = "";
  for await (const chunk of llama(prompt, params, config)) {
    // Chunks without a `content` field must not append the text "undefined".
    content += chunk.data.content ?? "";
  }
  return content;
};
|
|
||||||
|
|
||||||
/**
 * (deprecated)
 *
 * Runs llama() for `params.prompt`, passing `params` as the request parameters
 * and `controller` as the abort controller, and invokes `callback` with every
 * chunk in the order it is produced.
 */
export const llamaComplete = async (params, controller, callback) => {
  const { prompt } = params;
  const stream = llama(prompt, params, { controller });
  for await (const piece of stream) {
    callback(piece);
  }
};
|
|
||||||
|
|
||||||
// Get the model info from the server. This is useful for getting the context window and so on.
//
// Returns the cached generation_settings when one is already stored; otherwise
// fetches `${config.api_url}/props` (trailing slashes on api_url are stripped) and
// caches its default_generation_settings.
// Throws an Error when the /props request does not succeed, so an error body is
// never mistaken for model info.
export const llamaModelInfo = async (config = {}) => {
  if (!generation_settings) {
    const api_url = config.api_url?.replace(/\/+$/, '') || "";
    const response = await fetch(`${api_url}/props`);
    if (!response.ok) {
      throw new Error(`Failed to fetch ${api_url}/props: HTTP ${response.status}`);
    }
    const props = await response.json();
    generation_settings = props.default_generation_settings;
  }
  return generation_settings;
};
|
|
|
@ -1,7 +1,7 @@
|
||||||
import './styles.css';
|
import './styles.css';
|
||||||
import { createApp, defineComponent, shallowRef, computed, h } from 'vue/dist/vue.esm-bundler.js';
|
import { createApp, defineComponent, shallowRef, computed, h } from 'vue/dist/vue.esm-bundler.js';
|
||||||
import { llama } from './completion.js';
|
|
||||||
import MarkdownIt from 'markdown-it';
|
import MarkdownIt from 'markdown-it';
|
||||||
|
import TextLineStream from 'textlinestream';
|
||||||
|
|
||||||
// utility functions
|
// utility functions
|
||||||
// True for primitive strings and String objects, false for anything else.
// A real type check replaces the former `!!x.toLowerCase` probe, which threw on
// null/undefined and accepted any object that happened to have that property.
const isString = (x) => typeof x === 'string' || x instanceof String;
|
||||||
|
@ -192,6 +192,23 @@ const chatScrollToBottom = (requiresNearBottom) => {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// wrapper for SSE
//
// Fetches `url` with `fetchOptions` and reads the response body line by line.
// Yields the parsed JSON payload of every `data:` line; the `[DONE]` marker and
// empty payloads are skipped.
// Throws an Error when the HTTP status is not OK (using the message from the JSON
// error body when there is one) and when the stream contains an `error:` line.
async function* sendSSEPostRequest(url, fetchOptions) {
  // fetch() would send a header whose value is undefined as the literal text
  // "undefined", so such entries are removed from plain-object header maps.
  const headers = fetchOptions?.headers;
  const isPlainRecord = headers && !(headers instanceof Headers) && !Array.isArray(headers);
  const options = isPlainRecord
    ? {
      ...fetchOptions,
      headers: Object.fromEntries(Object.entries(headers).filter(([, value]) => value != null)),
    }
    : fetchOptions;

  const res = await fetch(url, options);

  if (!res.ok) {
    // An error response is plain JSON rather than an event stream; without this
    // check it produced no chunks and no error at all.
    let message = `HTTP ${res.status}`;
    try {
      const body = await res.json();
      message = body?.error?.message || body?.message || message;
    } catch {
      // Body is not JSON: keep the status-based message.
    }
    throw new Error(message);
  }

  const lines = res.body
    .pipeThrough(new TextDecoderStream())
    .pipeThrough(new TextLineStream());

  for await (const line of lines) {
    if (line.startsWith('data:')) {
      const payload = line.slice(5).trim();
      // Compare the whole payload rather than the line suffix, so only the real
      // end-of-stream marker is skipped.
      if (payload !== '' && payload !== '[DONE]') {
        yield JSON.parse(payload);
      }
    } else if (line.startsWith('error:')) {
      const data = JSON.parse(line.slice(6));
      throw new Error(data.message || 'Unknown error');
    }
  }
}
|
||||||
|
|
||||||
const mainApp = createApp({
|
const mainApp = createApp({
|
||||||
components: {
|
components: {
|
||||||
VueMarkdown,
|
VueMarkdown,
|
||||||
|
@ -331,16 +348,19 @@ const mainApp = createApp({
|
||||||
dry_penalty_last_n: this.config.dry_penalty_last_n,
|
dry_penalty_last_n: this.config.dry_penalty_last_n,
|
||||||
max_tokens: this.config.max_tokens,
|
max_tokens: this.config.max_tokens,
|
||||||
...(this.config.custom.length ? JSON.parse(this.config.custom) : {}),
|
...(this.config.custom.length ? JSON.parse(this.config.custom) : {}),
|
||||||
...(this.config.apiKey ? { api_key: this.config.apiKey } : {}),
|
|
||||||
};
|
};
|
||||||
const config = {
|
const chunks = sendSSEPostRequest(`${BASE_URL}/v1/chat/completions`, {
|
||||||
controller: abortController,
|
method: 'POST',
|
||||||
api_url: BASE_URL,
|
headers: {
|
||||||
endpoint: '/chat/completions',
|
'Content-Type': 'application/json',
|
||||||
};
|
'Authorization': this.config.apiKey ? `Bearer ${this.config.apiKey}` : undefined,
|
||||||
for await (const chunk of llama(prompt, params, config)) {
|
},
|
||||||
const stop = chunk.data.stop;
|
body: JSON.stringify(params),
|
||||||
const addedContent = chunk.data.choices[0].delta.content;
|
signal: abortController.signal,
|
||||||
|
});
|
||||||
|
for await (const chunk of chunks) {
|
||||||
|
const stop = chunk.stop;
|
||||||
|
const addedContent = chunk.choices[0].delta.content;
|
||||||
const lastContent = this.pendingMsg.content || '';
|
const lastContent = this.pendingMsg.content || '';
|
||||||
if (addedContent) {
|
if (addedContent) {
|
||||||
this.pendingMsg = {
|
this.pendingMsg = {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue