Conversations
diff --git a/examples/server/webui/package-lock.json b/examples/server/webui/package-lock.json
index 6b93090f0..f9104f65f 100644
--- a/examples/server/webui/package-lock.json
+++ b/examples/server/webui/package-lock.json
@@ -13,6 +13,7 @@
"markdown-it": "^14.1.0",
"postcss": "^8.4.49",
"tailwindcss": "^3.4.15",
+ "textlinestream": "^1.1.1",
"vite-plugin-singlefile": "^2.0.3",
"vue": "^3.5.13"
},
@@ -2677,6 +2678,12 @@
"node": ">=14.0.0"
}
},
+ "node_modules/textlinestream": {
+ "version": "1.1.1",
+ "resolved": "https://registry.npmjs.org/textlinestream/-/textlinestream-1.1.1.tgz",
+ "integrity": "sha512-iBHbi7BQxrFmwZUQJsT0SjNzlLLsXhvW/kg7EyOMVMBIrlnj/qYofwo1LVLZi+3GbUEo96Iu2eqToI2+lZoAEQ==",
+ "license": "MIT"
+ },
"node_modules/uc.micro": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-2.1.0.tgz",
diff --git a/examples/server/webui/package.json b/examples/server/webui/package.json
index 2a45ece14..d656a841d 100644
--- a/examples/server/webui/package.json
+++ b/examples/server/webui/package.json
@@ -17,6 +17,7 @@
"markdown-it": "^14.1.0",
"postcss": "^8.4.49",
"tailwindcss": "^3.4.15",
+ "textlinestream": "^1.1.1",
"vite-plugin-singlefile": "^2.0.3",
"vue": "^3.5.13"
}
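The new textlinestream dependency is a small Web Streams helper: a TransformStream that re-chunks arbitrary text fragments into complete lines, which is what the hand-rolled leftover-buffer logic in the deleted completion.js below used to do. A minimal sketch of the idea, assuming a Web Streams environment (browser or Node 18+); readLines is an illustrative name, the actual webui pipes a fetch() response body directly (see the main.js hunk further down):

```js
import TextLineStream from 'textlinestream';

// Sketch: turn a ReadableStream<Uint8Array> (e.g. a fetch() response body)
// into an async iterable of complete text lines.
async function* readLines(body) {
  const lines = body
    .pipeThrough(new TextDecoderStream()) // bytes -> text chunks
    .pipeThrough(new TextLineStream());   // text chunks -> whole lines
  for await (const line of lines) {
    yield line;
  }
}
```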
diff --git a/examples/server/webui/src/completion.js b/examples/server/webui/src/completion.js
deleted file mode 100644
index 54a0f22f5..000000000
--- a/examples/server/webui/src/completion.js
+++ /dev/null
@@ -1,225 +0,0 @@
-const paramDefaults = {
- stream: true,
- temperature: 0.2,
-};
-
-let generation_settings = null;
-
-export class CompletionError extends Error {
- constructor(message, name, data) {
- super(message);
- this.name = name;
- }
-};
-
-// Completes the prompt as a generator. Recommended for most use cases.
-//
-// Example:
-//
-// import { llama } from '/completion.js'
-//
-// const request = llama("Tell me a joke", {n_predict: 800})
-// for await (const chunk of request) {
-// document.write(chunk.data.content)
-// }
-//
-export async function* llama(prompt, params = {}, config = {}) {
- let controller = config.controller;
- const api_url = config.api_url?.replace(/\/+$/, '') || "";
-
- if (!controller) {
- controller = new AbortController();
- }
-
- const completionParams = { ...paramDefaults, ...params, prompt };
-
- const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
- method: 'POST',
- body: JSON.stringify(completionParams),
- headers: {
- 'Connection': 'keep-alive',
- 'Content-Type': 'application/json',
- 'Accept': 'text/event-stream',
- ...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
- },
- signal: controller.signal,
- });
-
- const status = response.status;
- if (status !== 200) {
- try {
- const body = await response.json();
- if (body && body.error && body.error.message) {
- throw new CompletionError(body.error.message, 'ServerError');
- }
- } catch (err) {
- throw new CompletionError(err.message, 'ServerError');
- }
- }
-
- const reader = response.body.getReader();
- const decoder = new TextDecoder();
-
- let content = "";
- let leftover = ""; // Buffer for partially read lines
-
- try {
- let cont = true;
-
- while (cont) {
- const result = await reader.read();
- if (result.done) {
- break;
- }
-
- // Add any leftover data to the current chunk of data
- const text = leftover + decoder.decode(result.value);
-
- // Check if the last character is a line break
- const endsWithLineBreak = text.endsWith('\n');
-
- // Split the text into lines
- let lines = text.split('\n');
-
- // If the text doesn't end with a line break, then the last line is incomplete
- // Store it in leftover to be added to the next chunk of data
- if (!endsWithLineBreak) {
- leftover = lines.pop();
- } else {
- leftover = ""; // Reset leftover if we have a line break at the end
- }
-
- // Parse all sse events and add them to result
- const regex = /^(\S+):\s(.*)$/gm;
- for (const line of lines) {
- const match = regex.exec(line);
- if (match) {
- result[match[1]] = match[2];
- if (result.data === '[DONE]') {
- cont = false;
- break;
- }
-
- // since we know this is llama.cpp, let's just decode the json in data
- if (result.data) {
- result.data = JSON.parse(result.data);
- content += result.data.content;
-
- // yield
- yield result;
-
- // if we got a stop token from server, we will break here
- if (result.data.stop) {
- if (result.data.generation_settings) {
- generation_settings = result.data.generation_settings;
- }
- cont = false;
- break;
- }
- }
- if (result.error) {
- try {
- result.error = JSON.parse(result.error);
- if (result.error.message.includes('slot unavailable')) {
- // Throw an error to be caught by upstream callers
- throw new Error('slot unavailable');
- } else {
- console.error(`llama.cpp error [${result.error.code} - ${result.error.type}]: ${result.error.message}`);
- }
- } catch(e) {
- console.error(`llama.cpp error ${result.error}`)
- }
- }
- }
- }
- }
- } catch (e) {
- if (e.name !== 'AbortError') {
- console.error("llama error: ", e);
- }
- throw e;
- }
- finally {
- controller.abort();
- }
-
- return content;
-}
-
-// Call llama, return an event target that you can subscribe to
-//
-// Example:
-//
-// import { llamaEventTarget } from '/completion.js'
-//
-// const conn = llamaEventTarget(prompt)
-// conn.addEventListener("message", (chunk) => {
-// document.write(chunk.detail.content)
-// })
-//
-export const llamaEventTarget = (prompt, params = {}, config = {}) => {
- const eventTarget = new EventTarget();
- (async () => {
- let content = "";
- for await (const chunk of llama(prompt, params, config)) {
- if (chunk.data) {
- content += chunk.data.content;
- eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
- }
- if (chunk.data.generation_settings) {
- eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
- }
- if (chunk.data.timings) {
- eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
- }
- }
- eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
- })();
- return eventTarget;
-}
-
-// Call llama, return a promise that resolves to the completed text. This does not support streaming
-//
-// Example:
-//
-// llamaPromise(prompt).then((content) => {
-// document.write(content)
-// })
-//
-// or
-//
-// const content = await llamaPromise(prompt)
-// document.write(content)
-//
-export const llamaPromise = (prompt, params = {}, config = {}) => {
- return new Promise(async (resolve, reject) => {
- let content = "";
- try {
- for await (const chunk of llama(prompt, params, config)) {
- content += chunk.data.content;
- }
- resolve(content);
- } catch (error) {
- reject(error);
- }
- });
-};
-
-/**
- * (deprecated)
- */
-export const llamaComplete = async (params, controller, callback) => {
- for await (const chunk of llama(params.prompt, params, { controller })) {
- callback(chunk);
- }
-}
-
-// Get the model info from the server. This is useful for getting the context window and so on.
-export const llamaModelInfo = async (config = {}) => {
- if (!generation_settings) {
- const api_url = config.api_url?.replace(/\/+$/, '') || "";
- const props = await fetch(`${api_url}/props`).then(r => r.json());
- generation_settings = props.default_generation_settings;
- }
- return generation_settings;
-}
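With completion.js removed, its exported helpers (llama, llamaEventTarget, llamaPromise, llamaComplete, llamaModelInfo) are gone; their only consumer, main.js, now streams from the OpenAI-compatible /v1/chat/completions endpoint through the new sendSSEPostRequest generator shown in the next hunk. A rough before/after sketch of a caller, using names from the diff; messages, render, and the stream: true field are illustrative assumptions, not part of the patch:

```js
// Before (removed helper): each chunk carried a .data wrapper around the SSE payload.
// for await (const chunk of llama(prompt, params, { endpoint: '/chat/completions' })) {
//   render(chunk.data.choices[0].delta.content);
// }

// After: sendSSEPostRequest yields the parsed JSON payload of each `data:` line directly.
const chunks = sendSSEPostRequest(`${BASE_URL}/v1/chat/completions`, {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ messages, stream: true }), // assumed request shape
  signal: abortController.signal,
});
for await (const chunk of chunks) {
  render(chunk.choices[0].delta.content); // no .data wrapper any more
}
```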
diff --git a/examples/server/webui/src/main.js b/examples/server/webui/src/main.js
index 9b5b12329..35f5091fc 100644
--- a/examples/server/webui/src/main.js
+++ b/examples/server/webui/src/main.js
@@ -1,7 +1,7 @@
import './styles.css';
import { createApp, defineComponent, shallowRef, computed, h } from 'vue/dist/vue.esm-bundler.js';
-import { llama } from './completion.js';
import MarkdownIt from 'markdown-it';
+import TextLineStream from 'textlinestream';
// utility functions
const isString = (x) => !!x.toLowerCase;
@@ -192,6 +192,23 @@ const chatScrollToBottom = (requiresNearBottom) => {
}
};
+// wrapper for SSE
+async function* sendSSEPostRequest(url, fetchOptions) {
+ const res = await fetch(url, fetchOptions);
+ const lines = res.body
+ .pipeThrough(new TextDecoderStream())
+ .pipeThrough(new TextLineStream());
+ for await (const line of lines) {
+ if (line.startsWith('data:') && !line.endsWith('[DONE]')) {
+ const data = JSON.parse(line.slice(5));
+ yield data;
+ } else if (line.startsWith('error:')) {
+ const data = JSON.parse(line.slice(6));
+ throw new Error(data.message || 'Unknown error');
+ }
+ }
+};
+
const mainApp = createApp({
components: {
VueMarkdown,
@@ -331,16 +348,19 @@ const mainApp = createApp({
dry_penalty_last_n: this.config.dry_penalty_last_n,
max_tokens: this.config.max_tokens,
...(this.config.custom.length ? JSON.parse(this.config.custom) : {}),
- ...(this.config.apiKey ? { api_key: this.config.apiKey } : {}),
};
- const config = {
- controller: abortController,
- api_url: BASE_URL,
- endpoint: '/chat/completions',
- };
- for await (const chunk of llama(prompt, params, config)) {
- const stop = chunk.data.stop;
- const addedContent = chunk.data.choices[0].delta.content;
+ const chunks = sendSSEPostRequest(`${BASE_URL}/v1/chat/completions`, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ 'Authorization': this.config.apiKey ? `Bearer ${this.config.apiKey}` : undefined,
+ },
+ body: JSON.stringify(params),
+ signal: abortController.signal,
+ });
+ for await (const chunk of chunks) {
+ const stop = chunk.stop;
+ const addedContent = chunk.choices[0].delta.content;
const lastContent = this.pendingMsg.content || '';
if (addedContent) {
this.pendingMsg = {