From bd2f59e50aaaa8f184272348474b119e9f6fa2b5 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Wed, 11 Dec 2024 17:56:43 +0100
Subject: [PATCH] get rid of completion.js

---
 examples/server/webui/index.html        |   2 +-
 examples/server/webui/package-lock.json |   7 +
 examples/server/webui/package.json      |   1 +
 examples/server/webui/src/completion.js | 225 ------------------------
 examples/server/webui/src/main.js       |  40 +++--
 5 files changed, 39 insertions(+), 236 deletions(-)
 delete mode 100644 examples/server/webui/src/completion.js

diff --git a/examples/server/webui/index.html b/examples/server/webui/index.html
index c7e18b45e..95c604090 100644
--- a/examples/server/webui/index.html
+++ b/examples/server/webui/index.html
@@ -15,7 +15,7 @@
 [index.html hunk: the HTML markup was stripped during extraction; one line changed (1 insertion, 1 deletion) near the sidebar's "Conversations" heading]
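
The hunks that follow add textlinestream as a dependency and rewire main.js around it. For orientation, a minimal sketch of the streaming pattern being adopted (illustrative only, not part of the patch; the server URL and payload are assumed, and it presumes a browser-style environment with WHATWG streams):

    import TextLineStream from 'textlinestream';

    // Re-chunk a streamed fetch body into complete text lines, no matter
    // how the network fragments the bytes: TextDecoderStream turns bytes
    // into UTF-8 text, TextLineStream turns text into one chunk per line.
    (async () => {
      const res = await fetch('http://localhost:8080/v1/chat/completions', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ messages: [{ role: 'user', content: 'Hi' }], stream: true }),
      });
      const lines = res.body
        .pipeThrough(new TextDecoderStream()) // bytes -> UTF-8 text
        .pipeThrough(new TextLineStream());   // text -> one chunk per full line
      for await (const line of lines) {
        console.log(line); // e.g. 'data: {"choices":[{"delta":{"content":"Hi"}}]}'
      }
    })();

This pipeline is what replaces the manual leftover-buffer line splitting that the deleted completion.js implements by hand.
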
diff --git a/examples/server/webui/package-lock.json b/examples/server/webui/package-lock.json
index 6b93090f0..f9104f65f 100644
--- a/examples/server/webui/package-lock.json
+++ b/examples/server/webui/package-lock.json
@@ -13,6 +13,7 @@
         "markdown-it": "^14.1.0",
         "postcss": "^8.4.49",
         "tailwindcss": "^3.4.15",
+        "textlinestream": "^1.1.1",
         "vite-plugin-singlefile": "^2.0.3",
         "vue": "^3.5.13"
       },
@@ -2677,6 +2678,12 @@
         "node": ">=14.0.0"
       }
     },
+    "node_modules/textlinestream": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/textlinestream/-/textlinestream-1.1.1.tgz",
+      "integrity": "sha512-iBHbi7BQxrFmwZUQJsT0SjNzlLLsXhvW/kg7EyOMVMBIrlnj/qYofwo1LVLZi+3GbUEo96Iu2eqToI2+lZoAEQ==",
+      "license": "MIT"
+    },
     "node_modules/uc.micro": {
       "version": "2.1.0",
       "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-2.1.0.tgz",
diff --git a/examples/server/webui/package.json b/examples/server/webui/package.json
index 2a45ece14..d656a841d 100644
--- a/examples/server/webui/package.json
+++ b/examples/server/webui/package.json
@@ -17,6 +17,7 @@
     "markdown-it": "^14.1.0",
     "postcss": "^8.4.49",
     "tailwindcss": "^3.4.15",
+    "textlinestream": "^1.1.1",
     "vite-plugin-singlefile": "^2.0.3",
     "vue": "^3.5.13"
   }
diff --git a/examples/server/webui/src/completion.js b/examples/server/webui/src/completion.js
deleted file mode 100644
index 54a0f22f5..000000000
--- a/examples/server/webui/src/completion.js
+++ /dev/null
@@ -1,225 +0,0 @@
-const paramDefaults = {
-  stream: true,
-  temperature: 0.2,
-};
-
-let generation_settings = null;
-
-export class CompletionError extends Error {
-  constructor(message, name, data) {
-    super(message);
-    this.name = name;
-  }
-};
-
-// Completes the prompt as a generator. Recommended for most use cases.
-//
-// Example:
-//
-//    import { llama } from '/completion.js'
-//
-//    const request = llama("Tell me a joke", {n_predict: 800})
-//    for await (const chunk of request) {
-//      document.write(chunk.data.content)
-//    }
-//
-export async function* llama(prompt, params = {}, config = {}) {
-  let controller = config.controller;
-  const api_url = config.api_url?.replace(/\/+$/, '') || "";
-
-  if (!controller) {
-    controller = new AbortController();
-  }
-
-  const completionParams = { ...paramDefaults, ...params, prompt };
-
-  const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
-    method: 'POST',
-    body: JSON.stringify(completionParams),
-    headers: {
-      'Connection': 'keep-alive',
-      'Content-Type': 'application/json',
-      'Accept': 'text/event-stream',
-      ...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
-    },
-    signal: controller.signal,
-  });
-
-  const status = response.status;
-  if (status !== 200) {
-    try {
-      const body = await response.json();
-      if (body && body.error && body.error.message) {
-        throw new CompletionError(body.error.message, 'ServerError');
-      }
-    } catch (err) {
-      throw new CompletionError(err.message, 'ServerError');
-    }
-  }
-
-  const reader = response.body.getReader();
-  const decoder = new TextDecoder();
-
-  let content = "";
-  let leftover = ""; // Buffer for partially read lines
-
-  try {
-    let cont = true;
-
-    while (cont) {
-      const result = await reader.read();
-      if (result.done) {
-        break;
-      }
-
-      // Add any leftover data to the current chunk of data
-      const text = leftover + decoder.decode(result.value);
-
-      // Check if the last character is a line break
-      const endsWithLineBreak = text.endsWith('\n');
-
-      // Split the text into lines
-      let lines = text.split('\n');
-
-      // If the text doesn't end with a line break, then the last line is incomplete
-      // Store it in leftover to be added to the next chunk of data
-      if (!endsWithLineBreak) {
-        leftover = lines.pop();
-      } else {
-        leftover = ""; // Reset leftover if we have a line break at the end
-      }
-
-      // Parse all sse events and add them to result
-      const regex = /^(\S+):\s(.*)$/gm;
-      for (const line of lines) {
-        const match = regex.exec(line);
-        if (match) {
-          result[match[1]] = match[2];
-          if (result.data === '[DONE]') {
-            cont = false;
-            break;
-          }
-
-          // since we know this is llama.cpp, let's just decode the json in data
-          if (result.data) {
-            result.data = JSON.parse(result.data);
-            content += result.data.content;
-
-            // yield
-            yield result;
-
-            // if we got a stop token from server, we will break here
-            if (result.data.stop) {
-              if (result.data.generation_settings) {
-                generation_settings = result.data.generation_settings;
-              }
-              cont = false;
-              break;
-            }
-          }
-          if (result.error) {
-            try {
-              result.error = JSON.parse(result.error);
-              if (result.error.message.includes('slot unavailable')) {
-                // Throw an error to be caught by upstream callers
-                throw new Error('slot unavailable');
-              } else {
-                console.error(`llama.cpp error [${result.error.code} - ${result.error.type}]: ${result.error.message}`);
-              }
-            } catch(e) {
-              console.error(`llama.cpp error ${result.error}`)
-            }
-          }
-        }
-      }
-    }
-  } catch (e) {
-    if (e.name !== 'AbortError') {
-      console.error("llama error: ", e);
-    }
-    throw e;
-  }
-  finally {
-    controller.abort();
-  }
-
-  return content;
-}
-
-// Call llama, return an event target that you can subscribe to
-//
-// Example:
-//
-//    import { llamaEventTarget } from '/completion.js'
-//
-//    const conn = llamaEventTarget(prompt)
-//    conn.addEventListener("message", (chunk) => {
-//      document.write(chunk.detail.content)
-//    })
-//
-export const llamaEventTarget = (prompt, params = {}, config = {}) => {
-  const eventTarget = new EventTarget();
-  (async () => {
-    let content = "";
-    for await (const chunk of llama(prompt, params, config)) {
-      if (chunk.data) {
-        content += chunk.data.content;
-        eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
-      }
-      if (chunk.data.generation_settings) {
-        eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
-      }
-      if (chunk.data.timings) {
-        eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
-      }
-    }
-    eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
-  })();
-  return eventTarget;
-}
-
-// Call llama, return a promise that resolves to the completed text. This does not support streaming
-//
-// Example:
-//
-//     llamaPromise(prompt).then((content) => {
-//       document.write(content)
-//     })
-//
-//     or
-//
-//     const content = await llamaPromise(prompt)
-//     document.write(content)
-//
-export const llamaPromise = (prompt, params = {}, config = {}) => {
-  return new Promise(async (resolve, reject) => {
-    let content = "";
-    try {
-      for await (const chunk of llama(prompt, params, config)) {
-        content += chunk.data.content;
-      }
-      resolve(content);
-    } catch (error) {
-      reject(error);
-    }
-  });
-};
-
-/**
- * (deprecated)
- */
-export const llamaComplete = async (params, controller, callback) => {
-  for await (const chunk of llama(params.prompt, params, { controller })) {
-    callback(chunk);
-  }
-}
-
-// Get the model info from the server. This is useful for getting the context window and so on.
-export const llamaModelInfo = async (config = {}) => {
-  if (!generation_settings) {
-    const api_url = config.api_url?.replace(/\/+$/, '') || "";
-    const props = await fetch(`${api_url}/props`).then(r => r.json());
-    generation_settings = props.default_generation_settings;
-  }
-  return generation_settings;
-}
diff --git a/examples/server/webui/src/main.js b/examples/server/webui/src/main.js
index 9b5b12329..35f5091fc 100644
--- a/examples/server/webui/src/main.js
+++ b/examples/server/webui/src/main.js
@@ -1,7 +1,7 @@
 import './styles.css';
 import { createApp, defineComponent, shallowRef, computed, h } from 'vue/dist/vue.esm-bundler.js';
-import { llama } from './completion.js';
 import MarkdownIt from 'markdown-it';
+import TextLineStream from 'textlinestream';
 
 // utility functions
 const isString = (x) => !!x.toLowerCase;
@@ -192,6 +192,23 @@ const chatScrollToBottom = (requiresNearBottom) => {
   }
 };
 
+// wrapper for SSE
+async function* sendSSEPostRequest(url, fetchOptions) {
+  const res = await fetch(url, fetchOptions);
+  const lines = res.body
+    .pipeThrough(new TextDecoderStream())
+    .pipeThrough(new TextLineStream());
+  for await (const line of lines) {
+    if (line.startsWith('data:') && !line.endsWith('[DONE]')) {
+      const data = JSON.parse(line.slice(5));
+      yield data;
+    } else if (line.startsWith('error:')) {
+      const data = JSON.parse(line.slice(6));
+      throw new Error(data.message || 'Unknown error');
+    }
+  }
+};
+
 const mainApp = createApp({
   components: {
     VueMarkdown,
@@ -331,16 +348,19 @@
         dry_penalty_last_n: this.config.dry_penalty_last_n,
         max_tokens: this.config.max_tokens,
         ...(this.config.custom.length ? JSON.parse(this.config.custom) : {}),
-        ...(this.config.apiKey ? { api_key: this.config.apiKey } : {}),
       };
-      const config = {
-        controller: abortController,
-        api_url: BASE_URL,
-        endpoint: '/chat/completions',
-      };
-      for await (const chunk of llama(prompt, params, config)) {
-        const stop = chunk.data.stop;
-        const addedContent = chunk.data.choices[0].delta.content;
+      const chunks = sendSSEPostRequest(`${BASE_URL}/v1/chat/completions`, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          'Authorization': this.config.apiKey ? `Bearer ${this.config.apiKey}` : undefined,
+        },
+        body: JSON.stringify(params),
+        signal: abortController.signal,
+      });
+      for await (const chunk of chunks) {
+        const stop = chunk.stop;
+        const addedContent = chunk.choices[0].delta.content;
         const lastContent = this.pendingMsg.content || '';
         if (addedContent) {
           this.pendingMsg = {
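
For reference, a condensed sketch of how the new sendSSEPostRequest generator is consumed (illustrative only; the helper name streamChat and the server URL are assumptions, and the request must set stream: true so the server answers with SSE):

    // Accumulate the streamed deltas of one chat completion into a string,
    // assuming sendSSEPostRequest from the hunk above is in scope.
    async function streamChat(messages) {
      let content = '';
      const chunks = sendSSEPostRequest('http://localhost:8080/v1/chat/completions', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ messages, stream: true }),
      });
      for await (const chunk of chunks) {
        // role/finish chunks carry no content delta, hence the fallback
        content += chunk.choices[0].delta.content || '';
      }
      return content;
    }

Each yielded chunk is the parsed JSON payload of one data: line, which is why the component above reads chunk.choices[0].delta.content directly instead of going through the old chunk.data indirection.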