get rid of completion.js

2024-12-11 17:56:43 +01:00 · 2024-12-11 17:56:43 +01:00 · bd2f59e50a
commit bd2f59e50a
parent 1a31d0dc00
5 changed files with 39 additions and 236 deletions
--- a/examples/server/webui/index.html
+++ b/examples/server/webui/index.html
@ -15,7 +15,7 @@
      <!-- sidebar -->
      <div class="drawer-side h-screen lg:h-screen z-50 lg:max-w-64">
        <label for="toggle-drawer" aria-label="close sidebar" class="drawer-overlay"></label>
-        <div class="flex flex-col bg-base-200 min-h-full max-w-[calc(100vw-2em)] py-4 px-4">
+        <div class="flex flex-col bg-base-200 min-h-full max-w-64 py-4 px-4">
          <div class="flex flex-row items-center justify-between mb-4 mt-4">
            <h2 class="font-bold ml-4">Conversations</h2>
--- a/examples/server/webui/package-lock.json
+++ b/examples/server/webui/package-lock.json
@ -13,6 +13,7 @@
        "markdown-it": "^14.1.0",
        "postcss": "^8.4.49",
        "tailwindcss": "^3.4.15",
        "textlinestream": "^1.1.1",
        "vite-plugin-singlefile": "^2.0.3",
        "vue": "^3.5.13"
      },
@ -2677,6 +2678,12 @@
        "node": ">=14.0.0"
      }
    },
    "node_modules/textlinestream": {
      "version": "1.1.1",
      "resolved": "https://registry.npmjs.org/textlinestream/-/textlinestream-1.1.1.tgz",
      "integrity": "sha512-iBHbi7BQxrFmwZUQJsT0SjNzlLLsXhvW/kg7EyOMVMBIrlnj/qYofwo1LVLZi+3GbUEo96Iu2eqToI2+lZoAEQ==",
      "license": "MIT"
    },
    "node_modules/uc.micro": {
      "version": "2.1.0",
      "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-2.1.0.tgz",
--- a/examples/server/webui/package.json
+++ b/examples/server/webui/package.json
@ -17,6 +17,7 @@
    "markdown-it": "^14.1.0",
    "postcss": "^8.4.49",
    "tailwindcss": "^3.4.15",
    "textlinestream": "^1.1.1",
    "vite-plugin-singlefile": "^2.0.3",
    "vue": "^3.5.13"
  }
--- a/examples/server/webui/src/completion.js
+++ b/examples/server/webui/src/completion.js
@ -1,225 +0,0 @@
 // Default request parameters. llama() spreads these first when building the
 // request body, so any caller-supplied param with the same key overrides them.
 const paramDefaults = {
  stream: true,
  temperature: 0.2,
 };
 // Module-level cache of generation settings. It is written by llama() when a
 // final (stop) chunk carries `generation_settings`, and by llamaModelInfo()
 // from the server's /props response; null until one of those happens.
 let generation_settings = null;
 // Error type thrown for failed completion requests.
 //
 //   message - human-readable description (forwarded to Error)
 //   name    - value assigned to `error.name` (e.g. 'ServerError')
 //   data    - optional extra payload; kept on `error.data` so callers can
 //             inspect it (undefined when not supplied)
 export class CompletionError extends Error {
  constructor(message, name, data) {
    super(message);
    this.name = name;
    this.data = data;
  }
 }
 // Completes the prompt as an async generator. Recommended for most use cases.
 //
 // Example:
 //
 //    import { llama } from '/completion.js'
 //
 //    const request = llama("Tell me a joke", {n_predict: 800})
 //    for await (const chunk of request) {
 //      document.write(chunk.data.content)
 //    }
 //
 // Parameters:
 //   prompt - sent as the `prompt` field of the JSON request body
 //   params - request fields merged over paramDefaults; when `params.api_key`
 //            is set it is also sent as a Bearer token
 //   config - optional { controller, api_url, endpoint }; endpoint defaults
 //            to '/completion', and a fresh AbortController is created when
 //            none is given
 //
 // Yields the stream-reader result object with `data` replaced by the parsed
 // JSON payload of each `data:` line. Returns the concatenated `data.content`.
 //
 // Throws CompletionError (name 'ServerError') on a non-200 response, and
 // Error('slot unavailable') when the server reports that condition. Any other
 // failure is rethrown after being logged (AbortError is not logged).
 export async function* llama(prompt, params = {}, config = {}) {
  const controller = config.controller || new AbortController();
  const api_url = config.api_url?.replace(/\/+$/, '') || "";
  const completionParams = { ...paramDefaults, ...params, prompt };
  const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream',
      ...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
    },
    signal: controller.signal,
  });
  const status = response.status;
  if (status !== 200) {
    // Always fail here: the body is consumed by json(), so falling through
    // to the stream reader below could never work.
    let message = `HTTP ${status}`;
    try {
      const body = await response.json();
      if (body && body.error && body.error.message) {
        message = body.error.message;
      }
    } catch (err) {
      message = err.message;
    }
    throw new CompletionError(message, 'ServerError');
  }
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  // No `g` flag: exec() on a global regex keeps lastIndex between calls and
  // would skip a line that directly follows a matching one. `m` is kept so
  // that `$` still matches before a trailing '\r'.
  const fieldPattern = /^(\S+):\s(.*)$/m;
  let content = "";
  let leftover = ""; // Buffer for a partially read line
  try {
    let cont = true;
    while (cont) {
      const result = await reader.read();
      if (result.done) {
        break;
      }
      // stream: true keeps a multi-byte character that is split across two
      // network chunks intact instead of decoding it to U+FFFD.
      const text = leftover + decoder.decode(result.value, { stream: true });
      const lines = text.split('\n');
      // The last element is either an incomplete line or "" (when the text
      // ended with a line break); carry it over to the next chunk.
      leftover = lines.pop();
      for (const line of lines) {
        const match = fieldPattern.exec(line);
        if (!match) {
          continue;
        }
        const [, field, value] = match;
        result[field] = value;
        if (field === 'data') {
          if (value === '[DONE]') {
            cont = false;
            break;
          }
          if (value) {
            // since we know this is llama.cpp, decode the json in data
            result.data = JSON.parse(value);
            content += result.data.content ?? '';
            yield result;
            // a stop token from the server ends the stream
            if (result.data.stop) {
              if (result.data.generation_settings) {
                generation_settings = result.data.generation_settings;
              }
              cont = false;
              break;
            }
          }
        } else if (field === 'error') {
          let serverError;
          try {
            serverError = JSON.parse(value);
          } catch {
            console.error(`llama.cpp error ${value}`);
            continue;
          }
          result.error = serverError;
          if (serverError.message?.includes('slot unavailable')) {
            // Thrown outside the JSON try/catch so that it actually reaches
            // upstream callers instead of being logged and dropped.
            throw new Error('slot unavailable');
          }
          console.error(`llama.cpp error [${serverError.code} - ${serverError.type}]: ${serverError.message}`);
        }
      }
    }
  } catch (e) {
    if (e.name !== 'AbortError') {
      console.error("llama error: ", e);
    }
    throw e;
  }
  finally {
    controller.abort();
  }
  return content;
 }
 // Call llama, return an event target that you can subscribe to
 //
 // Example:
 //
 //    import { llamaEventTarget } from '/completion.js'
 //
 //    const conn = llamaEventTarget(prompt)
 //    conn.addEventListener("message", (chunk) => {
 //      document.write(chunk.detail.content)
 //    })
 //
 export const llamaEventTarget = (prompt, params = {}, config = {}) => {
  const eventTarget = new EventTarget();
  (async () => {
    let content = "";
    for await (const chunk of llama(prompt, params, config)) {
      if (chunk.data) {
        content += chunk.data.content;
        eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
      }
      if (chunk.data.generation_settings) {
        eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
      }
      if (chunk.data.timings) {
        eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
      }
    }
    eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
  })();
  return eventTarget;
 }
 // Call llama, return a promise that resolves to the completed text. This does not support streaming
 //
 // Example:
 //
 //     llamaPromise(prompt).then((content) => {
 //       document.write(content)
 //     })
 //
 //     or
 //
 //     const content = await llamaPromise(prompt)
 //     document.write(content)
 //
 // Arguments are forwarded to llama() unchanged. The promise rejects with
 // whatever error llama() throws.
 export const llamaPromise = async (prompt, params = {}, config = {}) => {
  // An async function already returns a promise that rejects on throw, so
  // no explicit `new Promise` wrapper is needed.
  let content = "";
  for await (const chunk of llama(prompt, params, config)) {
    content += chunk.data.content;
  }
  return content;
 };
 /**
 * (deprecated)
 *
 * Runs llama() with `params.prompt` as the prompt, `params` as the request
 * parameters and `controller` as the abort controller, invoking `callback`
 * once for every chunk the stream produces.
 */
 export const llamaComplete = async (params, controller, callback) => {
  const stream = llama(params.prompt, params, { controller });
  for await (const piece of stream) {
    callback(piece);
  }
 };
 // Fetch the model's default generation settings from the server's /props
 // endpoint (useful for reading the context window and so on). The result is
 // cached in `generation_settings`; once that is set no further request is made.
 export const llamaModelInfo = async (config = {}) => {
  if (generation_settings) {
    return generation_settings;
  }
  const baseUrl = config.api_url?.replace(/\/+$/, '') || "";
  const response = await fetch(`${baseUrl}/props`);
  const props = await response.json();
  generation_settings = props.default_generation_settings;
  return generation_settings;
 };
--- a/examples/server/webui/src/main.js
+++ b/examples/server/webui/src/main.js
@ -1,7 +1,7 @@
 import './styles.css';
 import { createApp, defineComponent, shallowRef, computed, h } from 'vue/dist/vue.esm-bundler.js';
 import { llama } from './completion.js';
 import MarkdownIt from 'markdown-it';
 import TextLineStream from 'textlinestream';
 // utility functions
 const isString = (x) => !!x.toLowerCase;
@ -192,6 +192,23 @@ const chatScrollToBottom = (requiresNearBottom) => {
  }
 };
 // Wrapper for SSE over fetch: sends the request and yields the parsed JSON
 // payload of every `data:` line in the response stream.
 //
 //   url          - request URL, passed straight to fetch()
 //   fetchOptions - fetch() init object (method, headers, body, signal, ...)
 //
 // `data:` lines ending in [DONE] are skipped. Throws an Error when the
 // response status is not OK (using the server's `error.message` when the body
 // provides one) or when the stream contains an `error:` line.
 async function* sendSSEPostRequest(url, fetchOptions) {
  const res = await fetch(url, fetchOptions);
  if (!res.ok) {
    // Error responses are plain JSON rather than an event stream; without
    // this check the loop below would just end without yielding anything.
    let message = `HTTP error ${res.status}`;
    try {
      const body = await res.json();
      message = body?.error?.message ?? message;
    } catch {
      // Body was not JSON; keep the status-based message.
    }
    throw new Error(message);
  }
  // Read through an explicit reader: async iteration of ReadableStream is
  // not available in every browser.
  const reader = res.body
    .pipeThrough(new TextDecoderStream())
    .pipeThrough(new TextLineStream())
    .getReader();
  try {
    while (true) {
      const { done, value: line } = await reader.read();
      if (done) {
        break;
      }
      if (line.startsWith('data:') && !line.endsWith('[DONE]')) {
        const data = JSON.parse(line.slice(5));
        yield data;
      } else if (line.startsWith('error:')) {
        const data = JSON.parse(line.slice(6));
        throw new Error(data.message || 'Unknown error');
      }
    }
  } finally {
    // Mirror what `for await` does on early exit: stop the underlying
    // stream. cancel() rejects if the stream already errored (e.g. abort);
    // that error has been raised by read() above, so it is ignored here.
    await reader.cancel().catch(() => {});
  }
 }
 const mainApp = createApp({
  components: {
    VueMarkdown,
@ -331,16 +348,19 @@ const mainApp = createApp({
          dry_penalty_last_n: this.config.dry_penalty_last_n,
          max_tokens: this.config.max_tokens,
          ...(this.config.custom.length ? JSON.parse(this.config.custom) : {}),
          ...(this.config.apiKey ? { api_key: this.config.apiKey } : {}),
        };
-        const config = {
+        const chunks = sendSSEPostRequest(`${BASE_URL}/v1/chat/completions`, {
-          controller: abortController,
+          method: 'POST',
-          api_url: BASE_URL,
+          headers: {
-          endpoint: '/chat/completions',
+            'Content-Type': 'application/json',
-        };
+            'Authorization': this.config.apiKey ? `Bearer ${this.config.apiKey}` : undefined,
-        for await (const chunk of llama(prompt, params, config)) {
+          },
-          const stop = chunk.data.stop;
+          body: JSON.stringify(params),
-          const addedContent = chunk.data.choices[0].delta.content;
+          signal: abortController.signal,
        });
        for await (const chunk of chunks) {
          const stop = chunk.stop;
          const addedContent = chunk.choices[0].delta.content;
          const lastContent = this.pendingMsg.content || '';
          if (addedContent) {
            this.pendingMsg = {