server : (web ui) Various improvements, now use vite as bundler (#10599)

* hide buttons in dropdown menu * use npm as deps manager and vite as bundler * fix build * fix build (2) * fix responsive on mobile * fix more problems on mobile * sync build * (test) add CI step for verifying build * fix ci * force rebuild .hpp files * cmake: clean up generated files pre build
2024-12-03 19:38:44 +01:00 · 2024-12-03 19:38:44 +01:00 · 91c36c269b
commit 91c36c269b
parent 1cd3df46bd
21 changed files with 3941 additions and 27406 deletions
--- a/examples/server/webui/index.html
+++ b/examples/server/webui/index.html
@ -0,0 +1,268 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1" />
+  <meta name="color-scheme" content="light dark">
+  <title>🦙 llama.cpp - chat</title>
+</head>
+
+<body>
+  <div id="app" class="opacity-0"> <!-- opacity-0 will be removed on app mounted -->
+    <div class="flex flex-row drawer lg:drawer-open">
+      <input id="toggle-drawer" type="checkbox" class="drawer-toggle" checked />
+
+      <!-- sidebar -->
+      <div class="drawer-side h-screen lg:h-screen z-50 lg:max-w-64">
+        <label for="toggle-drawer" aria-label="close sidebar" class="drawer-overlay"></label>
+        <div class="flex flex-col bg-base-200 min-h-full max-w-[calc(100vw-2em)] py-4 px-4">
+          <div class="flex flex-row items-center justify-between mb-4 mt-4">
+            <h2 class="font-bold ml-4">Conversations</h2>
+
+            <!-- close sidebar button -->
+            <label for="toggle-drawer" class="btn btn-ghost lg:hidden">
+              <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-arrow-bar-left" viewBox="0 0 16 16">
+                <path fill-rule="evenodd" d="M12.5 15a.5.5 0 0 1-.5-.5v-13a.5.5 0 0 1 1 0v13a.5.5 0 0 1-.5.5M10 8a.5.5 0 0 1-.5.5H3.707l2.147 2.146a.5.5 0 0 1-.708.708l-3-3a.5.5 0 0 1 0-.708l3-3a.5.5 0 1 1 .708.708L3.707 7.5H9.5a.5.5 0 0 1 .5.5"/>
+              </svg>
+            </label>
+          </div>
+
+          <!-- list of conversations -->
+          <div :class="{
+            'btn btn-ghost justify-start': true,
+            'btn-active': messages.length === 0,
+          }" @click="newConversation">
+            + New conversation
+          </div>
+          <div v-for="conv in conversations" :class="{
+            'btn btn-ghost justify-start font-normal': true,
+            'btn-active': conv.id === viewingConvId,
+          }" @click="setViewingConv(conv.id)">
+            <span class="truncate">{{ conv.messages[0].content }}</span>
+          </div>
+          <div class="text-center text-xs opacity-40 mt-auto mx-4">
+            Conversations are saved to browser's localStorage
+          </div>
+        </div>
+      </div>
+
+      <!-- main view -->
+      <div class="chat-screen drawer-content grow flex flex-col h-screen w-screen mx-auto px-4">
+        <!-- header -->
+        <div class="flex flex-row items-center mt-6 mb-6">
+          <!-- open sidebar button -->
+          <label for="toggle-drawer" class="btn btn-ghost lg:hidden">
+            <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-list" viewBox="0 0 16 16">
+              <path fill-rule="evenodd" d="M2.5 12a.5.5 0 0 1 .5-.5h10a.5.5 0 0 1 0 1H3a.5.5 0 0 1-.5-.5m0-4a.5.5 0 0 1 .5-.5h10a.5.5 0 0 1 0 1H3a.5.5 0 0 1-.5-.5m0-4a.5.5 0 0 1 .5-.5h10a.5.5 0 0 1 0 1H3a.5.5 0 0 1-.5-.5"/>
+            </svg>
+          </label>
+
+          <div class="grow text-2xl font-bold ml-2">llama.cpp</div>
+
+          <!-- action buttons (top right) -->
+          <div class="flex items-center">
+            <div v-if="messages.length > 0" class="dropdown dropdown-end">
+              <!-- "more" button -->
+              <button tabindex="0" role="button" class="btn m-1" :disabled="isGenerating">
+                <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-three-dots-vertical" viewBox="0 0 16 16">
+                  <path d="M9.5 13a1.5 1.5 0 1 1-3 0 1.5 1.5 0 0 1 3 0m0-5a1.5 1.5 0 1 1-3 0 1.5 1.5 0 0 1 3 0m0-5a1.5 1.5 0 1 1-3 0 1.5 1.5 0 0 1 3 0"/>
+                </svg>
+              </button>
+              <!-- "more" dropdown menu -->
+              <ul tabindex="0" class="dropdown-content menu bg-base-100 rounded-box z-[1] w-52 p-2 shadow">
+                <li @click="downloadConv(viewingConvId)"><a>Download</a></li>
+                <li class="text-error" @click="deleteConv(viewingConvId)"><a>Delete</a></li>
+              </ul>
+            </div>
+            <button class="btn" @click="showConfigDialog = true" :disabled="isGenerating">
+              <!-- settings button -->
+              <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-gear" viewBox="0 0 16 16">
+                <path d="M8 4.754a3.246 3.246 0 1 0 0 6.492 3.246 3.246 0 0 0 0-6.492M5.754 8a2.246 2.246 0 1 1 4.492 0 2.246 2.246 0 0 1-4.492 0"/>
+                <path d="M9.796 1.343c-.527-1.79-3.065-1.79-3.592 0l-.094.319a.873.873 0 0 1-1.255.52l-.292-.16c-1.64-.892-3.433.902-2.54 2.541l.159.292a.873.873 0 0 1-.52 1.255l-.319.094c-1.79.527-1.79 3.065 0 3.592l.319.094a.873.873 0 0 1 .52 1.255l-.16.292c-.892 1.64.901 3.434 2.541 2.54l.292-.159a.873.873 0 0 1 1.255.52l.094.319c.527 1.79 3.065 1.79 3.592 0l.094-.319a.873.873 0 0 1 1.255-.52l.292.16c1.64.893 3.434-.902 2.54-2.541l-.159-.292a.873.873 0 0 1 .52-1.255l.319-.094c1.79-.527 1.79-3.065 0-3.592l-.319-.094a.873.873 0 0 1-.52-1.255l.16-.292c.893-1.64-.902-3.433-2.541-2.54l-.292.159a.873.873 0 0 1-1.255-.52zm-2.633.283c.246-.835 1.428-.835 1.674 0l.094.319a1.873 1.873 0 0 0 2.693 1.115l.291-.16c.764-.415 1.6.42 1.184 1.185l-.159.292a1.873 1.873 0 0 0 1.116 2.692l.318.094c.835.246.835 1.428 0 1.674l-.319.094a1.873 1.873 0 0 0-1.115 2.693l.16.291c.415.764-.42 1.6-1.185 1.184l-.291-.159a1.873 1.873 0 0 0-2.693 1.116l-.094.318c-.246.835-1.428.835-1.674 0l-.094-.319a1.873 1.873 0 0 0-2.692-1.115l-.292.16c-.764.415-1.6-.42-1.184-1.185l.159-.291A1.873 1.873 0 0 0 1.945 8.93l-.319-.094c-.835-.246-.835-1.428 0-1.674l.319-.094A1.873 1.873 0 0 0 3.06 4.377l-.16-.292c-.415-.764.42-1.6 1.185-1.184l.292.159a1.873 1.873 0 0 0 2.692-1.115z"/>
+              </svg>
+            </button>
+
+            <!-- theme controller is copied from https://daisyui.com/components/theme-controller/ -->
+            <div class="dropdown dropdown-end dropdown-bottom">
+              <div tabindex="0" role="button" class="btn m-1">
+                <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-palette2" viewBox="0 0 16 16">
+                  <path d="M0 .5A.5.5 0 0 1 .5 0h5a.5.5 0 0 1 .5.5v5.277l4.147-4.131a.5.5 0 0 1 .707 0l3.535 3.536a.5.5 0 0 1 0 .708L10.261 10H15.5a.5.5 0 0 1 .5.5v5a.5.5 0 0 1-.5.5H3a3 3 0 0 1-2.121-.879A3 3 0 0 1 0 13.044m6-.21 7.328-7.3-2.829-2.828L6 7.188zM4.5 13a1.5 1.5 0 1 0-3 0 1.5 1.5 0 0 0 3 0M15 15v-4H9.258l-4.015 4zM0 .5v12.495zm0 12.495V13z"/>
+                </svg>
+              </div>
+              <ul tabindex="0" class="dropdown-content bg-base-300 rounded-box z-[1] w-52 p-2 shadow-2xl h-80 overflow-y-auto">
+                <li>
+                  <button
+                    class="btn btn-sm btn-block btn-ghost justify-start"
+                    :class="{ 'btn-active': selectedTheme === 'auto' }"
+                    @click="setSelectedTheme('auto')">
+                    auto
+                  </button>
+                </li>
+                <li v-for="theme in themes">
+                  <input
+                    type="radio"
+                    name="theme-dropdown"
+                    class="theme-controller btn btn-sm btn-block btn-ghost justify-start"
+                    :aria-label="theme"
+                    :value="theme"
+                    :checked="selectedTheme === theme"
+                    @click="setSelectedTheme(theme)" />
+                </li>
+              </ul>
+            </div>
+          </div>
+        </div>
+
+        <!-- chat messages -->
+        <div id="messages-list" class="flex flex-col grow overflow-y-auto">
+          <div class="mt-auto flex justify-center">
+            <!-- placeholder to shift the message to the bottom -->
+            {{ messages.length === 0 ? 'Send a message to start' : '' }}
+          </div>
+          <div v-for="msg in messages" class="group">
+            <div :class="{
+              'chat': true,
+              'chat-start': msg.role !== 'user',
+              'chat-end': msg.role === 'user',
+            }">
+              <div :class="{
+                'chat-bubble markdown': true,
+                'chat-bubble-base-300': msg.role !== 'user',
+              }">
+                <!-- textarea for editing message -->
+                <template v-if="editingMsg && editingMsg.id === msg.id">
+                  <textarea
+                    class="textarea textarea-bordered bg-base-100 text-base-content w-[calc(90vw-8em)] lg:w-96"
+                    v-model="msg.content"></textarea>
+                  <br/>
+                  <button class="btn btn-ghost mt-2 mr-2" @click="editingMsg = null">Cancel</button>
+                  <button class="btn mt-2" @click="editUserMsgAndRegenerate(msg)">Submit</button>
+                </template>
+                <!-- render message as markdown -->
+                <vue-markdown v-else :source="msg.content" />
+              </div>
+            </div>
+
+            <!-- actions for each message -->
+            <div :class="{'text-right': msg.role === 'user'}" class="mx-4 mt-2 mb-2">
+              <!-- user message -->
+              <button v-if="msg.role === 'user'" class="badge btn-mini show-on-hover" @click="editingMsg = msg" :disabled="isGenerating">
+                ✍️ Edit
+              </button>
+              <!-- assistant message -->
+              <button v-if="msg.role === 'assistant'" class="badge btn-mini show-on-hover mr-2" @click="regenerateMsg(msg)" :disabled="isGenerating">
+                🔄 Regenerate
+              </button>
+              <button v-if="msg.role === 'assistant'" class="badge btn-mini show-on-hover mr-2" @click="copyMsg(msg)" :disabled="isGenerating">
+                📋 Copy
+              </button>
+            </div>
+          </div>
+
+          <!-- pending (ongoing) assistant message -->
+          <div id="pending-msg" class="chat chat-start">
+            <div v-if="pendingMsg" class="chat-bubble markdown chat-bubble-base-300">
+              <span v-if="!pendingMsg.content" class="loading loading-dots loading-md"></span>
+              <vue-markdown v-else :source="pendingMsg.content" />
+            </div>
+          </div>
+        </div>
+
+        <!-- chat input -->
+        <div class="flex flex-row items-center mt-8 mb-6">
+          <textarea
+            class="textarea textarea-bordered w-full"
+            placeholder="Type a message (Shift+Enter to add a new line)"
+            v-model="inputMsg"
+            @keydown.enter.exact.prevent="sendMessage"
+            @keydown.enter.shift.exact.prevent="inputMsg += '\n'"
+            :disabled="isGenerating"
+            id="msg-input"
+          ></textarea>
+          <button v-if="!isGenerating" class="btn btn-primary ml-2" @click="sendMessage" :disabled="inputMsg.length === 0">Send</button>
+          <button v-else class="btn btn-neutral ml-2" @click="stopGeneration">Stop</button>
+        </div>
+      </div>
+
+    </div>
+
+
+    <!-- modal for editing config -->
+    <dialog class="modal" :class="{'modal-open': showConfigDialog}">
+      <div class="modal-box">
+        <h3 class="text-lg font-bold mb-6">Settings</h3>
+        <div class="h-[calc(90vh-12rem)] overflow-y-auto">
+          <p class="opacity-40 mb-6">Settings below are saved in browser's localStorage</p>
+          <settings-modal-short-input :config-key="'apiKey'" :config-default="configDefault" :config-info="configInfo" v-model="config.apiKey"></settings-modal-short-input>
+          <label class="form-control mb-2">
+            <div class="label">System Message</div>
+            <textarea class="textarea textarea-bordered h-24" :placeholder="'Default: ' + configDefault.systemMessage" v-model="config.systemMessage"></textarea>
+          </label>
+          <template v-for="configKey in ['temperature', 'top_k', 'top_p', 'min_p', 'max_tokens']">
+            <settings-modal-short-input :config-key="configKey" :config-default="configDefault" :config-info="configInfo" v-model="config[configKey]"></settings-modal-short-input>
+          </template>
+          <!-- TODO: add more sampling-related configs, please regroup them into different "collapse" sections -->
+          <!-- Section: Other sampler settings -->
+          <details class="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
+            <summary class="collapse-title font-bold">Other sampler settings</summary>
+            <div class="collapse-content">
+              <!-- Samplers queue -->
+              <settings-modal-short-input label="Samplers queue" :config-key="'samplers'" :config-default="configDefault" :config-info="configInfo" v-model="config.samplers"></settings-modal-short-input>
+              <!-- Samplers -->
+              <template v-for="configKey in ['dynatemp_range', 'dynatemp_exponent', 'typical_p', 'xtc_probability', 'xtc_threshold']">
+                <settings-modal-short-input :config-key="configKey" :config-default="configDefault" :config-info="configInfo" v-model="config[configKey]"></settings-modal-short-input>
+              </template>
+            </div>
+          </details>
+          <!-- Section: Penalties settings -->
+          <details class="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
+            <summary class="collapse-title font-bold">Penalties settings</summary>
+            <div class="collapse-content">
+              <template v-for="configKey in ['repeat_last_n', 'repeat_penalty', 'presence_penalty', 'frequency_penalty', 'dry_multiplier', 'dry_base', 'dry_allowed_length', 'dry_penalty_last_n']">
+                <settings-modal-short-input :config-key="configKey" :config-default="configDefault" :config-info="configInfo" v-model="config[configKey]"></settings-modal-short-input>
+              </template>
+            </div>
+          </details>
+          <!-- Section: Advanced config -->
+          <details class="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
+            <summary class="collapse-title font-bold">Advanced config</summary>
+            <div class="collapse-content">
+              <label class="form-control mb-2">
+                <!-- Custom parameters input -->
+                <div class="label inline">Custom JSON config (For more info, refer to <a class="underline" href="https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md" target="_blank" rel="noopener noreferrer">server documentation</a>)</div>
+                <textarea class="textarea textarea-bordered h-24" placeholder="Example: { &quot;mirostat&quot;: 1, &quot;min_p&quot;: 0.1 }" v-model="config.custom"></textarea>
+              </label>
+            </div>
+          </details>
+        </div>
+
+        <!-- action buttons -->
+        <div class="modal-action">
+          <button class="btn" @click="resetConfigDialog">Reset to default</button>
+          <button class="btn" @click="closeAndDiscardConfigDialog">Close</button>
+          <button class="btn btn-primary" @click="closeAndSaveConfigDialog">Save</button>
+        </div>
+      </div>
+    </dialog>
+
+  </div>
+
+  <!-- Template to be used by settings modal -->
+  <template id="settings-modal-short-input">
+    <label class="input input-bordered join-item grow flex items-center gap-2 mb-2">
+      <!-- Show help message on hovering on the input label -->
+      <div class="dropdown dropdown-hover">
+        <div tabindex="0" role="button" class="font-bold">{{ label || configKey }}</div>
+        <div class="dropdown-content menu bg-base-100 rounded-box z-10 w-64 p-2 shadow mt-4">
+          {{ configInfo[configKey] || '(no help message available)' }}
+        </div>
+      </div>
+      <!-- Here we forward v-model from parent to child component, see: https://stackoverflow.com/questions/47311936/v-model-and-child-components -->
+      <input type="text" class="grow" :placeholder="'Default: ' + (configDefault[configKey] || 'none')" :value="modelValue" @input="$emit('update:modelValue', $event.target.value)" />
+    </label>
+  </template>
+
+  <script type="module" src="/src/main.js"></script>
+</body>
+
+</html>
--- a/examples/server/webui/package-lock.json
+++ b/examples/server/webui/package-lock.json
--- a/examples/server/webui/package.json
+++ b/examples/server/webui/package.json
@ -0,0 +1,23 @@
+{
+  "name": "webui",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "preview": "vite preview"
+  },
+  "devDependencies": {
+    "vite": "^5.4.10"
+  },
+  "dependencies": {
+    "autoprefixer": "^10.4.20",
+    "daisyui": "^4.12.14",
+    "markdown-it": "^14.1.0",
+    "postcss": "^8.4.49",
+    "tailwindcss": "^3.4.15",
+    "vite-plugin-singlefile": "^2.0.3",
+    "vue": "^3.5.13"
+  }
+}
--- a/examples/server/webui/postcss.config.js
+++ b/examples/server/webui/postcss.config.js
@ -0,0 +1,6 @@
+export default {
+  plugins: {
+    tailwindcss: {},
+    autoprefixer: {},
+  },
+}
--- a/examples/server/webui/src/completion.js
+++ b/examples/server/webui/src/completion.js
@ -0,0 +1,225 @@
+const paramDefaults = {
+  stream: true,
+  temperature: 0.2,
+};
+
+let generation_settings = null;
+
+export class CompletionError extends Error {
+  constructor(message, name, data) {
+    super(message);
+    this.name = name;
+  }
+};
+
+// Completes the prompt as a generator. Recommended for most use cases.
+//
+// Example:
+//
+//    import { llama } from '/completion.js'
+//
+//    const request = llama("Tell me a joke", {n_predict: 800})
+//    for await (const chunk of request) {
+//      document.write(chunk.data.content)
+//    }
+//
+export async function* llama(prompt, params = {}, config = {}) {
+  let controller = config.controller;
+  const api_url = config.api_url?.replace(/\/+$/, '') || "";
+
+  if (!controller) {
+    controller = new AbortController();
+  }
+
+  const completionParams = { ...paramDefaults, ...params, prompt };
+
+  const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
+    method: 'POST',
+    body: JSON.stringify(completionParams),
+    headers: {
+      'Connection': 'keep-alive',
+      'Content-Type': 'application/json',
+      'Accept': 'text/event-stream',
+      ...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
+    },
+    signal: controller.signal,
+  });
+
+  const status = response.status;
+  if (status !== 200) {
+    try {
+      const body = await response.json();
+      if (body && body.error && body.error.message) {
+        throw new CompletionError(body.error.message, 'ServerError');
+      }
+    } catch (err) {
+      throw new CompletionError(err.message, 'ServerError');
+    }
+  }
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+
+  let content = "";
+  let leftover = ""; // Buffer for partially read lines
+
+  try {
+    let cont = true;
+
+    while (cont) {
+      const result = await reader.read();
+      if (result.done) {
+        break;
+      }
+
+      // Add any leftover data to the current chunk of data
+      const text = leftover + decoder.decode(result.value);
+
+      // Check if the last character is a line break
+      const endsWithLineBreak = text.endsWith('\n');
+
+      // Split the text into lines
+      let lines = text.split('\n');
+
+      // If the text doesn't end with a line break, then the last line is incomplete
+      // Store it in leftover to be added to the next chunk of data
+      if (!endsWithLineBreak) {
+        leftover = lines.pop();
+      } else {
+        leftover = ""; // Reset leftover if we have a line break at the end
+      }
+
+      // Parse all sse events and add them to result
+      const regex = /^(\S+):\s(.*)$/gm;
+      for (const line of lines) {
+        const match = regex.exec(line);
+        if (match) {
+          result[match[1]] = match[2];
+          if (result.data === '[DONE]') {
+            cont = false;
+            break;
+          }
+
+          // since we know this is llama.cpp, let's just decode the json in data
+          if (result.data) {
+            result.data = JSON.parse(result.data);
+            content += result.data.content;
+
+            // yield
+            yield result;
+
+            // if we got a stop token from server, we will break here
+            if (result.data.stop) {
+              if (result.data.generation_settings) {
+                generation_settings = result.data.generation_settings;
+              }
+              cont = false;
+              break;
+            }
+          }
+          if (result.error) {
+            try {
+              result.error = JSON.parse(result.error);
+              if (result.error.message.includes('slot unavailable')) {
+                // Throw an error to be caught by upstream callers
+                throw new Error('slot unavailable');
+              } else {
+                console.error(`llama.cpp error [${result.error.code} - ${result.error.type}]: ${result.error.message}`);
+              }
+            } catch(e) {
+              console.error(`llama.cpp error ${result.error}`)
+            }
+          }
+        }
+      }
+    }
+  } catch (e) {
+    if (e.name !== 'AbortError') {
+      console.error("llama error: ", e);
+    }
+    throw e;
+  }
+  finally {
+    controller.abort();
+  }
+
+  return content;
+}
+
+// Call llama, return an event target that you can subscribe to
+//
+// Example:
+//
+//    import { llamaEventTarget } from '/completion.js'
+//
+//    const conn = llamaEventTarget(prompt)
+//    conn.addEventListener("message", (chunk) => {
+//      document.write(chunk.detail.content)
+//    })
+//
+export const llamaEventTarget = (prompt, params = {}, config = {}) => {
+  const eventTarget = new EventTarget();
+  (async () => {
+    let content = "";
+    for await (const chunk of llama(prompt, params, config)) {
+      if (chunk.data) {
+        content += chunk.data.content;
+        eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
+      }
+      if (chunk.data.generation_settings) {
+        eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
+      }
+      if (chunk.data.timings) {
+        eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
+      }
+    }
+    eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
+  })();
+  return eventTarget;
+}
+
+// Call llama, return a promise that resolves to the completed text. This does not support streaming
+//
+// Example:
+//
+//     llamaPromise(prompt).then((content) => {
+//       document.write(content)
+//     })
+//
+//     or
+//
+//     const content = await llamaPromise(prompt)
+//     document.write(content)
+//
+export const llamaPromise = (prompt, params = {}, config = {}) => {
+  return new Promise(async (resolve, reject) => {
+    let content = "";
+    try {
+      for await (const chunk of llama(prompt, params, config)) {
+        content += chunk.data.content;
+      }
+      resolve(content);
+    } catch (error) {
+      reject(error);
+    }
+  });
+};
+
+/**
+ * (deprecated)
+ */
+export const llamaComplete = async (params, controller, callback) => {
+  for await (const chunk of llama(params.prompt, params, { controller })) {
+    callback(chunk);
+  }
+}
+
+// Get the model info from the server. This is useful for getting the context window and so on.
+export const llamaModelInfo = async (config = {}) => {
+  if (!generation_settings) {
+    const api_url = config.api_url?.replace(/\/+$/, '') || "";
+    const props = await fetch(`${api_url}/props`).then(r => r.json());
+    generation_settings = props.default_generation_settings;
+  }
+  return generation_settings;
+}
--- a/examples/server/webui/src/main.js
+++ b/examples/server/webui/src/main.js
@ -0,0 +1,456 @@
+import './styles.css';
+import { createApp, defineComponent, shallowRef, computed, h } from 'vue/dist/vue.esm-bundler.js';
+import { llama } from './completion.js';
+import MarkdownIt from 'markdown-it';
+
+// utility functions
+const isString = (x) => !!x.toLowerCase;
+const isNumeric = (n) => !isString(n) && !isNaN(n);
+const escapeAttr = (str) => str.replace(/>/g, '&gt;').replace(/"/g, '&quot;');
+const copyStr = (str) => navigator.clipboard.writeText(str);
+
+// constants
+const BASE_URL = localStorage.getItem('base') // for debugging
+  || (new URL('.', document.baseURI).href).toString(); // for production
+const CONFIG_DEFAULT = {
+  // Note: in order not to introduce breaking changes, please keep the same data type (number, string, etc) if you want to change the default value. Do not use null or undefined for default value.
+  apiKey: '',
+  systemMessage: 'You are a helpful assistant.',
+  // make sure these default values are in sync with `common.h`
+  samplers: 'dkypmxt',
+  temperature: 0.8,
+  dynatemp_range: 0.0,
+  dynatemp_exponent: 1.0,
+  top_k: 40,
+  top_p: 0.95,
+  min_p: 0.05,
+  xtc_probability: 0.0,
+  xtc_threshold: 0.1,
+  typical_p: 1.0,
+  repeat_last_n: 64,
+  repeat_penalty: 1.0,
+  presence_penalty: 0.0,
+  frequency_penalty: 0.0,
+  dry_multiplier: 0.0,
+  dry_base: 1.75,
+  dry_allowed_length: 2,
+  dry_penalty_last_n: -1,
+  max_tokens: -1,
+  custom: '', // custom json-stringified object
+};
+const CONFIG_INFO = {
+  apiKey: 'Set the API Key if you are using --api-key option for the server.',
+  systemMessage: 'The starting message that defines how model should behave.',
+  samplers: 'The order at which samplers are applied, in simplified way. Default is "dkypmxt": dry->top_k->typ_p->top_p->min_p->xtc->temperature',
+  temperature: 'Controls the randomness of the generated text by affecting the probability distribution of the output tokens. Higher = more random, lower = more focused.',
+  dynatemp_range: 'Addon for the temperature sampler. The added value to the range of dynamic temperature, which adjusts probabilities by entropy of tokens.',
+  dynatemp_exponent: 'Addon for the temperature sampler. Smoothes out the probability redistribution based on the most probable token.',
+  top_k: 'Keeps only k top tokens.',
+  top_p: 'Limits tokens to those that together have a cumulative probability of at least p',
+  min_p: 'Limits tokens based on the minimum probability for a token to be considered, relative to the probability of the most likely token.',
+  xtc_probability: 'XTC sampler cuts out top tokens; this parameter controls the chance of cutting tokens at all. 0 disables XTC.',
+  xtc_threshold: 'XTC sampler cuts out top tokens; this parameter controls the token probability that is required to cut that token.',
+  typical_p: 'Sorts and limits tokens based on the difference between log-probability and entropy.',
+  repeat_last_n: 'Last n tokens to consider for penalizing repetition',
+  repeat_penalty: 'Controls the repetition of token sequences in the generated text',
+  presence_penalty: 'Limits tokens based on whether they appear in the output or not.',
+  frequency_penalty: 'Limits tokens based on how often they appear in the output.',
+  dry_multiplier: 'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets the DRY sampling multiplier.',
+  dry_base: 'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets the DRY sampling base value.',
+  dry_allowed_length: 'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets the allowed length for DRY sampling.',
+  dry_penalty_last_n: 'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets DRY penalty for the last n tokens.',
+  max_tokens: 'The maximum number of token per output.',
+  custom: '', // custom json-stringified object
+};
+// config keys having numeric value (i.e. temperature, top_k, top_p, etc)
+const CONFIG_NUMERIC_KEYS = Object.entries(CONFIG_DEFAULT).filter(e => isNumeric(e[1])).map(e => e[0]);
+// list of themes supported by daisyui
+const THEMES = ['light', 'dark', 'cupcake', 'bumblebee', 'emerald', 'corporate', 'synthwave', 'retro', 'cyberpunk', 'valentine', 'halloween', 'garden', 'forest', 'aqua', 'lofi', 'pastel', 'fantasy', 'wireframe', 'black', 'luxury', 'dracula', 'cmyk', 'autumn', 'business', 'acid', 'lemonade', 'night', 'coffee', 'winter', 'dim', 'nord', 'sunset'];
+
+// markdown support
+const VueMarkdown = defineComponent(
+  (props) => {
+    const md = shallowRef(new MarkdownIt({ breaks: true }));
+    const origFenchRenderer = md.value.renderer.rules.fence;
+    md.value.renderer.rules.fence = (tokens, idx, ...args) => {
+      const content = tokens[idx].content;
+      const origRendered = origFenchRenderer(tokens, idx, ...args);
+      return `<div class="relative my-4">
+        <div class="text-right sticky top-4 mb-2 mr-2 h-0">
+          <button class="badge btn-mini" onclick="copyStr(${escapeAttr(JSON.stringify(content))})">📋 Copy</button>
+        </div>
+        ${origRendered}
+      </div>`;
+    };
+    window.copyStr = copyStr;
+    const content = computed(() => md.value.render(props.source));
+    return () => h("div", { innerHTML: content.value });
+  },
+  { props: ["source"] }
+);
+
+// input field to be used by settings modal
+const SettingsModalShortInput = defineComponent({
+  template: document.getElementById('settings-modal-short-input').innerHTML,
+  props: {
+    label: { type: String, required: false },
+    configKey: String,
+    configDefault: Object,
+    configInfo: Object,
+    modelValue: [Object, String, Number],
+  },
+});
+
+// coversations is stored in localStorage
+// format: { [convId]: { id: string, lastModified: number, messages: [...] } }
+// convId is a string prefixed with 'conv-'
+const StorageUtils = {
+  // manage conversations
+  getAllConversations() {
+    const res = [];
+    for (const key in localStorage) {
+      if (key.startsWith('conv-')) {
+        res.push(JSON.parse(localStorage.getItem(key)));
+      }
+    }
+    res.sort((a, b) => b.lastModified - a.lastModified);
+    return res;
+  },
+  // can return null if convId does not exist
+  getOneConversation(convId) {
+    return JSON.parse(localStorage.getItem(convId) || 'null');
+  },
+  // if convId does not exist, create one
+  appendMsg(convId, msg) {
+    if (msg.content === null) return;
+    const conv = StorageUtils.getOneConversation(convId) || {
+      id: convId,
+      lastModified: Date.now(),
+      messages: [],
+    };
+    conv.messages.push(msg);
+    conv.lastModified = Date.now();
+    localStorage.setItem(convId, JSON.stringify(conv));
+  },
+  getNewConvId() {
+    return `conv-${Date.now()}`;
+  },
+  remove(convId) {
+    localStorage.removeItem(convId);
+  },
+  filterAndKeepMsgs(convId, predicate) {
+    const conv = StorageUtils.getOneConversation(convId);
+    if (!conv) return;
+    conv.messages = conv.messages.filter(predicate);
+    conv.lastModified = Date.now();
+    localStorage.setItem(convId, JSON.stringify(conv));
+  },
+  popMsg(convId) {
+    const conv = StorageUtils.getOneConversation(convId);
+    if (!conv) return;
+    const msg = conv.messages.pop();
+    conv.lastModified = Date.now();
+    if (conv.messages.length === 0) {
+      StorageUtils.remove(convId);
+    } else {
+      localStorage.setItem(convId, JSON.stringify(conv));
+    }
+    return msg;
+  },
+
+  // manage config
+  getConfig() {
+    const savedVal = JSON.parse(localStorage.getItem('config') || '{}');
+    // to prevent breaking changes in the future, we always provide default value for missing keys
+    return {
+      ...CONFIG_DEFAULT,
+      ...savedVal,
+    };
+  },
+  setConfig(config) {
+    localStorage.setItem('config', JSON.stringify(config));
+  },
+  getTheme() {
+    return localStorage.getItem('theme') || 'auto';
+  },
+  setTheme(theme) {
+    if (theme === 'auto') {
+      localStorage.removeItem('theme');
+    } else {
+      localStorage.setItem('theme', theme);
+    }
+  },
+};
+
+// scroll to bottom of chat messages
+// if requiresNearBottom is true, only auto-scroll if user is near bottom
+const chatScrollToBottom = (requiresNearBottom) => {
+  const msgListElem = document.getElementById('messages-list');
+  const spaceToBottom = msgListElem.scrollHeight - msgListElem.scrollTop - msgListElem.clientHeight;
+  if (!requiresNearBottom || (spaceToBottom < 100)) {
+    setTimeout(() => msgListElem.scrollTo({ top: msgListElem.scrollHeight }), 1);
+  }
+};
+
+const mainApp = createApp({
+  components: {
+    VueMarkdown,
+    SettingsModalShortInput,
+  },
+  data() {
+    return {
+      conversations: StorageUtils.getAllConversations(),
+      messages: [], // { id: number, role: 'user' | 'assistant', content: string }
+      viewingConvId: StorageUtils.getNewConvId(),
+      inputMsg: '',
+      isGenerating: false,
+      pendingMsg: null, // the on-going message from assistant
+      stopGeneration: () => {},
+      selectedTheme: StorageUtils.getTheme(),
+      config: StorageUtils.getConfig(),
+      showConfigDialog: false,
+      editingMsg: null,
+      // const
+      themes: THEMES,
+      configDefault: {...CONFIG_DEFAULT},
+      configInfo: {...CONFIG_INFO},
+    }
+  },
+  computed: {},
+  mounted() {
+    document.getElementById('app').classList.remove('opacity-0'); // show app
+    // scroll to the bottom when the pending message height is updated
+    const pendingMsgElem = document.getElementById('pending-msg');
+    const resizeObserver = new ResizeObserver(() => {
+      if (this.isGenerating) chatScrollToBottom(true);
+    });
+    resizeObserver.observe(pendingMsgElem);
+  },
+  methods: {
+    hideSidebar() {
+      document.getElementById('toggle-drawer').checked = false;
+    },
+    setSelectedTheme(theme) {
+      this.selectedTheme = theme;
+      StorageUtils.setTheme(theme);
+    },
+    newConversation() {
+      if (this.isGenerating) return;
+      this.viewingConvId = StorageUtils.getNewConvId();
+      this.editingMsg = null;
+      this.fetchMessages();
+      chatScrollToBottom();
+      this.hideSidebar();
+    },
+    setViewingConv(convId) {
+      if (this.isGenerating) return;
+      this.viewingConvId = convId;
+      this.editingMsg = null;
+      this.fetchMessages();
+      chatScrollToBottom();
+      this.hideSidebar();
+    },
+    deleteConv(convId) {
+      if (this.isGenerating) return;
+      if (window.confirm('Are you sure to delete this conversation?')) {
+        StorageUtils.remove(convId);
+        if (this.viewingConvId === convId) {
+          this.viewingConvId = StorageUtils.getNewConvId();
+          this.editingMsg = null;
+        }
+        this.fetchConversation();
+        this.fetchMessages();
+      }
+    },
+    downloadConv(convId) {
+      const conversation = StorageUtils.getOneConversation(convId);
+      if (!conversation) {
+        alert('Conversation not found.');
+        return;
+      }
+      const conversationJson = JSON.stringify(conversation, null, 2);
+      const blob = new Blob([conversationJson], { type: 'application/json' });
+      const url = URL.createObjectURL(blob);
+      const a = document.createElement('a');
+      a.href = url;
+      a.download = `conversation_${convId}.json`;
+      document.body.appendChild(a);
+      a.click();
+      document.body.removeChild(a);
+      URL.revokeObjectURL(url);
+    },
+    async sendMessage() {
+      if (!this.inputMsg) return;
+      const currConvId = this.viewingConvId;
+
+      StorageUtils.appendMsg(currConvId, {
+        id: Date.now(),
+        role: 'user',
+        content: this.inputMsg,
+      });
+      this.fetchConversation();
+      this.fetchMessages();
+      this.inputMsg = '';
+      this.editingMsg = null;
+      this.generateMessage(currConvId);
+      chatScrollToBottom();
+    },
+    async generateMessage(currConvId) {
+      if (this.isGenerating) return;
+      this.pendingMsg = { id: Date.now()+1, role: 'assistant', content: null };
+      this.isGenerating = true;
+      this.editingMsg = null;
+
+      try {
+        const abortController = new AbortController();
+        this.stopGeneration = () => abortController.abort();
+        const params = {
+          messages: [
+            { role: 'system', content: this.config.systemMessage },
+            ...this.messages,
+          ],
+          stream: true,
+          cache_prompt: true,
+          samplers: this.config.samplers,
+          temperature: this.config.temperature,
+          dynatemp_range: this.config.dynatemp_range,
+          dynatemp_exponent: this.config.dynatemp_exponent,
+          top_k: this.config.top_k,
+          top_p: this.config.top_p,
+          min_p: this.config.min_p,
+          typical_p: this.config.typical_p,
+          xtc_probability: this.config.xtc_probability,
+          xtc_threshold: this.config.xtc_threshold,
+          repeat_last_n: this.config.repeat_last_n,
+          repeat_penalty: this.config.repeat_penalty,
+          presence_penalty: this.config.presence_penalty,
+          frequency_penalty: this.config.frequency_penalty,
+          dry_multiplier: this.config.dry_multiplier,
+          dry_base: this.config.dry_base,
+          dry_allowed_length: this.config.dry_allowed_length,
+          dry_penalty_last_n: this.config.dry_penalty_last_n,
+          max_tokens: this.config.max_tokens,
+          ...(this.config.custom.length ? JSON.parse(this.config.custom) : {}),
+          ...(this.config.apiKey ? { api_key: this.config.apiKey } : {}),
+        };
+        const config = {
+          controller: abortController,
+          api_url: BASE_URL,
+          endpoint: '/chat/completions',
+        };
+        for await (const chunk of llama(prompt, params, config)) {
+          const stop = chunk.data.stop;
+          const addedContent = chunk.data.choices[0].delta.content;
+          const lastContent = this.pendingMsg.content || '';
+          if (addedContent) {
+            this.pendingMsg = {
+              id: this.pendingMsg.id,
+              role: 'assistant',
+              content: lastContent + addedContent,
+            };
+          }
+        }
+
+        StorageUtils.appendMsg(currConvId, this.pendingMsg);
+        this.fetchConversation();
+        this.fetchMessages();
+        setTimeout(() => document.getElementById('msg-input').focus(), 1);
+      } catch (error) {
+        if (error.name === 'AbortError') {
+          // user stopped the generation via stopGeneration() function
+          StorageUtils.appendMsg(currConvId, this.pendingMsg);
+          this.fetchConversation();
+          this.fetchMessages();
+        } else {
+          console.error(error);
+          alert(error);
+          // pop last user message
+          const lastUserMsg = StorageUtils.popMsg(currConvId);
+          this.inputMsg = lastUserMsg ? lastUserMsg.content : '';
+        }
+      }
+
+      this.pendingMsg = null;
+      this.isGenerating = false;
+      this.stopGeneration = () => {};
+      this.fetchMessages();
+      chatScrollToBottom();
+    },
+
+    // message actions
+    regenerateMsg(msg) {
+      if (this.isGenerating) return;
+      // TODO: somehow keep old history (like how ChatGPT has different "tree"). This can be done by adding "sub-conversations" with "subconv-" prefix, and new message will have a list of subconvIds
+      const currConvId = this.viewingConvId;
+      StorageUtils.filterAndKeepMsgs(currConvId, (m) => m.id < msg.id);
+      this.fetchConversation();
+      this.fetchMessages();
+      this.generateMessage(currConvId);
+    },
+    copyMsg(msg) {
+      copyStr(msg.content);
+    },
+    editUserMsgAndRegenerate(msg) {
+      if (this.isGenerating) return;
+      const currConvId = this.viewingConvId;
+      const newContent = msg.content;
+      this.editingMsg = null;
+      StorageUtils.filterAndKeepMsgs(currConvId, (m) => m.id < msg.id);
+      StorageUtils.appendMsg(currConvId, {
+        id: Date.now(),
+        role: 'user',
+        content: newContent,
+      });
+      this.fetchConversation();
+      this.fetchMessages();
+      this.generateMessage(currConvId);
+    },
+
+    // settings dialog methods
+    closeAndSaveConfigDialog() {
+      try {
+        if (this.config.custom.length) JSON.parse(this.config.custom);
+      } catch (error) {
+        alert('Invalid JSON for custom config. Please either fix it or leave it empty.');
+        return;
+      }
+      for (const key of CONFIG_NUMERIC_KEYS) {
+        if (isNaN(this.config[key]) || this.config[key].toString().trim().length === 0) {
+          alert(`Invalid number for ${key} (expected an integer or a float)`);
+          return;
+        }
+        this.config[key] = parseFloat(this.config[key]);
+      }
+      this.showConfigDialog = false;
+      StorageUtils.setConfig(this.config);
+    },
+    closeAndDiscardConfigDialog() {
+      this.showConfigDialog = false;
+      this.config = StorageUtils.getConfig();
+    },
+    resetConfigDialog() {
+      if (window.confirm('Are you sure to reset all settings?')) {
+        this.config = {...CONFIG_DEFAULT};
+      }
+    },
+
+    // sync state functions
+    fetchConversation() {
+      this.conversations = StorageUtils.getAllConversations();
+    },
+    fetchMessages() {
+      this.messages = StorageUtils.getOneConversation(this.viewingConvId)?.messages ?? [];
+    },
+  },
+});
+mainApp.config.errorHandler = alert;
+try {
+  mainApp.mount('#app');
+} catch (err) {
+  console.error(err);
+  document.getElementById('app').innerHTML = `<div style="margin:2em auto">
+    Failed to start app. Please try clearing localStorage and try again.<br/>
+    <br/>
+    <button class="btn" onClick="localStorage.clear(); window.location.reload();">Clear localStorage</button>
+  </div>`;
+}
--- a/examples/server/webui/src/styles.css
+++ b/examples/server/webui/src/styles.css
@ -0,0 +1,26 @@
+@tailwind base;
+@tailwind components;
+@tailwind utilities;
+
+.markdown {
+  h1, h2, h3, h4, h5, h6, ul, ol, li { all: revert; }
+  pre {
+    @apply whitespace-pre-wrap rounded-lg p-2;
+    border: 1px solid currentColor;
+  }
+  /* TODO: fix markdown table */
+}
+
+.show-on-hover {
+  @apply md:opacity-0 md:group-hover:opacity-100;
+}
+.btn-mini {
+  @apply cursor-pointer hover:shadow-md;
+}
+.chat-screen { max-width: 900px; }
+
+.chat-bubble-base-300 {
+  --tw-bg-opacity: 1;
+  --tw-text-opacity: 1;
+  @apply bg-base-300 text-base-content;
+}
--- a/examples/server/webui/tailwind.config.js
+++ b/examples/server/webui/tailwind.config.js
@ -0,0 +1,16 @@
+/** @type {import('tailwindcss').Config} */
+export default {
+  content: [
+    "./index.html",
+    "./src/**/*.{js,ts,jsx,tsx}",
+  ],
+  theme: {
+    extend: {},
+  },
+  plugins: [
+    require('daisyui'),
+  ],
+  daisyui: {
+    themes: ['light', 'dark', 'cupcake', 'bumblebee', 'emerald', 'corporate', 'synthwave', 'retro', 'cyberpunk', 'valentine', 'halloween', 'garden', 'forest', 'aqua', 'lofi', 'pastel', 'fantasy', 'wireframe', 'black', 'luxury', 'dracula', 'cmyk', 'autumn', 'business', 'acid', 'lemonade', 'night', 'coffee', 'winter', 'dim', 'nord', 'sunset'],
+  }
+}
--- a/examples/server/webui/vite.config.js
+++ b/examples/server/webui/vite.config.js
@ -0,0 +1,36 @@
+
+import { viteSingleFile } from 'vite-plugin-singlefile';
+import path from 'path';
+import fs from 'fs';
+
+const GUIDE_FOR_FRONTEND = `
+<!--
+  This is a single file build of the frontend.
+  It is automatically generated by the build process.
+  Do not edit this file directly.
+  To make changes, refer to the "Web UI" section in the README.
+-->
+`.trim();
+
+export default {
+  plugins: [
+    viteSingleFile(),
+    (function llamaCppPlugin() {
+      let config;
+      return {
+        name: 'llamacpp:build',
+        apply: 'build',
+        async configResolved(_config) {
+          config = _config;
+        },
+        writeBundle() {
+          const outputIndexHtml = path.join(config.build.outDir, 'index.html');
+          const content = fs.readFileSync(outputIndexHtml, 'utf-8');
+
+          const targetOutputFile = path.join(config.build.outDir, '../../public/index.html');
+          fs.writeFileSync(targetOutputFile, GUIDE_FOR_FRONTEND + '\n' + content);
+        }
+      }
+    })(),
+  ],
+};