Preliminary work for UI and logging
commit a0e27c1cd0
parent ec6212ee64
3 changed files with 86 additions and 8 deletions
@@ -119,6 +119,36 @@
           </li>
         </ul>
       </div>
+
+      <!-- Templates -->
+      <div class="dropdown dropdown-end dropdown-bottom">
+        <div tabindex="0" role="button" class="btn m-1">
+          Templates
+          <svg width="12px" height="12px" class="inline-block h-2 w-2 fill-current opacity-60" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 2048 2048">
+            <path d="M1799 349l242 241-1017 1017L7 590l242-241 775 775 775-775z"></path>
+          </svg>
+        </div>
+        <ul tabindex="0" class="dropdown-content bg-base-300 rounded-box z-[1] w-52 p-2 shadow-2xl h-80 overflow-y-auto">
+          <li>
+            <button
+              class="btn btn-sm btn-block w-full btn-ghost justify-start"
+              :class="{ 'btn-active': config.chat_template === 'chatml' }"
+              @click="config.chat_template = 'chatml'">
+              auto
+            </button>
+          </li>
+          <li v-for="tmpl in templates">
+            <input
+              type="radio"
+              name="tmpl-dropdown"
+              class="theme-controller btn btn-sm btn-block w-full btn-ghost justify-start"
+              :aria-label="tmpl"
+              :value="tmpl"
+              :checked="config.chat_template === tmpl"
+              @click="setSelectedTemplate(tmpl)" />
+          </li>
+        </ul>
+      </div>
     </div>
   </div>
@@ -214,6 +244,10 @@
         <div class="collapse-content">
           <!-- Samplers queue -->
           <settings-modal-short-input label="Samplers queue" :config-key="'samplers'" :config-default="configDefault" :config-info="configInfo" v-model="config.samplers"></settings-modal-short-input>
+          <!-- Prefix -->
+          <settings-modal-short-input label="Prefix" :config-key="'input_prefix'" :config-default="configDefault" :config-info="configInfo" v-model="config.input_prefix"></settings-modal-short-input>
+          <!-- Suffix -->
+          <settings-modal-short-input label="Suffix" :config-key="'input_suffix'" :config-default="configDefault" :config-info="configInfo" v-model="config.input_suffix"></settings-modal-short-input>
           <!-- Samplers -->
           <template v-for="configKey in ['dynatemp_range', 'dynatemp_exponent', 'typical_p', 'xtc_probability', 'xtc_threshold']">
             <settings-modal-short-input :config-key="configKey" :config-default="configDefault" :config-info="configInfo" v-model="config[configKey]" />
@@ -285,6 +319,9 @@
   // Note: to avoid introducing breaking changes, keep the same data type (number, string, etc.) when changing a default value. Do not use null or undefined as a default value.
   apiKey: '',
   systemMessage: 'You are a helpful assistant.',
+  chat_template: 'chatml',
+  input_prefix: '',
+  input_suffix: '',
   // make sure these default values are in sync with `common.h`
   samplers: 'dkypmxt',
   temperature: 0.8,
@@ -310,6 +347,9 @@
 const CONFIG_INFO = {
   apiKey: 'Set the API key if you are using the --api-key option for the server.',
   systemMessage: 'The starting message that defines how the model should behave.',
+  chat_template: 'The format used for messages.',
+  input_prefix: 'Prefix for user messages in custom chat templates.',
+  input_suffix: 'Suffix for user messages in custom chat templates.',
   samplers: 'The order in which samplers are applied, in a simplified way. Default is "dkypmxt": dry->top_k->typ_p->top_p->min_p->xtc->temperature',
   temperature: 'Controls the randomness of the generated text by affecting the probability distribution of the output tokens. Higher = more random, lower = more focused.',
   dynatemp_range: 'Add-on for the temperature sampler. The value added to the range of dynamic temperature, which adjusts probabilities by the entropy of tokens.',
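
An aside on the samplers string above: each letter selects one sampler, applied left to right. A minimal, hypothetical C++ sketch of a decoder (not part of this commit; the letter-to-name mapping is taken from the CONFIG_INFO text above):

    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    // Expand a one-letter sampler-order string into full sampler names.
    static std::vector<std::string> decode_samplers(const std::string & order) {
        static const std::map<char, std::string> NAMES = {
            {'d', "dry"},   {'k', "top_k"}, {'y', "typ_p"}, {'p', "top_p"},
            {'m', "min_p"}, {'x', "xtc"},   {'t', "temperature"},
        };
        std::vector<std::string> out;
        for (char c : order) {
            auto it = NAMES.find(c);
            if (it != NAMES.end()) out.push_back(it->second); // unknown letters are ignored
        }
        return out;
    }

    int main() {
        // prints: dry top_k typ_p top_p min_p xtc temperature
        for (const auto & name : decode_samplers("dkypmxt")) printf("%s ", name.c_str());
        printf("\n");
        return 0;
    }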
@@ -335,6 +375,7 @@
 const CONFIG_NUMERIC_KEYS = Object.entries(CONFIG_DEFAULT).filter(e => isNumeric(e[1])).map(e => e[0]);
 // list of themes supported by daisyui
 const THEMES = ['light', 'dark', 'cupcake', 'bumblebee', 'emerald', 'corporate', 'synthwave', 'retro', 'cyberpunk', 'valentine', 'halloween', 'garden', 'forest', 'aqua', 'lofi', 'pastel', 'fantasy', 'wireframe', 'black', 'luxury', 'dracula', 'cmyk', 'autumn', 'business', 'acid', 'lemonade', 'night', 'coffee', 'winter', 'dim', 'nord', 'sunset'];
+const CHAT_TEMPLATES = ['chatml', 'llama2', 'mistral', 'phi3', 'zephyr', 'monarch', 'gemma', 'gemma2', 'orion', 'openchat', 'vicuna', 'vicuna-orca', 'deepseek', 'command-r', 'llama3', 'chatglm3', 'chatglm4', 'minicpm', 'deepseek2', 'exaone3', 'rwkv-world', 'granite', 'custom'];
 
 // markdown support
 const VueMarkdown = defineComponent(
@@ -481,6 +522,7 @@
       editingMsg: null,
       // const
       themes: THEMES,
+      templates: CHAT_TEMPLATES,
       configDefault: {...CONFIG_DEFAULT},
       configInfo: {...CONFIG_INFO},
     }
@@ -500,6 +542,9 @@
       this.selectedTheme = theme;
       StorageUtils.setTheme(theme);
     },
+    setSelectedTemplate(template) {
+      this.config.chat_template = template;
+    },
     newConversation() {
       if (this.isGenerating) return;
       this.viewingConvId = StorageUtils.getNewConvId();
@@ -559,6 +604,9 @@
           stream: true,
           cache_prompt: true,
           samplers: this.config.samplers,
+          chat_template: this.config.chat_template,
+          input_prefix: this.config.input_prefix,
+          input_suffix: this.config.input_suffix,
           temperature: this.config.temperature,
           dynatemp_range: this.config.dynatemp_range,
           dynatemp_exponent: this.config.dynatemp_exponent,
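
For context, the three new fields travel in the same JSON request body as the existing sampling options. A hedged sketch of what such a payload could look like, built with nlohmann::json (which the server already depends on); the prefix/suffix values are invented placeholders:

    #include <cstdio>
    #include <nlohmann/json.hpp>

    int main() {
        // approximate shape of the completion request once the new fields are set
        nlohmann::json body = {
            {"stream",        true},
            {"cache_prompt",  true},
            {"samplers",      "dkypmxt"},
            {"chat_template", "custom"},            // one of CHAT_TEMPLATES
            {"input_prefix",  "<|user|>\n"},        // placeholder value
            {"input_suffix",  "\n<|assistant|>\n"}, // placeholder value
            {"temperature",   0.8},
        };
        printf("%s\n", body.dump(2).c_str());
        return 0;
    }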
@@ -1147,6 +1147,9 @@ struct server_context {
                 {"model",             params.model_alias},
                 {"seed",              slot.sparams.seed},
                 {"seed_cur",          slot.smpl ? common_sampler_get_seed(slot.smpl) : 0},
+                {"chat_template",     params.chat_template},
+                {"input_prefix",      params.input_prefix},
+                {"input_suffix",      params.input_suffix},
                 {"temperature",       slot.sparams.temp},
                 {"dynatemp_range",    slot.sparams.dynatemp_range},
                 {"dynatemp_exponent", slot.sparams.dynatemp_exponent},
@@ -3218,7 +3221,9 @@ int main(int argc, char ** argv) {
 
     LOG_INF("%s: model loaded\n", __func__);
 
-    // if a custom chat template is not supplied, we will use the one that comes with the model (if any)
+    // if a standard chat template is not chosen, check prefix and suffix to switch to custom template
+    // otherwise use the one that comes with the model (if any)
+    // if a standard chat template is chosen, warn about prefix and suffix not being used
     if (params.chat_template.empty()) {
         if (!params.input_prefix.empty() || !params.input_suffix.empty()) {
             LOG_WRN("%s: Prefix and suffix are used instead of a chat template. This may cause the model to output suboptimal responses\n", __func__);
@@ -3227,13 +3232,16 @@ int main(int argc, char ** argv) {
             LOG_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n", __func__);
             params.chat_template = "chatml";
         }
-    } else if (!params.input_prefix.empty() || !params.input_suffix.empty()) {
-        LOG_WRN("%s: Prefix and suffix are not used because a chat template is defined.\n", __func__);
+    } else if (params.chat_template != "custom" &&
+               (!params.input_prefix.empty() || !params.input_suffix.empty())) {
+        LOG_WRN("%s: Prefix and suffix are defined, but will not be used because a standard chat template is chosen.\n", __func__);
     } else {
-        // print sample chat example to make it clear which template is used
-        LOG_INF("%s: chat template, built_in: %d, chat_example: '%s'\n", __func__, params.chat_template.empty(), common_chat_format_example(ctx_server.model, params.chat_template).c_str());
+        LOG_WRN("%s: Custom chat template is chosen. This may cause the model to output suboptimal responses\n", __func__);
     }
 
+    // print sample chat example to make it clear which template is used
+    LOG_INF("%s: chat template: '%s', built_in: %d, chat_example: '%s'\n", __func__, params.chat_template.c_str(), params.chat_template.empty(), format_chat_example(ctx_server.model, params.chat_template, params.input_prefix, params.input_suffix).c_str());
+
     ctx_server.queue_tasks.on_new_task(std::bind(
         &server_context::process_single_task, &ctx_server, std::placeholders::_1));
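
Taken together, the two hunks above implement a small decision table for picking the template source. A simplified, free-standing sketch of that intended logic (hypothetical names; the real code also loads the model's built-in template and emits the LOG_WRN/LOG_INF messages shown above):

    #include <string>

    // Which template source the server ends up using, per the logic above.
    enum class TemplateChoice { prefix_suffix, model_builtin, chatml_fallback, standard, custom };

    static TemplateChoice choose_template(const std::string & tmpl,
                                          const std::string & prefix,
                                          const std::string & suffix,
                                          bool model_template_supported) {
        const bool has_affixes = !prefix.empty() || !suffix.empty();
        if (tmpl.empty()) {
            if (has_affixes)              return TemplateChoice::prefix_suffix;   // warn: may be suboptimal
            if (model_template_supported) return TemplateChoice::model_builtin;
            return TemplateChoice::chatml_fallback;                               // warn: falling back to chatml
        }
        if (tmpl != "custom" && has_affixes) return TemplateChoice::standard;     // warn: prefix/suffix ignored
        if (tmpl == "custom")                return TemplateChoice::custom;       // warn: may be suboptimal
        return TemplateChoice::standard;
    }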
@@ -299,7 +299,7 @@ static llama_tokens format_infill(
     return embd_inp;
 }
 
-// Format given chat. If tmpl is empty, we take the template from model metadata
-inline std::string format_chat(const struct llama_model * model, const std::string & tmpl, const std::vector<json> & messages) {
+// Format given chat. If tmpl is empty, we either use prefix and suffix (if defined), or take the template from model metadata
+inline std::string format_chat(const struct llama_model * model, const std::string & tmpl, const std::string & prefix, const std::string & suffix, const std::vector<json> & messages) {
     std::vector<common_chat_msg> chat;
+    std::string formatted_chat;
@@ -331,16 +331,38 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
-        chat.push_back({role, content});
+        if (tmpl == "custom") {
+            if (role == "user") formatted_chat += prefix + content + suffix;
+            else formatted_chat += content;
+        } else {
+            chat.push_back({role, content});
+        }
     }
 
-    const auto formatted_chat = common_chat_apply_template(model, tmpl, chat, true);
-    LOG_DBG("formatted_chat: '%s'\n", formatted_chat.c_str());
+    if (tmpl != "custom") formatted_chat = common_chat_apply_template(model, tmpl, chat, true);
+    LOG_WRN("formatted_chat using '%s': '%s'\n", tmpl.c_str(), formatted_chat.c_str());
 
     return formatted_chat;
 }
 
+inline std::string format_chat_example(const struct llama_model * model, const std::string & tmpl, const std::string & prefix, const std::string & suffix) {
+    std::vector<common_chat_msg> msgs = {
+        {"system", "You are a helpful assistant"},
+        {"user", "Hello"},
+        {"assistant", "Hi there"},
+        {"user", "How are you?"},
+    };
+
+    std::string formatted_example;
+
+    if (tmpl == "custom") {
+        for (auto message : msgs) {
+            if (message.role == "user") formatted_example += prefix + message.content + suffix;
+            else formatted_example += message.content;
+        }
+    } else {
+        formatted_example = common_chat_apply_template(model, tmpl, msgs, true);
+    }
+
+    return formatted_example;
+}
+
 static std::string llama_get_chat_template(const struct llama_model * model) {
     std::string template_key = "tokenizer.chat_template";
     // call with NULL buffer to get the total size of the string
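
To make the custom path concrete: when tmpl == "custom", user turns are wrapped in the configured prefix and suffix and every other role is passed through verbatim. A self-contained illustration of that rule (the prefix/suffix values are invented for the example):

    #include <cstdio>
    #include <string>
    #include <vector>

    struct chat_msg { std::string role, content; };

    int main() {
        const std::string prefix = "<|user|>\n";        // stands in for input_prefix
        const std::string suffix = "\n<|assistant|>\n"; // stands in for input_suffix
        std::vector<chat_msg> msgs = {
            {"system",    "You are a helpful assistant"},
            {"user",      "Hello"},
            {"assistant", "Hi there"},
            {"user",      "How are you?"},
        };
        std::string formatted;
        for (const auto & m : msgs) {
            if (m.role == "user") formatted += prefix + m.content + suffix; // same rule as format_chat()
            else                  formatted += m.content;
        }
        printf("%s\n", formatted.c_str());
        return 0;
    }

Note how only user turns are delimited while system and assistant text run together, which is one reason the server code above warns that prefix/suffix and custom templates may yield suboptimal responses.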