diff --git a/common/chat-template.hpp b/common/chat-template.hpp index 882ba41bd..0e88fb361 100644 --- a/common/chat-template.hpp +++ b/common/chat-template.hpp @@ -249,30 +249,16 @@ class chat_template { inputs.add_generation_prompt = false; full = apply(inputs); } - auto eos_pos_last = full.rfind(eos_token_); - if (eos_pos_last == prefix.size() - eos_token_.size() || - (full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) { - full = full.substr(0, eos_pos_last); - } - size_t common_prefix_length = 0; - for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) { - if (prefix[i] != full[i]) { - break; + + if (full.find(prefix) != 0) { + if (prefix.rfind(eos_token_) == prefix.size() - eos_token_.size()) { + prefix = prefix.substr(0, prefix.size() - eos_token_.size()); } - if (prefix[i] == '<') { - // DeepSeek R1's template (as of 20250209) adds a trailing if add_generation_prompt, - // but it removes thinking tags for past messages. - // The prefix and full strings diverge at vs. <|tool▁calls▁begin|>, we avoid consuming the leading <. - continue; - } - common_prefix_length = i + 1; } - auto example = full.substr(common_prefix_length); - if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) { + if (full.find(prefix) != 0) { fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n"); - } else { - tool_call_example_ = example; } + tool_call_example_ = full.substr(prefix.size()); } } catch (const std::exception & e) { fprintf(stderr, "Failed to generate tool call example: %s\n", e.what()); @@ -377,7 +363,7 @@ class chat_template { if (polyfill_tools) { adjusted_messages = add_system(inputs.messages, "You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) + - (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n")); + (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_)); } else { adjusted_messages = inputs.messages; } diff --git a/common/log.h b/common/log.h index 4ebc6314b..85dd4393b 100644 --- a/common/log.h +++ b/common/log.h @@ -2,7 +2,6 @@ #include "ggml.h" // for ggml_log_level -#define LOG_CLR_TO_EOL "\033[K\r" #define LOG_COL_DEFAULT "\033[0m" #define LOG_COL_BOLD "\033[1m" #define LOG_COL_RED "\033[31m" diff --git a/common/minja.hpp b/common/minja.hpp index c58dd66e0..c304b5c66 100644 --- a/common/minja.hpp +++ b/common/minja.hpp @@ -1385,13 +1385,6 @@ static std::string strip(const std::string & s) { return s.substr(start, end - start + 1); } -static std::string capitalize(const std::string & s) { - if (s.empty()) return s; - auto result = s; - result[0] = std::toupper(result[0]); - return result; -} - static std::string html_escape(const std::string & s) { std::string result; result.reserve(s.size()); @@ -1469,9 +1462,6 @@ public: if (method->get_name() == "strip") { vargs.expectArgs("strip method", {0, 0}, {0, 0}); return Value(strip(str)); - } else if (method->get_name() == "capitalize") { - vargs.expectArgs("capitalize method", {0, 0}, {0, 0}); - return Value(capitalize(str)); } else if (method->get_name() == "endswith") { vargs.expectArgs("endswith method", {1, 1}, {0, 0}); auto suffix = vargs.args[0].get(); @@ -1802,7 +1792,7 @@ private: auto left = parseStringConcat(); if (!left) throw std::runtime_error("Expected left side of 'logical compare' expression"); - static std::regex compare_tok(R"(==|!=|<=?|>=?|in\b|is\b|not\s+in\b)"); + static std::regex compare_tok(R"(==|!=|<=?|>=?|in\b|is\b|not[\r\n\s]+in\b)"); static std::regex not_tok(R"(not\b)"); std::string op_str; while (!(op_str = consumeToken(compare_tok)).empty()) { @@ -2181,7 +2171,7 @@ private: using TemplateTokenIterator = TemplateTokenVector::const_iterator; std::vector parseVarNames() { - static std::regex varnames_regex(R"(((?:\w+)(?:\s*,\s*(?:\w+))*)\s*)"); + static std::regex varnames_regex(R"(((?:\w+)(?:[\r\n\s]*,[\r\n\s]*(?:\w+))*)[\r\n\s]*)"); std::vector group; if ((group = consumeTokenGroups(varnames_regex)).empty()) throw std::runtime_error("Expected variable names"); @@ -2204,13 +2194,13 @@ private: } TemplateTokenVector tokenize() { - static std::regex comment_tok(R"(\{#([-~]?)([\s\S]*?)([-~]?)#\})"); + static std::regex comment_tok(R"(\{#([-~]?)([\s\S\r\n]*?)([-~]?)#\})"); static std::regex expr_open_regex(R"(\{\{([-~])?)"); - static std::regex block_open_regex(R"(^\{%([-~])?\s*)"); + static std::regex block_open_regex(R"(^\{%([-~])?[\s\n\r]*)"); static std::regex block_keyword_tok(R"((if|else|elif|endif|for|endfor|generation|endgeneration|set|endset|block|endblock|macro|endmacro|filter|endfilter|break|continue)\b)"); static std::regex non_text_open_regex(R"(\{\{|\{%|\{#)"); - static std::regex expr_close_regex(R"(\s*([-~])?\}\})"); - static std::regex block_close_regex(R"(\s*([-~])?%\})"); + static std::regex expr_close_regex(R"([\s\n\r]*([-~])?\}\})"); + static std::regex block_close_regex(R"([\s\n\r]*([-~])?%\})"); TemplateTokenVector tokens; std::vector group; @@ -2294,7 +2284,7 @@ private: auto post_space = parseBlockClose(); tokens.push_back(std::make_unique(location, pre_space, post_space)); } else if (keyword == "set") { - static std::regex namespaced_var_regex(R"((\w+)\s*\.\s*(\w+))"); + static std::regex namespaced_var_regex(R"((\w+)[\s\n\r]*\.[\s\n\r]*(\w+))"); std::string ns; std::vector var_names; @@ -2346,11 +2336,6 @@ private: throw std::runtime_error("Unexpected block: " + keyword); } } else if (std::regex_search(it, end, match, non_text_open_regex)) { - if (!match.position()) { - if (match[0] != "{#") - throw std::runtime_error("Internal error: Expected a comment"); - throw std::runtime_error("Missing end of comment tag"); - } auto text_end = it + match.position(); text = std::string(it, text_end); it = text_end; @@ -2415,7 +2400,7 @@ private: auto text = text_token->text; if (post_space == SpaceHandling::Strip) { - static std::regex trailing_space_regex(R"(\s+$)"); + static std::regex trailing_space_regex(R"((\s|\r|\n)+$)"); text = std::regex_replace(text, trailing_space_regex, ""); } else if (options.lstrip_blocks && it != end) { auto i = text.size(); @@ -2425,7 +2410,7 @@ private: } } if (pre_space == SpaceHandling::Strip) { - static std::regex leading_space_regex(R"(^\s+)"); + static std::regex leading_space_regex(R"(^(\s|\r|\n)+)"); text = std::regex_replace(text, leading_space_regex, ""); } else if (options.trim_blocks && (it - 1) != begin && !dynamic_cast((*(it - 2)).get())) { if (text.length() > 0 && text[0] == '\n') { diff --git a/examples/main/README.md b/examples/main/README.md index ceaed42f6..46f92eb7a 100644 --- a/examples/main/README.md +++ b/examples/main/README.md @@ -37,7 +37,7 @@ Once downloaded, place your model in the models folder in llama.cpp. ##### Infinite text from a starting prompt (you can use `Ctrl-C` to stop it): ```bash -./llama-cli -m models/gemma-1.1-7b-it.Q4_K_M.gguf --ignore-eos -n -1 +./llama-cli -m models\gemma-1.1-7b-it.Q4_K_M.gguf --ignore-eos -n -1 ``` ### Windows: diff --git a/examples/run/run.cpp b/examples/run/run.cpp index 9362da220..eab60cad1 100644 --- a/examples/run/run.cpp +++ b/examples/run/run.cpp @@ -535,7 +535,8 @@ class HttpClient { static void print_progress(const std::string & progress_prefix, const std::string & progress_bar, const std::string & progress_suffix) { - printe("\r" LOG_CLR_TO_EOL "%s%s| %s", progress_prefix.c_str(), progress_bar.c_str(), progress_suffix.c_str()); + printe("\r%*s\r%s%s| %s", get_terminal_width(), " ", progress_prefix.c_str(), progress_bar.c_str(), + progress_suffix.c_str()); } // Function to write data to a file static size_t write_data(void * ptr, size_t size, size_t nmemb, void * stream) { @@ -796,13 +797,16 @@ class LlamaData { llama_model_ptr initialize_model(Opt & opt) { ggml_backend_load_all(); resolve_model(opt.model_); - printe("\r" LOG_CLR_TO_EOL "Loading model"); + printe( + "\r%*s" + "\rLoading model", + get_terminal_width(), " "); llama_model_ptr model(llama_model_load_from_file(opt.model_.c_str(), opt.model_params)); if (!model) { printe("%s: error: unable to load model from file: %s\n", __func__, opt.model_.c_str()); } - printe("\r" LOG_CLR_TO_EOL); + printe("\r%*s\r", static_cast(sizeof("Loading model")), " "); return model; } @@ -965,7 +969,10 @@ static int generate(LlamaData & llama_data, const std::string & prompt, std::str static int read_user_input(std::string & user_input) { static const char * prompt_prefix = "> "; #ifdef WIN32 - printf("\r" LOG_CLR_TO_EOL LOG_COL_DEFAULT "%s", prompt_prefix); + printf( + "\r%*s" + "\r" LOG_COL_DEFAULT "%s", + get_terminal_width(), " ", prompt_prefix); std::getline(std::cin, user_input); if (std::cin.eof()) { diff --git a/examples/server/README.md b/examples/server/README.md index d0b262f0e..e9d0374ad 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -220,7 +220,7 @@ services: The project includes a web-based user interface that enables interaction with the model through the `/chat/completions` endpoint. The web UI is developed using: -- `react` framework for frontend development +- `vue` framework for frontend development - `tailwindcss` and `daisyui` for styling - `vite` for build tooling diff --git a/examples/server/public/index.html.gz b/examples/server/public/index.html.gz index 141e80920..646988ad8 100644 Binary files a/examples/server/public/index.html.gz and b/examples/server/public/index.html.gz differ diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 0718806c8..e38f89aa5 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -334,24 +334,24 @@ struct server_task { if (data.contains("json_schema") && !data.contains("grammar")) { try { auto schema = json_value(data, "json_schema", json::object()); - SRV_DBG("JSON schema: %s\n", schema.dump(2).c_str()); + LOG_DBG("JSON schema: %s\n", schema.dump(2).c_str()); params.sampling.grammar = json_schema_to_grammar(schema); - SRV_DBG("Converted grammar: %s\n", params.sampling.grammar.c_str()); + LOG_DBG("Converted grammar: %s\n", params.sampling.grammar.c_str()); } catch (const std::exception & e) { throw std::runtime_error(std::string("\"json_schema\": ") + e.what()); } } else { params.sampling.grammar = json_value(data, "grammar", defaults.sampling.grammar); - SRV_DBG("Grammar: %s\n", params.sampling.grammar.c_str()); + LOG_DBG("Grammar: %s\n", params.sampling.grammar.c_str()); params.sampling.grammar_lazy = json_value(data, "grammar_lazy", defaults.sampling.grammar_lazy); - SRV_DBG("Grammar lazy: %s\n", params.sampling.grammar_lazy ? "true" : "false"); + LOG_DBG("Grammar lazy: %s\n", params.sampling.grammar_lazy ? "true" : "false"); } { auto it = data.find("chat_format"); if (it != data.end()) { params.oaicompat_chat_format = static_cast(it->get()); - SRV_INF("Chat format: %s\n", common_chat_format_name(params.oaicompat_chat_format).c_str()); + LOG_INF("Chat format: %s\n", common_chat_format_name(params.oaicompat_chat_format).c_str()); } else { params.oaicompat_chat_format = defaults.oaicompat_chat_format; } @@ -367,12 +367,12 @@ struct server_task { auto ids = common_tokenize(vocab, trigger.word, /* add_special= */ false, /* parse_special= */ true); if (ids.size() == 1) { - SRV_DBG("Grammar trigger token: %d (`%s`)\n", ids[0], trigger.word.c_str()); + LOG_DBG("Grammar trigger token: %d (`%s`)\n", ids[0], trigger.word.c_str()); params.sampling.grammar_trigger_tokens.push_back(ids[0]); params.sampling.preserved_tokens.insert(ids[0]); continue; } - SRV_DBG("Grammar trigger word: `%s`\n", trigger.word.c_str()); + LOG_DBG("Grammar trigger word: `%s`\n", trigger.word.c_str()); params.sampling.grammar_trigger_words.push_back(trigger); } } @@ -381,11 +381,11 @@ struct server_task { for (const auto & t : *preserved_tokens) { auto ids = common_tokenize(vocab, t.get(), /* add_special= */ false, /* parse_special= */ true); if (ids.size() == 1) { - SRV_DBG("Preserved token: %d\n", ids[0]); + LOG_DBG("Preserved token: %d\n", ids[0]); params.sampling.preserved_tokens.insert(ids[0]); } else { // This may happen when using a tool call style meant for a model with special tokens to preserve on a model without said tokens. - SRV_WRN("Not preserved because more than 1 token (wrong chat template override?): %s\n", t.get().c_str()); + LOG_WRN("Not preserved because more than 1 token (wrong chat template override?): %s\n", t.get().c_str()); } } } @@ -717,7 +717,7 @@ struct server_task_result_cmpl_final : server_task_result { std::string finish_reason = "length"; common_chat_msg msg; if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) { - SRV_DBG("Parsing chat message: %s\n", content.c_str()); + LOG_DBG("Parsing chat message: %s\n", content.c_str()); msg = common_chat_parse(content, oaicompat_chat_format); finish_reason = msg.tool_calls.empty() ? "stop" : "tool_calls"; } else { @@ -1600,6 +1600,10 @@ struct server_queue { while (true) { std::unique_lock lock(mutex_tasks); + if (!running) { + QUE_DBG("%s", "terminate\n"); + return; + } if (queue_tasks.empty()) { lock.unlock(); break; @@ -1620,11 +1624,11 @@ struct server_queue { QUE_DBG("%s", "waiting for new tasks\n"); { std::unique_lock lock(mutex_tasks); + if (!running) { + QUE_DBG("%s", "terminate\n"); + return; + } if (queue_tasks.empty()) { - if (!running) { - QUE_DBG("%s", "terminate\n"); - return; - } condition_tasks.wait(lock, [&]{ return (!queue_tasks.empty() || !running); }); @@ -1885,7 +1889,7 @@ struct server_context { } if (params_base.chat_template.empty() && !validate_builtin_chat_template(params.use_jinja)) { - SRV_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n", __func__); + LOG_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n", __func__); chat_templates = common_chat_templates_from_model(model, "chatml"); } else { chat_templates = common_chat_templates_from_model(model, params_base.chat_template); @@ -3355,10 +3359,10 @@ static void log_server_request(const httplib::Request & req, const httplib::Resp // reminder: this function is not covered by httplib's exception handler; if someone does more complicated stuff, think about wrapping it in try-catch - SRV_INF("request: %s %s %s %d\n", req.method.c_str(), req.path.c_str(), req.remote_addr.c_str(), res.status); + LOG_INF("request: %s %s %s %d\n", req.method.c_str(), req.path.c_str(), req.remote_addr.c_str(), res.status); - SRV_DBG("request: %s\n", req.body.c_str()); - SRV_DBG("response: %s\n", res.body.c_str()); + LOG_DBG("request: %s\n", req.body.c_str()); + LOG_DBG("response: %s\n", res.body.c_str()); } std::function shutdown_handler; @@ -3860,9 +3864,7 @@ int main(int argc, char ** argv) { try { const auto & prompt = data.at("prompt"); - // TODO: this log can become very long, put it behind a flag or think about a more compact format - //SRV_DBG("Prompt: %s\n", prompt.is_string() ? prompt.get().c_str() : prompt.dump(2).c_str()); - + LOG_DBG("Prompt: %s\n", prompt.is_string() ? prompt.get().c_str() : prompt.dump(2).c_str()); std::vector tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true); tasks.reserve(tokenized_prompts.size()); for (size_t i = 0; i < tokenized_prompts.size(); i++) { @@ -4378,9 +4380,6 @@ int main(int argc, char ** argv) { res.set_content("Error: gzip is not supported by this browser", "text/plain"); } else { res.set_header("Content-Encoding", "gzip"); - // COEP and COOP headers, required by pyodide (python interpreter) - res.set_header("Cross-Origin-Embedder-Policy", "require-corp"); - res.set_header("Cross-Origin-Opener-Policy", "same-origin"); res.set_content(reinterpret_cast(index_html_gz), index_html_gz_len, "text/html; charset=utf-8"); } return false; @@ -4430,6 +4429,7 @@ int main(int argc, char ** argv) { // clean up function, to be called before exit auto clean_up = [&svr]() { + SRV_INF("%s: cleaning up before exit...\n", __func__); svr->stop(); llama_backend_free(); }; @@ -4446,10 +4446,6 @@ int main(int argc, char ** argv) { } if (!was_bound) { - //LOG_ERROR("couldn't bind HTTP server socket", { - // {"hostname", params.hostname}, - // {"port", params.port}, - //}); LOG_ERR("%s: couldn't bind HTTP server socket, hostname: %s, port: %d\n", __func__, params.hostname.c_str(), params.port); clean_up(); return 1; @@ -4466,7 +4462,7 @@ int main(int argc, char ** argv) { if (!ctx_server.load_model(params)) { clean_up(); - t.join(); + // t.join(); // FIXME: see below LOG_ERR("%s: exiting due to model loading error\n", __func__); return 1; } @@ -4490,13 +4486,10 @@ int main(int argc, char ** argv) { }); shutdown_handler = [&](int) { + // this will unblock start_loop() ctx_server.queue_tasks.terminate(); }; - LOG_INF("%s: server is listening on http://%s:%d - starting the main loop\n", __func__, params.hostname.c_str(), params.port); - - ctx_server.queue_tasks.start_loop(); - #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) struct sigaction sigint_action; sigint_action.sa_handler = signal_handler; @@ -4511,8 +4504,13 @@ int main(int argc, char ** argv) { SetConsoleCtrlHandler(reinterpret_cast(console_ctrl_handler), true); #endif + LOG_INF("%s: server is listening on http://%s:%d - starting the main loop\n", __func__, params.hostname.c_str(), params.port); + + // this call blocks the main thread until queue_tasks.terminate() is called + ctx_server.queue_tasks.start_loop(); + clean_up(); - t.join(); + // t.join(); // FIXME: http thread may stuck if there is an on-going request. we don't need to care about this for now as the HTTP connection will already be closed at this point, but it's better to fix this return 0; } diff --git a/examples/server/webui/package-lock.json b/examples/server/webui/package-lock.json index c6c5de3c0..e69fd2aa5 100644 --- a/examples/server/webui/package-lock.json +++ b/examples/server/webui/package-lock.json @@ -8,7 +8,6 @@ "name": "webui", "version": "0.0.0", "dependencies": { - "@heroicons/react": "^2.2.0", "@sec-ant/readable-stream": "^0.6.0", "@vscode/markdown-it-katex": "^1.1.1", "autoprefixer": "^10.4.20", @@ -903,15 +902,6 @@ "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, - "node_modules/@heroicons/react": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/@heroicons/react/-/react-2.2.0.tgz", - "integrity": "sha512-LMcepvRaS9LYHJGsF0zzmgKCUim/X3N/DQKc4jepAXJ7l8QxJ1PmxJzqplF2Z3FE4PqBAIGyJAQ/w4B5dsqbtQ==", - "license": "MIT", - "peerDependencies": { - "react": ">= 16 || ^19.0.0-rc" - } - }, "node_modules/@humanfs/core": { "version": "0.19.1", "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz", diff --git a/examples/server/webui/package.json b/examples/server/webui/package.json index 3be2b14de..f3c7dde43 100644 --- a/examples/server/webui/package.json +++ b/examples/server/webui/package.json @@ -11,7 +11,6 @@ "preview": "vite preview" }, "dependencies": { - "@heroicons/react": "^2.2.0", "@sec-ant/readable-stream": "^0.6.0", "@vscode/markdown-it-katex": "^1.1.1", "autoprefixer": "^10.4.20", diff --git a/examples/server/webui/src/App.tsx b/examples/server/webui/src/App.tsx index 2ce734682..d151ba291 100644 --- a/examples/server/webui/src/App.tsx +++ b/examples/server/webui/src/App.tsx @@ -1,9 +1,8 @@ import { HashRouter, Outlet, Route, Routes } from 'react-router'; import Header from './components/Header'; import Sidebar from './components/Sidebar'; -import { AppContextProvider, useAppContext } from './utils/app.context'; +import { AppContextProvider } from './utils/app.context'; import ChatScreen from './components/ChatScreen'; -import SettingDialog from './components/SettingDialog'; function App() { return ( @@ -23,23 +22,13 @@ function App() { } function AppLayout() { - const { showSettings, setShowSettings } = useAppContext(); return ( <> -
+
- { - setShowSettings(false)} - /> - } ); } diff --git a/examples/server/webui/src/Config.ts b/examples/server/webui/src/Config.ts index 779ed9bf7..1860ffcc9 100644 --- a/examples/server/webui/src/Config.ts +++ b/examples/server/webui/src/Config.ts @@ -10,7 +10,6 @@ export const BASE_URL = new URL('.', document.baseURI).href export const CONFIG_DEFAULT = { // Note: in order not to introduce breaking changes, please keep the same data type (number, string, etc) if you want to change the default value. Do not use null or undefined for default value. - // Do not use nested objects, keep it single level. Prefix the key if you need to group them. apiKey: '', systemMessage: 'You are a helpful assistant.', showTokensPerSecond: false, @@ -37,8 +36,6 @@ export const CONFIG_DEFAULT = { dry_penalty_last_n: -1, max_tokens: -1, custom: '', // custom json-stringified object - // experimental features - pyIntepreterEnabled: false, }; export const CONFIG_INFO: Record = { apiKey: 'Set the API Key if you are using --api-key option for the server.', diff --git a/examples/server/webui/src/components/CanvasPyInterpreter.tsx b/examples/server/webui/src/components/CanvasPyInterpreter.tsx deleted file mode 100644 index c2707fe20..000000000 --- a/examples/server/webui/src/components/CanvasPyInterpreter.tsx +++ /dev/null @@ -1,195 +0,0 @@ -import { useEffect, useState } from 'react'; -import { useAppContext } from '../utils/app.context'; -import { OpenInNewTab, XCloseButton } from '../utils/common'; -import { CanvasType } from '../utils/types'; -import { PlayIcon, StopIcon } from '@heroicons/react/24/outline'; -import StorageUtils from '../utils/storage'; - -const canInterrupt = typeof SharedArrayBuffer === 'function'; - -// adapted from https://pyodide.org/en/stable/usage/webworker.html -const WORKER_CODE = ` -importScripts("https://cdn.jsdelivr.net/pyodide/v0.27.2/full/pyodide.js"); - -let stdOutAndErr = []; - -let pyodideReadyPromise = loadPyodide({ - stdout: (data) => stdOutAndErr.push(data), - stderr: (data) => stdOutAndErr.push(data), -}); - -let alreadySetBuff = false; - -self.onmessage = async (event) => { - stdOutAndErr = []; - - // make sure loading is done - const pyodide = await pyodideReadyPromise; - const { id, python, context, interruptBuffer } = event.data; - - if (interruptBuffer && !alreadySetBuff) { - pyodide.setInterruptBuffer(interruptBuffer); - alreadySetBuff = true; - } - - // Now load any packages we need, run the code, and send the result back. - await pyodide.loadPackagesFromImports(python); - - // make a Python dictionary with the data from content - const dict = pyodide.globals.get("dict"); - const globals = dict(Object.entries(context)); - try { - self.postMessage({ id, running: true }); - // Execute the python code in this context - const result = pyodide.runPython(python, { globals }); - self.postMessage({ result, id, stdOutAndErr }); - } catch (error) { - self.postMessage({ error: error.message, id }); - } - interruptBuffer[0] = 0; -}; -`; - -let worker: Worker; -const interruptBuffer = canInterrupt - ? new Uint8Array(new SharedArrayBuffer(1)) - : null; - -const startWorker = () => { - if (!worker) { - worker = new Worker( - URL.createObjectURL(new Blob([WORKER_CODE], { type: 'text/javascript' })) - ); - } -}; - -if (StorageUtils.getConfig().pyIntepreterEnabled) { - startWorker(); -} - -const runCodeInWorker = ( - pyCode: string, - callbackRunning: () => void -): { - donePromise: Promise; - interrupt: () => void; -} => { - startWorker(); - const id = Math.random() * 1e8; - const context = {}; - if (interruptBuffer) { - interruptBuffer[0] = 0; - } - - const donePromise = new Promise((resolve) => { - worker.onmessage = (event) => { - const { error, stdOutAndErr, running } = event.data; - if (id !== event.data.id) return; - if (running) { - callbackRunning(); - return; - } else if (error) { - resolve(error.toString()); - } else { - resolve(stdOutAndErr.join('\n')); - } - }; - worker.postMessage({ id, python: pyCode, context, interruptBuffer }); - }); - - const interrupt = () => { - console.log('Interrupting...'); - console.trace(); - if (interruptBuffer) { - interruptBuffer[0] = 2; - } - }; - - return { donePromise, interrupt }; -}; - -export default function CanvasPyInterpreter() { - const { canvasData, setCanvasData } = useAppContext(); - - const [code, setCode] = useState(canvasData?.content ?? ''); // copy to avoid direct mutation - const [running, setRunning] = useState(false); - const [output, setOutput] = useState(''); - const [interruptFn, setInterruptFn] = useState<() => void>(); - const [showStopBtn, setShowStopBtn] = useState(false); - - const runCode = async (pycode: string) => { - interruptFn?.(); - setRunning(true); - setOutput('Loading Pyodide...'); - const { donePromise, interrupt } = runCodeInWorker(pycode, () => { - setOutput('Running...'); - setShowStopBtn(canInterrupt); - }); - setInterruptFn(() => interrupt); - const out = await donePromise; - setOutput(out); - setRunning(false); - setShowStopBtn(false); - }; - - // run code on mount - useEffect(() => { - setCode(canvasData?.content ?? ''); - runCode(canvasData?.content ?? ''); - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [canvasData?.content]); - - if (canvasData?.type !== CanvasType.PY_INTERPRETER) { - return null; - } - - return ( -
-
-
- Python Interpreter - setCanvasData(null)} - /> -
-
- -
-
- - {showStopBtn && ( - - )} - - - Report a bug - - -
- -
-
-
-
- ); -} diff --git a/examples/server/webui/src/components/ChatMessage.tsx b/examples/server/webui/src/components/ChatMessage.tsx index ec72196ba..01d2fb80c 100644 --- a/examples/server/webui/src/components/ChatMessage.tsx +++ b/examples/server/webui/src/components/ChatMessage.tsx @@ -92,7 +92,7 @@ export default function ChatMessage({ <> @@ -149,17 +149,11 @@ export default function ChatMessage({ )}
- +
)} - +
)} diff --git a/examples/server/webui/src/components/ChatScreen.tsx b/examples/server/webui/src/components/ChatScreen.tsx index dbc683ed1..d679f4ebb 100644 --- a/examples/server/webui/src/components/ChatScreen.tsx +++ b/examples/server/webui/src/components/ChatScreen.tsx @@ -1,11 +1,9 @@ -import { useEffect, useState } from 'react'; +import { useEffect, useRef, useState } from 'react'; import { useAppContext } from '../utils/app.context'; import StorageUtils from '../utils/storage'; import { useNavigate } from 'react-router'; import ChatMessage from './ChatMessage'; -import { CanvasType, PendingMessage } from '../utils/types'; -import { classNames } from '../utils/misc'; -import CanvasPyInterpreter from './CanvasPyInterpreter'; +import { PendingMessage } from '../utils/types'; export default function ChatScreen() { const { @@ -14,24 +12,24 @@ export default function ChatScreen() { isGenerating, stopGenerating, pendingMessages, - canvasData, } = useAppContext(); const [inputMsg, setInputMsg] = useState(''); + const containerRef = useRef(null); const navigate = useNavigate(); const currConvId = viewingConversation?.id ?? ''; const pendingMsg: PendingMessage | undefined = pendingMessages[currConvId]; const scrollToBottom = (requiresNearBottom: boolean) => { - const mainScrollElem = document.getElementById('main-scroll'); - if (!mainScrollElem) return; + if (!containerRef.current) return; + const msgListElem = containerRef.current; const spaceToBottom = - mainScrollElem.scrollHeight - - mainScrollElem.scrollTop - - mainScrollElem.clientHeight; + msgListElem.scrollHeight - + msgListElem.scrollTop - + msgListElem.clientHeight; if (!requiresNearBottom || spaceToBottom < 50) { setTimeout( - () => mainScrollElem.scrollTo({ top: mainScrollElem.scrollHeight }), + () => msgListElem.scrollTo({ top: msgListElem.scrollHeight }), 1 ); } @@ -60,87 +58,66 @@ export default function ChatScreen() { } }; - const hasCanvas = !!canvasData; - return ( -
+ <> + {/* chat messages */}
- {/* chat messages */} -
-
- {/* placeholder to shift the message to the bottom */} - {viewingConversation ? '' : 'Send a message to start'} -
- {viewingConversation?.messages.map((msg) => ( - - ))} - - {pendingMsg && ( - - )} +
+ {/* placeholder to shift the message to the bottom */} + {viewingConversation ? '' : 'Send a message to start'}
+ {viewingConversation?.messages.map((msg) => ( + + ))} - {/* chat input */} -
- - {isGenerating(currConvId) ? ( - - ) : ( - - )} -
-
-
- {canvasData?.type === CanvasType.PY_INTERPRETER && ( - + {pendingMsg && ( + )}
-
+ + {/* chat input */} +
+ + {isGenerating(currConvId) ? ( + + ) : ( + + )} +
+ ); } diff --git a/examples/server/webui/src/components/Header.tsx b/examples/server/webui/src/components/Header.tsx index 505350313..015264abc 100644 --- a/examples/server/webui/src/components/Header.tsx +++ b/examples/server/webui/src/components/Header.tsx @@ -5,11 +5,12 @@ import { classNames } from '../utils/misc'; import daisyuiThemes from 'daisyui/src/theming/themes'; import { THEMES } from '../Config'; import { useNavigate } from 'react-router'; +import SettingDialog from './SettingDialog'; export default function Header() { const navigate = useNavigate(); const [selectedTheme, setSelectedTheme] = useState(StorageUtils.getTheme()); - const { setShowSettings } = useAppContext(); + const [showSettingDialog, setShowSettingDialog] = useState(false); const setTheme = (theme: string) => { StorageUtils.setTheme(theme); @@ -53,7 +54,7 @@ export default function Header() { }; return ( -
+
{/* open sidebar button */}
-
+ + setShowSettingDialog(false)} + />
); } diff --git a/examples/server/webui/src/components/MarkdownDisplay.tsx b/examples/server/webui/src/components/MarkdownDisplay.tsx index 5b7a72591..8ab8de655 100644 --- a/examples/server/webui/src/components/MarkdownDisplay.tsx +++ b/examples/server/webui/src/components/MarkdownDisplay.tsx @@ -9,16 +9,8 @@ import 'katex/dist/katex.min.css'; import { classNames, copyStr } from '../utils/misc'; import { ElementContent, Root } from 'hast'; import { visit } from 'unist-util-visit'; -import { useAppContext } from '../utils/app.context'; -import { CanvasType } from '../utils/types'; -export default function MarkdownDisplay({ - content, - isGenerating, -}: { - content: string; - isGenerating?: boolean; -}) { +export default function MarkdownDisplay({ content }: { content: string }) { const preprocessedContent = useMemo( () => preprocessLaTeX(content), [content] @@ -29,13 +21,8 @@ export default function MarkdownDisplay({ rehypePlugins={[rehypeHightlight, rehypeKatex, rehypeCustomCopyButton]} components={{ button: (props) => ( - + ), - // note: do not use "pre", "p" or other basic html elements here, it will cause the node to re-render when the message is being generated (this should be a bug with react-markdown, not sure how to fix it) }} > {preprocessedContent} @@ -43,12 +30,11 @@ export default function MarkdownDisplay({ ); } -const CodeBlockButtons: React.ElementType< +const CopyCodeButton: React.ElementType< React.ClassAttributes & React.HTMLAttributes & - ExtraProps & { origContent: string; isGenerating?: boolean } -> = ({ node, origContent, isGenerating }) => { - const { config } = useAppContext(); + ExtraProps & { origContent: string } +> = ({ node, origContent }) => { const startOffset = node?.position?.start.offset ?? 0; const endOffset = node?.position?.end.offset ?? 0; @@ -61,33 +47,14 @@ const CodeBlockButtons: React.ElementType< [origContent, startOffset, endOffset] ); - const codeLanguage = useMemo( - () => - origContent - .substring(startOffset, startOffset + 10) - .match(/^```([^\n]+)\n/)?.[1] ?? '', - [origContent, startOffset] - ); - - const canRunCode = - !isGenerating && - config.pyIntepreterEnabled && - codeLanguage.startsWith('py'); - return (
- {canRunCode && ( - - )}
); }; @@ -114,31 +81,6 @@ export const CopyButton = ({ ); }; -export const RunPyCodeButton = ({ - content, - className, -}: { - content: string; - className?: string; -}) => { - const { setCanvasData } = useAppContext(); - return ( - <> - - - ); -}; - /** * This injects the "button" element before each "pre" element. * The actual button will be replaced with a react component in the MarkdownDisplay. @@ -152,7 +94,9 @@ function rehypeCustomCopyButton() { // replace current node preNode.properties.visited = 'true'; node.tagName = 'div'; - node.properties = {}; + node.properties = { + className: 'relative my-4', + }; // add node for button const btnNode: ElementContent = { type: 'element', diff --git a/examples/server/webui/src/components/SettingDialog.tsx b/examples/server/webui/src/components/SettingDialog.tsx index 592b93fa3..ae8117fd2 100644 --- a/examples/server/webui/src/components/SettingDialog.tsx +++ b/examples/server/webui/src/components/SettingDialog.tsx @@ -3,27 +3,17 @@ import { useAppContext } from '../utils/app.context'; import { CONFIG_DEFAULT, CONFIG_INFO } from '../Config'; import { isDev } from '../Config'; import StorageUtils from '../utils/storage'; -import { classNames, isBoolean, isNumeric, isString } from '../utils/misc'; -import { - BeakerIcon, - ChatBubbleOvalLeftEllipsisIcon, - Cog6ToothIcon, - FunnelIcon, - HandRaisedIcon, - SquaresPlusIcon, -} from '@heroicons/react/24/outline'; -import { OpenInNewTab } from '../utils/common'; type SettKey = keyof typeof CONFIG_DEFAULT; -const BASIC_KEYS: SettKey[] = [ +const COMMON_SAMPLER_KEYS: SettKey[] = [ 'temperature', 'top_k', 'top_p', 'min_p', 'max_tokens', ]; -const SAMPLER_KEYS: SettKey[] = [ +const OTHER_SAMPLER_KEYS: SettKey[] = [ 'dynatemp_range', 'dynatemp_exponent', 'typical_p', @@ -41,223 +31,6 @@ const PENALTY_KEYS: SettKey[] = [ 'dry_penalty_last_n', ]; -enum SettingInputType { - SHORT_INPUT, - LONG_INPUT, - CHECKBOX, - CUSTOM, -} - -interface SettingFieldInput { - type: Exclude; - label: string | React.ReactElement; - help?: string | React.ReactElement; - key: SettKey; -} - -interface SettingFieldCustom { - type: SettingInputType.CUSTOM; - key: SettKey; - component: - | string - | React.FC<{ - value: string | boolean | number; - onChange: (value: string) => void; - }>; -} - -interface SettingSection { - title: React.ReactElement; - fields: (SettingFieldInput | SettingFieldCustom)[]; -} - -const ICON_CLASSNAME = 'w-4 h-4 mr-1 inline'; - -const SETTING_SECTIONS: SettingSection[] = [ - { - title: ( - <> - - General - - ), - fields: [ - { - type: SettingInputType.SHORT_INPUT, - label: 'API Key', - key: 'apiKey', - }, - { - type: SettingInputType.LONG_INPUT, - label: 'System Message (will be disabled if left empty)', - key: 'systemMessage', - }, - ...BASIC_KEYS.map( - (key) => - ({ - type: SettingInputType.SHORT_INPUT, - label: key, - key, - }) as SettingFieldInput - ), - ], - }, - { - title: ( - <> - - Samplers - - ), - fields: [ - { - type: SettingInputType.SHORT_INPUT, - label: 'Samplers queue', - key: 'samplers', - }, - ...SAMPLER_KEYS.map( - (key) => - ({ - type: SettingInputType.SHORT_INPUT, - label: key, - key, - }) as SettingFieldInput - ), - ], - }, - { - title: ( - <> - - Penalties - - ), - fields: PENALTY_KEYS.map((key) => ({ - type: SettingInputType.SHORT_INPUT, - label: key, - key, - })), - }, - { - title: ( - <> - - Reasoning - - ), - fields: [ - { - type: SettingInputType.CHECKBOX, - label: 'Expand though process by default for generating message', - key: 'showThoughtInProgress', - }, - { - type: SettingInputType.CHECKBOX, - label: - 'Exclude thought process when sending request to API (Recommended for DeepSeek-R1)', - key: 'excludeThoughtOnReq', - }, - ], - }, - { - title: ( - <> - - Advanced - - ), - fields: [ - { - type: SettingInputType.CUSTOM, - key: 'custom', // dummy key, won't be used - component: () => { - const debugImportDemoConv = async () => { - const res = await fetch('/demo-conversation.json'); - const demoConv = await res.json(); - StorageUtils.remove(demoConv.id); - for (const msg of demoConv.messages) { - StorageUtils.appendMsg(demoConv.id, msg); - } - }; - return ( - - ); - }, - }, - { - type: SettingInputType.CHECKBOX, - label: 'Show tokens per second', - key: 'showTokensPerSecond', - }, - { - type: SettingInputType.LONG_INPUT, - label: ( - <> - Custom JSON config (For more info, refer to{' '} - - server documentation - - ) - - ), - key: 'custom', - }, - ], - }, - { - title: ( - <> - - Experimental - - ), - fields: [ - { - type: SettingInputType.CUSTOM, - key: 'custom', // dummy key, won't be used - component: () => ( - <> -

- Experimental features are not guaranteed to work correctly. -
-
- If you encounter any problems, create a{' '} - - Bug (misc.) - {' '} - report on Github. Please also specify webui/experimental on - the report title and include screenshots. -
-
- Some features may require packages downloaded from CDN, so they - need internet connection. -

- - ), - }, - { - type: SettingInputType.CHECKBOX, - label: ( - <> - Enable Python interpreter -
- - This feature uses{' '} - pyodide, - downloaded from CDN. To use this feature, ask the LLM to generate - python code inside a markdown code block. You will see a "Run" - button on the code block, near the "Copy" button. - - - ), - key: 'pyIntepreterEnabled', - }, - ], - }, -]; - export default function SettingDialog({ show, onClose, @@ -266,7 +39,6 @@ export default function SettingDialog({ onClose: () => void; }) { const { config, saveConfig } = useAppContext(); - const [sectionIdx, setSectionIdx] = useState(0); // clone the config object to prevent direct mutation const [localConfig, setLocalConfig] = useState( @@ -280,148 +52,206 @@ export default function SettingDialog({ }; const handleSave = () => { - // copy the local config to prevent direct mutation - const newConfig: typeof CONFIG_DEFAULT = JSON.parse( - JSON.stringify(localConfig) - ); - // validate the config - for (const key in newConfig) { - const value = newConfig[key as SettKey]; - const mustBeBoolean = isBoolean(CONFIG_DEFAULT[key as SettKey]); - const mustBeString = isString(CONFIG_DEFAULT[key as SettKey]); - const mustBeNumeric = isNumeric(CONFIG_DEFAULT[key as SettKey]); - if (mustBeString) { - if (!isString(value)) { - alert(`Value for ${key} must be string`); - return; - } - } else if (mustBeNumeric) { - const trimedValue = value.toString().trim(); - const numVal = Number(trimedValue); - if (isNaN(numVal) || !isNumeric(numVal) || trimedValue.length === 0) { - alert(`Value for ${key} must be numeric`); - return; - } - // force conversion to number - // @ts-expect-error this is safe - newConfig[key] = numVal; - } else if (mustBeBoolean) { - if (!isBoolean(value)) { - alert(`Value for ${key} must be boolean`); - return; - } - } else { - console.error(`Unknown default type for key ${key}`); - } - } - if (isDev) console.log('Saving config', newConfig); - saveConfig(newConfig); + saveConfig(localConfig); onClose(); }; - const onChange = (key: SettKey) => (value: string | boolean) => { - // note: we do not perform validation here, because we may get incomplete value as user is still typing it - setLocalConfig({ ...localConfig, [key]: value }); + const debugImportDemoConv = async () => { + const res = await fetch('/demo-conversation.json'); + const demoConv = await res.json(); + StorageUtils.remove(demoConv.id); + for (const msg of demoConv.messages) { + StorageUtils.appendMsg(demoConv.id, msg); + } + onClose(); }; return ( - -
+ +

Settings

-
- {/* Left panel, showing sections - Desktop version */} -
- {SETTING_SECTIONS.map((section, idx) => ( -
setSectionIdx(idx)} - dir="auto" - > - {section.title} -
- ))} -
+
+

+ Settings below are saved in browser's localStorage +

- {/* Left panel, showing sections - Mobile version */} -
-
- - {SETTING_SECTIONS[sectionIdx].title} - -
    - {SETTING_SECTIONS.map((section, idx) => ( -
    setSectionIdx(idx)} - dir="auto" - > - {section.title} -
    - ))} -
-
-
+ + setLocalConfig({ ...localConfig, apiKey: value }) + } + /> - {/* Right panel, showing setting fields */} -
- {SETTING_SECTIONS[sectionIdx].fields.map((field, idx) => { - const key = `${sectionIdx}-${idx}`; - if (field.type === SettingInputType.SHORT_INPUT) { - return ( - - ); - } else if (field.type === SettingInputType.LONG_INPUT) { - return ( - - ); - } else if (field.type === SettingInputType.CHECKBOX) { - return ( - - ); - } else if (field.type === SettingInputType.CUSTOM) { - return ( -
- {typeof field.component === 'string' - ? field.component - : field.component({ - value: localConfig[field.key], - onChange: onChange(field.key), - })} -
- ); +