Merge branch 'master' into xsn/vision_2

This commit is contained in:
Xuan Son Nguyen 2025-02-06 18:08:37 +01:00
commit ff77b15845
114 changed files with 10490 additions and 4439 deletions

View file

@ -31,6 +31,11 @@ defer {
llama_model_free(model)
}
guard let vocab = llama_model_get_vocab(model) else {
print("Failed to get vocab")
exit(1)
}
var tokens = tokenize(text: prompt, add_bos: true)
let n_kv_req = UInt32(tokens.count) + UInt32((n_len - Int(tokens.count)) * n_parallel)
@ -41,7 +46,7 @@ context_params.n_batch = UInt32(max(n_len, n_parallel))
context_params.n_threads = 8
context_params.n_threads_batch = 8
let context = llama_new_context_with_model(model, context_params)
let context = llama_init_from_model(model, context_params)
guard context != nil else {
print("Failed to initialize context")
exit(1)
@ -141,7 +146,7 @@ while n_cur <= n_len {
let new_token_id = llama_sampler_sample(smpl, context, i_batch[i])
// is it an end of stream? -> mark the stream as finished
if llama_vocab_is_eog(model, new_token_id) || n_cur == n_len {
if llama_vocab_is_eog(vocab, new_token_id) || n_cur == n_len {
i_batch[i] = -1
// print("")
if n_parallel > 1 {
@ -207,7 +212,7 @@ private func tokenize(text: String, add_bos: Bool) -> [llama_token] {
let utf8Count = text.utf8.count
let n_tokens = utf8Count + (add_bos ? 1 : 0)
let tokens = UnsafeMutablePointer<llama_token>.allocate(capacity: n_tokens)
let tokenCount = llama_tokenize(model, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, /*special tokens*/ false)
let tokenCount = llama_tokenize(vocab, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, /*special tokens*/ false)
var swiftTokens: [llama_token] = []
for i in 0 ..< tokenCount {
swiftTokens.append(tokens[Int(i)])
@ -218,12 +223,12 @@ private func tokenize(text: String, add_bos: Bool) -> [llama_token] {
private func token_to_piece(token: llama_token, buffer: inout [CChar]) -> String? {
var result = [CChar](repeating: 0, count: 8)
let nTokens = llama_token_to_piece(model, token, &result, Int32(result.count), 0, false)
let nTokens = llama_token_to_piece(vocab, token, &result, Int32(result.count), 0, false)
if nTokens < 0 {
let actualTokensCount = -Int(nTokens)
result = .init(repeating: 0, count: actualTokensCount)
let check = llama_token_to_piece(
model,
vocab,
token,
&result,
Int32(result.count),

View file

@ -24,6 +24,7 @@ func llama_batch_add(_ batch: inout llama_batch, _ id: llama_token, _ pos: llama
actor LlamaContext {
private var model: OpaquePointer
private var context: OpaquePointer
private var vocab: OpaquePointer
private var sampling: UnsafeMutablePointer<llama_sampler>
private var batch: llama_batch
private var tokens_list: [llama_token]
@ -47,6 +48,7 @@ actor LlamaContext {
self.sampling = llama_sampler_chain_init(sparams)
llama_sampler_chain_add(self.sampling, llama_sampler_init_temp(0.4))
llama_sampler_chain_add(self.sampling, llama_sampler_init_dist(1234))
vocab = llama_model_get_vocab(model)
}
deinit {
@ -79,7 +81,7 @@ actor LlamaContext {
ctx_params.n_threads = Int32(n_threads)
ctx_params.n_threads_batch = Int32(n_threads)
let context = llama_new_context_with_model(model, ctx_params)
let context = llama_init_from_model(model, ctx_params)
guard let context else {
print("Could not load context!")
throw LlamaError.couldNotInitializeContext
@ -151,7 +153,7 @@ actor LlamaContext {
new_token_id = llama_sampler_sample(sampling, context, batch.n_tokens - 1)
if llama_vocab_is_eog(model, new_token_id) || n_cur == n_len {
if llama_vocab_is_eog(vocab, new_token_id) || n_cur == n_len {
print("\n")
is_done = true
let new_token_str = String(cString: temporary_invalid_cchars + [0])
@ -297,7 +299,7 @@ actor LlamaContext {
let utf8Count = text.utf8.count
let n_tokens = utf8Count + (add_bos ? 1 : 0) + 1
let tokens = UnsafeMutablePointer<llama_token>.allocate(capacity: n_tokens)
let tokenCount = llama_tokenize(model, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, false)
let tokenCount = llama_tokenize(vocab, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, false)
var swiftTokens: [llama_token] = []
for i in 0..<tokenCount {
@ -316,7 +318,7 @@ actor LlamaContext {
defer {
result.deallocate()
}
let nTokens = llama_token_to_piece(model, token, result, 8, 0, false)
let nTokens = llama_token_to_piece(vocab, token, result, 8, 0, false)
if nTokens < 0 {
let newResult = UnsafeMutablePointer<Int8>.allocate(capacity: Int(-nTokens))
@ -324,7 +326,7 @@ actor LlamaContext {
defer {
newResult.deallocate()
}
let nNewTokens = llama_token_to_piece(model, token, newResult, -nTokens, 0, false)
let nNewTokens = llama_token_to_piece(vocab, token, newResult, -nTokens, 0, false)
let bufferPointer = UnsafeBufferPointer(start: newResult, count: Int(nNewTokens))
return Array(bufferPointer)
} else {

View file

@ -50,3 +50,10 @@ set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-qwen2vl-cli)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)
set(TARGET llama-llava-clip-quantize-cli)
add_executable(${TARGET} clip-quantize-cli.cpp)
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-llava-clip-quantize-cli)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)

View file

@ -0,0 +1,44 @@
# Quantizing CLIP Visual Projector
This is the tool for quantizing the CLIP visual projector model. Quantization reduces the precision of the model's weights, which can significantly decrease the model size and improve inference speed, often with minimal impact on performance.
## Usage
To quantize a CLIP visual projector model, use the following command:
```sh
./bin/llama-llava-clip-quantize-cli /path/to/ggml-model-f32.gguf /path/to/ggml-model-quantized.gguf <type>
```
After the quantization, the visual projector can be used freely with the existing LLAVA cli (LLAVA, Qwen2VL, etc).
### Arguments
- `/path/to/ggml-model-f32.gguf`: The path to the input model file in FP32 or FP16 format.
- `/path/to/ggml-model-quantized.gguf`: The path where the quantized model will be saved.
- `<type>`: The quantization type to apply. This should be an integer corresponding to one of the quantization types defined in the `enum ggml_type`.
### Quantization Types
The following quantization types are supported, based on the `enum ggml_type` definition:
- `2` - `q4_0`: 4-bit quantization with a single scale value.
- `3` - `q4_1`: 4-bit quantization with a separate scale value for each block.
- `6` - `q5_0`: 5-bit quantization with a single scale value.
- `7` - `q5_1`: 5-bit quantization with a separate scale value for each block.
- `8` - `q8_0`: 8-bit quantization with a single scale value.
### Example
To quantize a model using the `q4_0` quantization type, you would run:
```sh
./bin/llama-llava-clip-quantize-cli /path/to/ggml-model-f32.gguf /path/to/ggml-model-quantized.gguf 2
```
This command will generate a quantized model at `/path/to/ggml-model-quantized.gguf` using the `q4_0` quantization method.
## Notes
- Quantization can lead to a loss in model accuracy, depending on the chosen quantization type. It is recommended to evaluate the quantized model's performance on your specific task to ensure it meets your requirements.
- The quantized model will typically be smaller in size and faster to run, making it more suitable for deployment in resource-constrained environments.

View file

@ -0,0 +1,59 @@
#include "arg.h"
#include "base64.hpp"
#include "log.h"
#include "common.h"
#include "sampling.h"
#include "clip.h"
#include "llava.h"
#include "llama.h"
#include "ggml.h"
static void print_usage(int argc, char ** argv) {
(void) argc;
fprintf(stderr, "usage: %s /path/to/ggml-model-f32.gguf /path/to/ggml-model-quantized.gguf type\n", argv[0]);
fprintf(stderr, " type = 2 - q4_0\n");
fprintf(stderr, " type = 3 - q4_1\n");
fprintf(stderr, " type = 6 - q5_0\n");
fprintf(stderr, " type = 7 - q5_1\n");
fprintf(stderr, " type = 8 - q8_0\n");
}
int main(int argc, char ** argv) {
if (argc != 4) {
print_usage(argc, argv);
return 1;
}
const std::string fname_inp = argv[1];
const std::string fname_out = argv[2];
const int itype = atoi(argv[3]);
const int64_t t_main_start_us = ggml_time_us();
int64_t t_quantize_us = 0;
// load the model
{
const int64_t t_start_us = ggml_time_us();
if (!clip_model_quantize(fname_inp.c_str(), fname_out.c_str(), itype)) {
fprintf(stderr, "%s: failed to quantize model from '%s'\n", __func__, fname_inp.c_str());
return 1;
}
t_quantize_us = ggml_time_us() - t_start_us;
}
// report timing
{
const int64_t t_main_end_us = ggml_time_us();
printf("\n");
printf("%s: quantize time = %8.2f ms\n", __func__, t_quantize_us / 1000.0f);
printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us) / 1000.0f);
}
return 0;
}

View file

@ -2745,10 +2745,8 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
}
bool clip_model_quantize(const char * fname_inp, const char * fname_out, const int itype) {
ggml_type type = GGML_TYPE_Q4_1;
assert(itype < GGML_TYPE_COUNT);
type = static_cast<ggml_type>(itype);
ggml_type type = static_cast<ggml_type>(itype);
auto * ctx_clip = clip_model_load(fname_inp, 2);
@ -2801,8 +2799,8 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
}
}
// quantize only 2D tensors
quantize &= (ggml_n_dims(cur) == 2);
// quantize only 2D tensors and bigger than block size
quantize &= (ggml_n_dims(cur) == 2) && cur->ne[0] > ggml_blck_size(type);
if (quantize) {
new_type = type;

View file

@ -24,15 +24,16 @@
#include <string>
#include <vector>
#include "chat-template.hpp"
#include "common.h"
#include "json.hpp"
#include "linenoise.cpp/linenoise.h"
#include "llama-cpp.h"
#include "chat-template.hpp"
#include "log.h"
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
[[noreturn]] static void sigint_handler(int) {
printf("\n\033[0m");
printf("\n" LOG_COL_DEFAULT);
exit(0); // not ideal, but it's the only way to guarantee exit in all cases
}
#endif
@ -847,7 +848,15 @@ static int apply_chat_template(const common_chat_template & tmpl, LlamaData & ll
});
}
try {
auto result = tmpl.apply(messages, /* tools= */ json(), append);
minja::chat_template_inputs tmpl_inputs;
tmpl_inputs.messages = messages;
tmpl_inputs.add_generation_prompt = append;
minja::chat_template_options tmpl_opts;
tmpl_opts.use_bos_token = false;
tmpl_opts.use_eos_token = false;
auto result = tmpl.apply(tmpl_inputs, tmpl_opts);
llama_data.fmtted.resize(result.size() + 1);
memcpy(llama_data.fmtted.data(), result.c_str(), result.size() + 1);
return result.size();
@ -890,7 +899,7 @@ static int check_context_size(const llama_context_ptr & ctx, const llama_batch &
const int n_ctx = llama_n_ctx(ctx.get());
const int n_ctx_used = llama_get_kv_cache_used_cells(ctx.get());
if (n_ctx_used + batch.n_tokens > n_ctx) {
printf("\033[0m\n");
printf(LOG_COL_DEFAULT "\n");
printe("context size exceeded\n");
return 1;
}
@ -953,7 +962,7 @@ static int generate(LlamaData & llama_data, const std::string & prompt, std::str
batch = llama_batch_get_one(&new_token_id, 1);
}
printf("\033[0m");
printf(LOG_COL_DEFAULT);
return 0;
}
@ -962,7 +971,7 @@ static int read_user_input(std::string & user_input) {
#ifdef WIN32
printf(
"\r%*s"
"\r\033[0m%s",
"\r" LOG_COL_DEFAULT "%s",
get_terminal_width(), " ", prompt_prefix);
std::getline(std::cin, user_input);
@ -999,7 +1008,7 @@ static int generate_response(LlamaData & llama_data, const std::string & prompt,
const bool stdout_a_terminal) {
// Set response color
if (stdout_a_terminal) {
printf("\033[33m");
printf(LOG_COL_YELLOW);
}
if (generate(llama_data, prompt, response)) {
@ -1008,7 +1017,7 @@ static int generate_response(LlamaData & llama_data, const std::string & prompt,
}
// End response with color reset and newline
printf("\n%s", stdout_a_terminal ? "\033[0m" : "");
printf("\n%s", stdout_a_terminal ? LOG_COL_DEFAULT : "");
return 0;
}

Binary file not shown.

View file

@ -3354,6 +3354,8 @@ static void log_server_request(const httplib::Request & req, const httplib::Resp
return;
}
// reminder: this function is not covered by httplib's exception handler; if someone does more complicated stuff, think about wrapping it in try-catch
LOG_INF("request: %s %s %s %d\n", req.method.c_str(), req.path.c_str(), req.remote_addr.c_str(), res.status);
LOG_DBG("request: %s\n", req.body.c_str());
@ -3440,9 +3442,13 @@ int main(int argc, char ** argv) {
message = "Unknown Exception";
}
json formatted_error = format_error_response(message, ERROR_TYPE_SERVER);
LOG_WRN("got exception: %s\n", formatted_error.dump().c_str());
res_error(res, formatted_error);
try {
json formatted_error = format_error_response(message, ERROR_TYPE_SERVER);
LOG_WRN("got exception: %s\n", formatted_error.dump().c_str());
res_error(res, formatted_error);
} catch (const std::exception & e) {
LOG_ERR("got another exception: %s | while hanlding exception: %s\n", e.what(), message.c_str());
}
});
svr->set_error_handler([&res_error](const httplib::Request &, httplib::Response & res) {

View file

@ -13,9 +13,12 @@ def create_server():
@pytest.mark.parametrize(
"model,system_prompt,user_prompt,max_tokens,re_content,n_prompt,n_predicted,finish_reason,jinja,chat_template",
[
(None, "Book", "Hey", 8, "But she couldn't", 69, 8, "length", False, None),
(None, "Book", "Hey", 8, "But she couldn't", 69, 8, "length", True, None),
(None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", False, None),
(None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", True, None),
(None, "Book", "What is the best book", 8, "^ blue", 23, 8, "length", True, "This is not a chat template, it is"),
(None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", True, None),
(None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", True, 'chatml'),
(None, "Book", "What is the best book", 8, "^ blue", 23, 8, "length", True, "This is not a chat template, it is"),
("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length", False, None),
("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length", True, None),
]

View file

@ -67,8 +67,8 @@ WEATHER_TOOL = {
def do_test_completion_with_required_tool_tiny(template_name: str, tool: dict, argument_key: str | None):
n_predict = 512
global server
n_predict = 512
# server = ServerPreset.stories15m_moe()
server.jinja = True
server.n_predict = n_predict
@ -139,29 +139,49 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict,
@pytest.mark.parametrize("tool,argument_key,hf_repo,template_override", [
(TEST_TOOL, "success", "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
# Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
(TEST_TOOL, "success", "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
(TEST_TOOL, "success", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
(PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
(PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
(PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
(PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
(TEST_TOOL, "success", "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
(PYTHON_TOOL, "code", "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
(PYTHON_TOOL, "code", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/functionary-small-v3.2-GGUF:Q4_K_M", ("meetkai/functionary-medium-v3.2", None)),
(PYTHON_TOOL, "code", "bartowski/functionary-small-v3.2-GGUF:Q4_K_M", ("meetkai/functionary-medium-v3.2", None)),
(PYTHON_TOOL, "code", "bartowski/functionary-small-v3.2-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
(PYTHON_TOOL, "code", "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
(PYTHON_TOOL, "code", "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
(PYTHON_TOOL, "code", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
(PYTHON_TOOL, "code", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", "chatml"),
# TODO: fix these
# (TEST_TOOL, "success", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
# (PYTHON_TOOL, "code", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
])
def test_completion_with_required_tool_real_model(tool: dict, argument_key: str | None, hf_repo: str, template_override: Tuple[str, str | None] | None):
def test_completion_with_required_tool_real_model(tool: dict, argument_key: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
global server
n_predict = 512
server.n_slots = 1
server.jinja = True
@ -169,10 +189,12 @@ def test_completion_with_required_tool_real_model(tool: dict, argument_key: str
server.n_predict = n_predict
server.model_hf_repo = hf_repo
server.model_hf_file = None
if template_override:
if isinstance(template_override, tuple):
(template_hf_repo, template_variant) = template_override
server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja"
assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template."
elif isinstance(template_override, str):
server.chat_template = template_override
server.start(timeout_seconds=TIMEOUT_SERVER_START)
res = server.make_request("POST", "/chat/completions", data={
"max_tokens": n_predict,
@ -251,33 +273,55 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
@pytest.mark.slow
@pytest.mark.parametrize("hf_repo,template_override", [
("bartowski/c4ai-command-r7b-12-2024-GGUF:Q4_K_M", ("CohereForAI/c4ai-command-r7b-12-2024", "tool_use")),
("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
("bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", "chatml"),
("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", "chatml"),
("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"),
("bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
("bartowski/functionary-small-v3.2-GGUF:Q8_0", "chatml"),
("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", "chatml"),
# Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
("bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
# ("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
# ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
])
def test_weather_tool_call(hf_repo: str, template_override: Tuple[str, str | None] | None):
def test_weather(hf_repo: str, template_override: Tuple[str, str | None] | None):
global server
n_predict = 512
server.n_slots = 1
server.jinja = True
server.n_ctx = 8192
server.n_predict = 512
server.n_predict = n_predict
server.model_hf_repo = hf_repo
server.model_hf_file = None
if template_override:
if isinstance(template_override, tuple):
(template_hf_repo, template_variant) = template_override
server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja"
assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template."
elif isinstance(template_override, str):
server.chat_template = template_override
server.start(timeout_seconds=TIMEOUT_SERVER_START)
res = server.make_request("POST", "/chat/completions", data={
"max_tokens": 256,
"max_tokens": n_predict,
"messages": [
{"role": "user", "content": "What is the weather in Istanbul?"},
],
@ -298,19 +342,39 @@ def test_weather_tool_call(hf_repo: str, template_override: Tuple[str, str | Non
@pytest.mark.slow
@pytest.mark.parametrize("expected_arguments_override,hf_repo,template_override", [
(None, "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
(None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
(None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
(None, "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai-functionary-medium-v3.2", None)),
('{"code":"print("}', "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
(None, "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
(None, "bartowski/functionary-small-v3.2-GGUF:Q8_0", "chatml"),
(None, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
('{"code":"print("}', "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
('{"code":"print("}', "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
(None, "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", "chatml"),
('{"code":"print("}', "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
('{"code":"print("}', "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", "chatml"),
(None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
(None, "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
(None, "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch-Hermes-3-Llama-3.1-8B", "tool_use")),
(None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
(None, "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
(None, "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", "chatml"),
(None, "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch-Hermes-3-Llama-3.1-8B", "tool_use")),
(None, "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", "chatml"),
(None, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
(None, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"),
# Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
(None, "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
# (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
])
def test_hello_world_tool_call(expected_arguments_override: str | None, hf_repo: str, template_override: Tuple[str, str | None] | None):
def test_hello_world_tool_call(expected_arguments_override: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
global server
server.n_slots = 1
server.jinja = True
@ -318,10 +382,12 @@ def test_hello_world_tool_call(expected_arguments_override: str | None, hf_repo:
server.n_predict = 128
server.model_hf_repo = hf_repo
server.model_hf_file = None
if template_override:
if isinstance(template_override, tuple):
(template_hf_repo, template_variant) = template_override
server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja"
assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template."
elif isinstance(template_override, str):
server.chat_template = template_override
server.start(timeout_seconds=TIMEOUT_SERVER_START)
res = server.make_request("POST", "/chat/completions", data={
"max_tokens": 256,

View file

@ -5,10 +5,6 @@
#include "llama.h"
#include "common/base64.hpp"
#ifndef NDEBUG
// crash the server in debug mode, otherwise send an http 500 error
#define CPPHTTPLIB_NO_EXCEPTIONS 1
#endif
// increase max payload length to allow use of larger context size
#define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576
#include "httplib.h"

24
examples/server/webui/.gitignore vendored Normal file
View file

@ -0,0 +1,24 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

View file

@ -0,0 +1,10 @@
**/.vscode
**/.github
**/.git
**/.svn
**/.hg
**/node_modules
**/dist
**/build
*.config.js

View file

@ -0,0 +1,26 @@
import js from '@eslint/js'
import globals from 'globals'
import reactHooks from 'eslint-plugin-react-hooks'
import reactRefresh from 'eslint-plugin-react-refresh'
import tseslint from 'typescript-eslint'
export default tseslint.config(
{ ignores: ['dist'] },
{
extends: [js.configs.recommended, ...tseslint.configs.recommended],
files: ['**/*.{ts,tsx}'],
languageOptions: {
ecmaVersion: 2020,
globals: globals.browser,
},
plugins: {
'react-hooks': reactHooks,
'react-refresh': reactRefresh,
},
rules: {
...reactHooks.configs.recommended.rules,
'react-refresh/only-export-components': 'off',
'@typescript-eslint/no-unused-vars': 'off',
},
},
)

View file

@ -1,343 +1,16 @@
<!DOCTYPE html>
<!doctype html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1" />
<meta name="color-scheme" content="light dark">
<title>🦙 llama.cpp - chat</title>
</head>
<body>
<div id="app" class="opacity-0"> <!-- opacity-0 will be removed on app mounted -->
<div class="flex flex-row drawer lg:drawer-open">
<input id="toggle-drawer" type="checkbox" class="drawer-toggle" checked />
<!-- sidebar -->
<div class="drawer-side h-screen lg:h-screen z-50 lg:max-w-64">
<label for="toggle-drawer" aria-label="close sidebar" class="drawer-overlay"></label>
<div class="flex flex-col bg-base-200 min-h-full max-w-64 py-4 px-4">
<div class="flex flex-row items-center justify-between mb-4 mt-4">
<h2 class="font-bold ml-4">Conversations</h2>
<!-- close sidebar button -->
<label for="toggle-drawer" class="btn btn-ghost lg:hidden">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-arrow-bar-left" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M12.5 15a.5.5 0 0 1-.5-.5v-13a.5.5 0 0 1 1 0v13a.5.5 0 0 1-.5.5M10 8a.5.5 0 0 1-.5.5H3.707l2.147 2.146a.5.5 0 0 1-.708.708l-3-3a.5.5 0 0 1 0-.708l3-3a.5.5 0 1 1 .708.708L3.707 7.5H9.5a.5.5 0 0 1 .5.5"/>
</svg>
</label>
</div>
<!-- list of conversations -->
<div :class="{
'btn btn-ghost justify-start': true,
'btn-active': messages.length === 0,
}" @click="newConversation">
+ New conversation
</div>
<div v-for="conv in conversations" :class="{
'btn btn-ghost justify-start font-normal': true,
'btn-active': conv.id === viewingConvId,
}" @click="setViewingConv(conv.id)" dir="auto">
<span class="truncate">{{ conv.messages[0].content }}</span>
</div>
<div class="text-center text-xs opacity-40 mt-auto mx-4">
Conversations are saved to browser's localStorage
</div>
</div>
</div>
<!-- main view -->
<div class="chat-screen drawer-content grow flex flex-col h-screen w-screen mx-auto px-4">
<!-- header -->
<div class="flex flex-row items-center mt-6 mb-6">
<!-- open sidebar button -->
<label for="toggle-drawer" class="btn btn-ghost lg:hidden">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-list" viewBox="0 0 16 16">
<path fill-rule="evenodd" d="M2.5 12a.5.5 0 0 1 .5-.5h10a.5.5 0 0 1 0 1H3a.5.5 0 0 1-.5-.5m0-4a.5.5 0 0 1 .5-.5h10a.5.5 0 0 1 0 1H3a.5.5 0 0 1-.5-.5m0-4a.5.5 0 0 1 .5-.5h10a.5.5 0 0 1 0 1H3a.5.5 0 0 1-.5-.5"/>
</svg>
</label>
<div class="grow text-2xl font-bold ml-2">llama.cpp</div>
<!-- action buttons (top right) -->
<div class="flex items-center">
<div v-if="messages.length > 0" class="dropdown dropdown-end">
<!-- "..." button -->
<button tabindex="0" role="button" class="btn m-1" :disabled="isGenerating">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-three-dots-vertical" viewBox="0 0 16 16">
<path d="M9.5 13a1.5 1.5 0 1 1-3 0 1.5 1.5 0 0 1 3 0m0-5a1.5 1.5 0 1 1-3 0 1.5 1.5 0 0 1 3 0m0-5a1.5 1.5 0 1 1-3 0 1.5 1.5 0 0 1 3 0"/>
</svg>
</button>
<!-- "delete" dropdown menu -->
<ul tabindex="0" class="dropdown-content menu bg-base-100 rounded-box z-[1] w-52 p-2 shadow">
<li @click="downloadConv(viewingConvId)"><a>Download</a></li>
<li class="text-error" @click="deleteConv(viewingConvId)"><a>Delete</a></li>
</ul>
</div>
<div class="tooltip tooltip-bottom" data-tip="Settings">
<button class="btn" @click="showConfigDialog = true" :disabled="isGenerating">
<!-- settings button -->
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-gear" viewBox="0 0 16 16">
<path d="M8 4.754a3.246 3.246 0 1 0 0 6.492 3.246 3.246 0 0 0 0-6.492M5.754 8a2.246 2.246 0 1 1 4.492 0 2.246 2.246 0 0 1-4.492 0"/>
<path d="M9.796 1.343c-.527-1.79-3.065-1.79-3.592 0l-.094.319a.873.873 0 0 1-1.255.52l-.292-.16c-1.64-.892-3.433.902-2.54 2.541l.159.292a.873.873 0 0 1-.52 1.255l-.319.094c-1.79.527-1.79 3.065 0 3.592l.319.094a.873.873 0 0 1 .52 1.255l-.16.292c-.892 1.64.901 3.434 2.541 2.54l.292-.159a.873.873 0 0 1 1.255.52l.094.319c.527 1.79 3.065 1.79 3.592 0l.094-.319a.873.873 0 0 1 1.255-.52l.292.16c1.64.893 3.434-.902 2.54-2.541l-.159-.292a.873.873 0 0 1 .52-1.255l.319-.094c1.79-.527 1.79-3.065 0-3.592l-.319-.094a.873.873 0 0 1-.52-1.255l.16-.292c.893-1.64-.902-3.433-2.541-2.54l-.292.159a.873.873 0 0 1-1.255-.52zm-2.633.283c.246-.835 1.428-.835 1.674 0l.094.319a1.873 1.873 0 0 0 2.693 1.115l.291-.16c.764-.415 1.6.42 1.184 1.185l-.159.292a1.873 1.873 0 0 0 1.116 2.692l.318.094c.835.246.835 1.428 0 1.674l-.319.094a1.873 1.873 0 0 0-1.115 2.693l.16.291c.415.764-.42 1.6-1.185 1.184l-.291-.159a1.873 1.873 0 0 0-2.693 1.116l-.094.318c-.246.835-1.428.835-1.674 0l-.094-.319a1.873 1.873 0 0 0-2.692-1.115l-.292.16c-.764.415-1.6-.42-1.184-1.185l.159-.291A1.873 1.873 0 0 0 1.945 8.93l-.319-.094c-.835-.246-.835-1.428 0-1.674l.319-.094A1.873 1.873 0 0 0 3.06 4.377l-.16-.292c-.415-.764.42-1.6 1.185-1.184l.292.159a1.873 1.873 0 0 0 2.692-1.115z"/>
</svg>
</button>
</div>
<!-- theme controller is copied from https://daisyui.com/components/theme-controller/ -->
<div class="tooltip tooltip-bottom" data-tip="Themes">
<div class="dropdown dropdown-end dropdown-bottom">
<div tabindex="0" role="button" class="btn m-1">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-palette2" viewBox="0 0 16 16">
<path d="M0 .5A.5.5 0 0 1 .5 0h5a.5.5 0 0 1 .5.5v5.277l4.147-4.131a.5.5 0 0 1 .707 0l3.535 3.536a.5.5 0 0 1 0 .708L10.261 10H15.5a.5.5 0 0 1 .5.5v5a.5.5 0 0 1-.5.5H3a3 3 0 0 1-2.121-.879A3 3 0 0 1 0 13.044m6-.21 7.328-7.3-2.829-2.828L6 7.188zM4.5 13a1.5 1.5 0 1 0-3 0 1.5 1.5 0 0 0 3 0M15 15v-4H9.258l-4.015 4zM0 .5v12.495zm0 12.495V13z"/>
</svg>
</div>
<ul tabindex="0" class="dropdown-content bg-base-300 rounded-box z-[1] w-52 p-2 shadow-2xl h-80 overflow-y-auto">
<li>
<button
class="btn btn-sm btn-block btn-ghost justify-start"
:class="{ 'btn-active': selectedTheme === 'auto' }"
@click="setSelectedTheme('auto')">
auto
</button>
</li>
<li v-for="theme in themes">
<input
type="radio"
name="theme-dropdown"
class="theme-controller btn btn-sm btn-block btn-ghost justify-start"
:aria-label="theme"
:value="theme"
:checked="selectedTheme === theme"
@click="setSelectedTheme(theme)" />
</li>
</ul>
</div>
</div>
</div>
</div>
<!-- chat messages -->
<div id="messages-list" class="flex flex-col grow overflow-y-auto">
<div class="mt-auto flex justify-center">
<!-- placeholder to shift the message to the bottom -->
{{ messages.length === 0 ? 'Send a message to start' : '' }}
</div>
<div v-for="msg in messages" class="group">
<message-bubble
:config="config"
:msg="msg"
:key="msg.id"
:is-generating="isGenerating"
:edit-user-msg-and-regenerate="editUserMsgAndRegenerate"
:regenerate-msg="regenerateMsg"></message-bubble>
</div>
<!-- pending (ongoing) assistant message -->
<div id="pending-msg" class="group">
<message-bubble
v-if="pendingMsg"
:config="config"
:msg="pendingMsg"
:key="pendingMsg.id"
:is-generating="isGenerating"
:show-thought-in-progress="config.showThoughtInProgress"
:edit-user-msg-and-regenerate="() => {}"
:regenerate-msg="() => {}"></message-bubble>
</div>
</div>
<!-- chat input -->
<div class="flex flex-row items-center mt-8 mb-6">
<textarea
class="textarea textarea-bordered w-full"
placeholder="Type a message (Shift+Enter to add a new line)"
v-model="inputMsg"
@keydown.enter.exact.prevent="sendMessage"
@keydown.enter.shift.exact.prevent="inputMsg += '\n'"
:disabled="isGenerating"
id="msg-input"
dir="auto"
></textarea>
<button v-if="!isGenerating" class="btn btn-primary ml-2" @click="sendMessage" :disabled="inputMsg.length === 0">Send</button>
<button v-else class="btn btn-neutral ml-2" @click="stopGeneration">Stop</button>
</div>
</div>
</div>
<!-- modal for editing config -->
<dialog class="modal" :class="{'modal-open': showConfigDialog}">
<div class="modal-box">
<h3 class="text-lg font-bold mb-6">Settings</h3>
<div class="h-[calc(90vh-12rem)] overflow-y-auto">
<p class="opacity-40 mb-6">Settings below are saved in browser's localStorage</p>
<settings-modal-short-input :config-key="'apiKey'" :config-default="configDefault" :config-info="configInfo" v-model="config.apiKey"></settings-modal-short-input>
<label class="form-control mb-2">
<div class="label">System Message</div>
<textarea class="textarea textarea-bordered h-24" :placeholder="'Default: ' + configDefault.systemMessage" v-model="config.systemMessage"></textarea>
</label>
<template v-for="configKey in ['temperature', 'top_k', 'top_p', 'min_p', 'max_tokens']">
<settings-modal-short-input :config-key="configKey" :config-default="configDefault" :config-info="configInfo" v-model="config[configKey]"></settings-modal-short-input>
</template>
<!-- TODO: add more sampling-related configs, please regroup them into different "collapse" sections -->
<!-- Section: Other sampler settings -->
<details class="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
<summary class="collapse-title font-bold">Other sampler settings</summary>
<div class="collapse-content">
<!-- Samplers queue -->
<settings-modal-short-input label="Samplers queue" :config-key="'samplers'" :config-default="configDefault" :config-info="configInfo" v-model="config.samplers"></settings-modal-short-input>
<!-- Samplers -->
<template v-for="configKey in ['dynatemp_range', 'dynatemp_exponent', 'typical_p', 'xtc_probability', 'xtc_threshold']">
<settings-modal-short-input :config-key="configKey" :config-default="configDefault" :config-info="configInfo" v-model="config[configKey]"></settings-modal-short-input>
</template>
</div>
</details>
<!-- Section: Penalties settings -->
<details class="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
<summary class="collapse-title font-bold">Penalties settings</summary>
<div class="collapse-content">
<template v-for="configKey in ['repeat_last_n', 'repeat_penalty', 'presence_penalty', 'frequency_penalty', 'dry_multiplier', 'dry_base', 'dry_allowed_length', 'dry_penalty_last_n']">
<settings-modal-short-input :config-key="configKey" :config-default="configDefault" :config-info="configInfo" v-model="config[configKey]"></settings-modal-short-input>
</template>
</div>
</details>
<!-- Section: Reasoning models -->
<details class="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
<summary class="collapse-title font-bold">Reasoning models</summary>
<div class="collapse-content">
<div class="flex flex-row items-center mb-2">
<input type="checkbox" class="checkbox" v-model="config.showThoughtInProgress" />
<span class="ml-4">Expand though process by default for generating message</span>
</div>
<div class="flex flex-row items-center mb-2">
<input type="checkbox" class="checkbox" v-model="config.excludeThoughtOnReq" />
<span class="ml-4">Exclude thought process when sending request to API (Recommended for DeepSeek-R1)</span>
</div>
</div>
</details>
<!-- Section: Advanced config -->
<details class="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
<summary class="collapse-title font-bold">Advanced config</summary>
<div class="collapse-content">
<div class="flex flex-row items-center mb-2" v-if="isDev">
<!-- this button only shows in dev mode, used to import a demo conversation to test message rendering -->
<button class="btn" @click="debugImportDemoConv()">(debug) Import demo conversation</button>
</div>
<div class="flex flex-row items-center mb-2">
<input type="checkbox" class="checkbox" v-model="config.showTokensPerSecond" />
<span class="ml-4">Show tokens per second</span>
</div>
<label class="form-control mb-2">
<!-- Custom parameters input -->
<div class="label inline">Custom JSON config (For more info, refer to <a class="underline" href="https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md" target="_blank" rel="noopener noreferrer">server documentation</a>)</div>
<textarea class="textarea textarea-bordered h-24" placeholder="Example: { &quot;mirostat&quot;: 1, &quot;min_p&quot;: 0.1 }" v-model="config.custom"></textarea>
</label>
</div>
</details>
</div>
<!-- action buttons -->
<div class="modal-action">
<button class="btn" @click="resetConfigDialog">Reset to default</button>
<button class="btn" @click="closeAndDiscardConfigDialog">Close</button>
<button class="btn btn-primary" @click="closeAndSaveConfigDialog">Save</button>
</div>
</div>
</dialog>
</div>
<!-- Template to be used as message bubble -->
<template id="message-bubble">
<div :class="{
'chat': true,
'chat-start': msg.role !== 'user',
'chat-end': msg.role === 'user',
}">
<div :class="{
'chat-bubble markdown': true,
'chat-bubble-base-300': msg.role !== 'user',
}">
<!-- textarea for editing message -->
<template v-if="editingContent !== null">
<textarea
dir="auto"
class="textarea textarea-bordered bg-base-100 text-base-content w-[calc(90vw-8em)] lg:w-96"
v-model="editingContent"></textarea>
<br/>
<button class="btn btn-ghost mt-2 mr-2" @click="editingContent = null">Cancel</button>
<button class="btn mt-2" @click="editMsg()">Submit</button>
</template>
<template v-else>
<!-- show loading dots for pending message -->
<span v-if="msg.content === null" class="loading loading-dots loading-md"></span>
<!-- render message as markdown -->
<div v-else dir="auto">
<details v-if="msg.role === 'assistant' && splitMsgContent.cot" class="collapse bg-base-200 collapse-arrow mb-4" :open="splitMsgContent.isThinking && showThoughtInProgress">
<summary class="collapse-title">
<span v-if="splitMsgContent.isThinking">
<span v-if="isGenerating" class="loading loading-spinner loading-md mr-2" style="vertical-align: middle;"></span>
<b>Thinking</b>
</span>
<b v-else>Thought Process</b>
</summary>
<vue-markdown :source="splitMsgContent.cot" dir="auto" class="collapse-content"></vue-markdown>
</details>
<vue-markdown :source="splitMsgContent.content"></vue-markdown>
</div>
<!-- render timings if enabled -->
<div class="dropdown dropdown-hover dropdown-top mt-2" v-if="timings && config.showTokensPerSecond">
<div tabindex="0" role="button" class="cursor-pointer font-semibold text-sm opacity-60">Speed: {{ timings.predicted_per_second.toFixed(1) }} t/s</div>
<div class="dropdown-content bg-base-100 z-10 w-64 p-2 shadow mt-4">
<b>Prompt</b><br/>
- Tokens: {{ timings.prompt_n }}<br/>
- Time: {{ timings.prompt_ms }} ms<br/>
- Speed: {{ timings.prompt_per_second.toFixed(1) }} t/s<br/>
<b>Generation</b><br/>
- Tokens: {{ timings.predicted_n }}<br/>
- Time: {{ timings.predicted_ms }} ms<br/>
- Speed: {{ timings.predicted_per_second.toFixed(1) }} t/s<br/>
</div>
</div>
</template>
</div>
</div>
<!-- actions for each message -->
<div :class="{'text-right': msg.role === 'user', 'opacity-0': isGenerating}" class="mx-4 mt-2 mb-2">
<!-- user message -->
<button v-if="msg.role === 'user'" class="badge btn-mini show-on-hover" @click="editingContent = msg.content" :disabled="isGenerating">
✍️ Edit
</button>
<!-- assistant message -->
<button v-if="msg.role === 'assistant'" class="badge btn-mini show-on-hover mr-2" @click="regenerateMsg(msg)" :disabled="isGenerating">
🔄 Regenerate
</button>
<button v-if="msg.role === 'assistant'" class="badge btn-mini show-on-hover mr-2" @click="copyMsg()" :disabled="isGenerating">
📋 Copy
</button>
</div>
</template>
<!-- Template to be used by settings modal -->
<template id="settings-modal-short-input">
<label class="input input-bordered join-item grow flex items-center gap-2 mb-2">
<!-- Show help message on hovering on the input label -->
<div class="dropdown dropdown-hover">
<div tabindex="0" role="button" class="font-bold">{{ label || configKey }}</div>
<div class="dropdown-content menu bg-base-100 rounded-box z-10 w-64 p-2 shadow mt-4">
{{ configInfo[configKey] || '(no help message available)' }}
</div>
</div>
<!-- Here we forward v-model from parent to child component, see: https://stackoverflow.com/questions/47311936/v-model-and-child-components -->
<input type="text" class="grow" :placeholder="'Default: ' + (configDefault[configKey] || 'none')" :value="modelValue" @input="$emit('update:modelValue', $event.target.value)" />
</label>
</template>
<script type="module" src="/src/main.js"></script>
</body>
<head>
<meta charset="UTF-8" />
<meta
name="viewport"
content="width=device-width, initial-scale=1, maximum-scale=1"
/>
<meta name="color-scheme" content="light dark" />
<title>🦙 llama.cpp - chat</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>

File diff suppressed because it is too large Load diff

View file

@ -5,13 +5,10 @@
"type": "module",
"scripts": {
"dev": "vite",
"build": "vite build",
"preview": "vite preview",
"analyze": "ANALYZE=1 npx vite-bundle-visualizer"
},
"devDependencies": {
"sass-embedded": "^1.83.0",
"vite": "^5.4.10"
"build": "tsc -b && vite build",
"format": "eslint . && prettier --write .",
"lint": "eslint .",
"preview": "vite preview"
},
"dependencies": {
"@sec-ant/readable-stream": "^0.6.0",
@ -20,11 +17,42 @@
"daisyui": "^4.12.14",
"highlight.js": "^11.10.0",
"katex": "^0.16.15",
"markdown-it": "^14.1.0",
"postcss": "^8.4.49",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-markdown": "^9.0.3",
"react-router": "^7.1.5",
"rehype-highlight": "^7.0.2",
"rehype-katex": "^7.0.1",
"remark-breaks": "^4.0.0",
"remark-gfm": "^4.0.0",
"remark-math": "^6.0.0",
"tailwindcss": "^3.4.15",
"textlinestream": "^1.1.1",
"vite-plugin-singlefile": "^2.0.3",
"vue": "^3.5.13"
"vite-plugin-singlefile": "^2.0.3"
},
"devDependencies": {
"@eslint/js": "^9.17.0",
"@types/markdown-it": "^14.1.2",
"@types/node": "^22.13.1",
"@types/react": "^18.3.18",
"@types/react-dom": "^18.3.5",
"@vitejs/plugin-react": "^4.3.4",
"eslint": "^9.17.0",
"eslint-plugin-react-hooks": "^5.0.0",
"eslint-plugin-react-refresh": "^0.4.16",
"globals": "^15.14.0",
"prettier": "^3.4.2",
"sass-embedded": "^1.83.4",
"typescript": "~5.6.2",
"typescript-eslint": "^8.18.2",
"vite": "^6.0.5"
},
"prettier": {
"trailingComma": "es5",
"tabWidth": 2,
"semi": true,
"singleQuote": true,
"bracketSameLine": false
}
}

View file

@ -11,7 +11,7 @@
{
"id": 1734087548327,
"role": "assistant",
"content": "This is the formula:\n\n$\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}$\n\nGiven an input vector \\(\\mathbf{x} = [x_1, x_2, \\ldots, x_n]\\)\n\n\\[\ny_i = \\frac{e^{x_i}}{\\sum_{j=1}^n e^{x_j}}\n\\]\n\nCode block latex:\n```latex\n\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}\n```\n\nTest dollar sign: $1234 $4567\n\nInvalid latex syntax: $E = mc^$ and $$E = mc^$$",
"content": "This is the formula:\n\n$\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}$\n\nGiven an input vector \\(\\mathbf{x} = [x_1, x_2, \\ldots, x_n]\\)\n\n\\[\ny_i = \\frac{e^{x_i}}{\\sum_{j=1}^n e^{x_j}}\n\\]\n\n$2x + y = z$\n\nCode block latex:\n```latex\n\\frac{e^{x_i}}{\\sum_{j=1}^{n}e^{x_j}}\n```\n\nTest dollar sign: $1234 $4567\n\nInvalid latex syntax: $E = mc^$ and $$E = mc^$$",
"timings": {
"prompt_n": 1,
"prompt_ms": 28.923,

View file

@ -0,0 +1,36 @@
import { HashRouter, Outlet, Route, Routes } from 'react-router';
import Header from './components/Header';
import Sidebar from './components/Sidebar';
import { AppContextProvider } from './utils/app.context';
import ChatScreen from './components/ChatScreen';
function App() {
return (
<HashRouter>
<div className="flex flex-row drawer lg:drawer-open">
<AppContextProvider>
<Routes>
<Route element={<AppLayout />}>
<Route path="/chat/:convId" element={<ChatScreen />} />
<Route path="*" element={<ChatScreen />} />
</Route>
</Routes>
</AppContextProvider>
</div>
</HashRouter>
);
}
function AppLayout() {
return (
<>
<Sidebar />
<div className="chat-screen drawer-content grow flex flex-col h-screen w-screen mx-auto px-4">
<Header />
<Outlet />
</div>
</>
);
}
export default App;

View file

@ -0,0 +1,89 @@
import daisyuiThemes from 'daisyui/src/theming/themes';
import { isNumeric } from './utils/misc';
export const isDev = import.meta.env.MODE === 'development';
// constants
export const BASE_URL = new URL('.', document.baseURI).href
.toString()
.replace(/\/$/, '');
export const CONFIG_DEFAULT = {
// Note: in order not to introduce breaking changes, please keep the same data type (number, string, etc) if you want to change the default value. Do not use null or undefined for default value.
apiKey: '',
systemMessage: 'You are a helpful assistant.',
showTokensPerSecond: false,
showThoughtInProgress: false,
excludeThoughtOnReq: true,
// make sure these default values are in sync with `common.h`
samplers: 'edkypmxt',
temperature: 0.8,
dynatemp_range: 0.0,
dynatemp_exponent: 1.0,
top_k: 40,
top_p: 0.95,
min_p: 0.05,
xtc_probability: 0.0,
xtc_threshold: 0.1,
typical_p: 1.0,
repeat_last_n: 64,
repeat_penalty: 1.0,
presence_penalty: 0.0,
frequency_penalty: 0.0,
dry_multiplier: 0.0,
dry_base: 1.75,
dry_allowed_length: 2,
dry_penalty_last_n: -1,
max_tokens: -1,
custom: '', // custom json-stringified object
};
export const CONFIG_INFO: Record<string, string> = {
apiKey: 'Set the API Key if you are using --api-key option for the server.',
systemMessage: 'The starting message that defines how model should behave.',
samplers:
'The order at which samplers are applied, in simplified way. Default is "dkypmxt": dry->top_k->typ_p->top_p->min_p->xtc->temperature',
temperature:
'Controls the randomness of the generated text by affecting the probability distribution of the output tokens. Higher = more random, lower = more focused.',
dynatemp_range:
'Addon for the temperature sampler. The added value to the range of dynamic temperature, which adjusts probabilities by entropy of tokens.',
dynatemp_exponent:
'Addon for the temperature sampler. Smoothes out the probability redistribution based on the most probable token.',
top_k: 'Keeps only k top tokens.',
top_p:
'Limits tokens to those that together have a cumulative probability of at least p',
min_p:
'Limits tokens based on the minimum probability for a token to be considered, relative to the probability of the most likely token.',
xtc_probability:
'XTC sampler cuts out top tokens; this parameter controls the chance of cutting tokens at all. 0 disables XTC.',
xtc_threshold:
'XTC sampler cuts out top tokens; this parameter controls the token probability that is required to cut that token.',
typical_p:
'Sorts and limits tokens based on the difference between log-probability and entropy.',
repeat_last_n: 'Last n tokens to consider for penalizing repetition',
repeat_penalty:
'Controls the repetition of token sequences in the generated text',
presence_penalty:
'Limits tokens based on whether they appear in the output or not.',
frequency_penalty:
'Limits tokens based on how often they appear in the output.',
dry_multiplier:
'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets the DRY sampling multiplier.',
dry_base:
'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets the DRY sampling base value.',
dry_allowed_length:
'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets the allowed length for DRY sampling.',
dry_penalty_last_n:
'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets DRY penalty for the last n tokens.',
max_tokens: 'The maximum number of token per output.',
custom: '', // custom json-stringified object
};
// config keys having numeric value (i.e. temperature, top_k, top_p, etc)
export const CONFIG_NUMERIC_KEYS = Object.entries(CONFIG_DEFAULT)
.filter((e) => isNumeric(e[1]))
.map((e) => e[0]);
// list of themes supported by daisyui
export const THEMES = ['light', 'dark']
// make sure light & dark are always at the beginning
.concat(
Object.keys(daisyuiThemes).filter((t) => t !== 'light' && t !== 'dark')
);

View file

@ -0,0 +1,229 @@
import { useMemo, useState } from 'react';
import { useAppContext } from '../utils/app.context';
import { Message, PendingMessage } from '../utils/types';
import { classNames } from '../utils/misc';
import MarkdownDisplay, { CopyButton } from './MarkdownDisplay';
interface SplitMessage {
content: PendingMessage['content'];
thought?: string;
isThinking?: boolean;
}
export default function ChatMessage({
msg,
id,
scrollToBottom,
isPending,
}: {
msg: Message | PendingMessage;
id?: string;
scrollToBottom: (requiresNearBottom: boolean) => void;
isPending?: boolean;
}) {
const { viewingConversation, replaceMessageAndGenerate, config } =
useAppContext();
const [editingContent, setEditingContent] = useState<string | null>(null);
const timings = useMemo(
() =>
msg.timings
? {
...msg.timings,
prompt_per_second:
(msg.timings.prompt_n / msg.timings.prompt_ms) * 1000,
predicted_per_second:
(msg.timings.predicted_n / msg.timings.predicted_ms) * 1000,
}
: null,
[msg.timings]
);
// for reasoning model, we split the message into content and thought
// TODO: implement this as remark/rehype plugin in the future
const { content, thought, isThinking }: SplitMessage = useMemo(() => {
if (msg.content === null || msg.role !== 'assistant') {
return { content: msg.content };
}
let actualContent = '';
let thought = '';
let isThinking = false;
let thinkSplit = msg.content.split('<think>', 2);
actualContent += thinkSplit[0];
while (thinkSplit[1] !== undefined) {
// <think> tag found
thinkSplit = thinkSplit[1].split('</think>', 2);
thought += thinkSplit[0];
isThinking = true;
if (thinkSplit[1] !== undefined) {
// </think> closing tag found
isThinking = false;
thinkSplit = thinkSplit[1].split('<think>', 2);
actualContent += thinkSplit[0];
}
}
return { content: actualContent, thought, isThinking };
}, [msg]);
if (!viewingConversation) return null;
const regenerate = async () => {
replaceMessageAndGenerate(viewingConversation.id, msg.id, undefined, () =>
scrollToBottom(true)
);
};
return (
<div className="group" id={id}>
<div
className={classNames({
chat: true,
'chat-start': msg.role !== 'user',
'chat-end': msg.role === 'user',
})}
>
<div
className={classNames({
'chat-bubble markdown': true,
'chat-bubble-base-300': msg.role !== 'user',
})}
>
{/* textarea for editing message */}
{editingContent !== null && (
<>
<textarea
dir="auto"
className="textarea textarea-bordered bg-base-100 text-base-content w-[calc(90vw-8em)] lg:w-96"
value={editingContent}
onChange={(e) => setEditingContent(e.target.value)}
></textarea>
<br />
<button
className="btn btn-ghost mt-2 mr-2"
onClick={() => setEditingContent(null)}
>
Cancel
</button>
<button
className="btn mt-2"
onClick={() =>
replaceMessageAndGenerate(
viewingConversation.id,
msg.id,
editingContent
)
}
>
Submit
</button>
</>
)}
{/* not editing content, render message */}
{editingContent === null && (
<>
{content === null ? (
<>
{/* show loading dots for pending message */}
<span className="loading loading-dots loading-md"></span>
</>
) : (
<>
{/* render message as markdown */}
<div dir="auto">
{thought && (
<details
className="collapse bg-base-200 collapse-arrow mb-4"
open={isThinking && config.showThoughtInProgress}
>
<summary className="collapse-title">
{isPending && isThinking ? (
<span>
<span
v-if="isGenerating"
className="loading loading-spinner loading-md mr-2"
style={{ verticalAlign: 'middle' }}
></span>
<b>Thinking</b>
</span>
) : (
<b>Thought Process</b>
)}
</summary>
<div className="collapse-content">
<MarkdownDisplay content={thought} />
</div>
</details>
)}
<MarkdownDisplay content={content} />
</div>
</>
)}
{/* render timings if enabled */}
{timings && config.showTokensPerSecond && (
<div className="dropdown dropdown-hover dropdown-top mt-2">
<div
tabIndex={0}
role="button"
className="cursor-pointer font-semibold text-sm opacity-60"
>
Speed: {timings.predicted_per_second.toFixed(1)} t/s
</div>
<div className="dropdown-content bg-base-100 z-10 w-64 p-2 shadow mt-4">
<b>Prompt</b>
<br />- Tokens: {timings.prompt_n}
<br />- Time: {timings.prompt_ms} ms
<br />- Speed: {timings.prompt_per_second.toFixed(1)} t/s
<br />
<b>Generation</b>
<br />- Tokens: {timings.predicted_n}
<br />- Time: {timings.predicted_ms} ms
<br />- Speed: {timings.predicted_per_second.toFixed(1)} t/s
<br />
</div>
</div>
)}
</>
)}
</div>
</div>
{/* actions for each message */}
{msg.content !== null && (
<div
className={classNames({
'mx-4 mt-2 mb-2': true,
'text-right': msg.role === 'user',
})}
>
{/* user message */}
{msg.role === 'user' && (
<button
className="badge btn-mini show-on-hover"
onClick={() => setEditingContent(msg.content)}
disabled={msg.content === null}
>
Edit
</button>
)}
{/* assistant message */}
{msg.role === 'assistant' && (
<>
{!isPending && (
<button
className="badge btn-mini show-on-hover mr-2"
onClick={regenerate}
disabled={msg.content === null}
>
🔄 Regenerate
</button>
)}
<CopyButton
className="badge btn-mini show-on-hover mr-2"
content={msg.content}
/>
</>
)}
</div>
)}
</div>
);
}

View file

@ -0,0 +1,123 @@
import { useEffect, useRef, useState } from 'react';
import { useAppContext } from '../utils/app.context';
import StorageUtils from '../utils/storage';
import { useNavigate } from 'react-router';
import ChatMessage from './ChatMessage';
import { PendingMessage } from '../utils/types';
export default function ChatScreen() {
const {
viewingConversation,
sendMessage,
isGenerating,
stopGenerating,
pendingMessages,
} = useAppContext();
const [inputMsg, setInputMsg] = useState('');
const containerRef = useRef<HTMLDivElement>(null);
const navigate = useNavigate();
const currConvId = viewingConversation?.id ?? '';
const pendingMsg: PendingMessage | undefined = pendingMessages[currConvId];
const scrollToBottom = (requiresNearBottom: boolean) => {
if (!containerRef.current) return;
const msgListElem = containerRef.current;
const spaceToBottom =
msgListElem.scrollHeight -
msgListElem.scrollTop -
msgListElem.clientHeight;
if (!requiresNearBottom || spaceToBottom < 50) {
setTimeout(
() => msgListElem.scrollTo({ top: msgListElem.scrollHeight }),
1
);
}
};
// scroll to bottom when conversation changes
useEffect(() => {
scrollToBottom(false);
}, [viewingConversation?.id]);
const sendNewMessage = async () => {
if (inputMsg.trim().length === 0 || isGenerating(currConvId)) return;
const convId = viewingConversation?.id ?? StorageUtils.getNewConvId();
const lastInpMsg = inputMsg;
setInputMsg('');
if (!viewingConversation) {
// if user is creating a new conversation, redirect to the new conversation
navigate(`/chat/${convId}`);
}
scrollToBottom(false);
// auto scroll as message is being generated
const onChunk = () => scrollToBottom(true);
if (!(await sendMessage(convId, inputMsg, onChunk))) {
// restore the input message if failed
setInputMsg(lastInpMsg);
}
};
return (
<>
{/* chat messages */}
<div
id="messages-list"
className="flex flex-col grow overflow-y-auto"
ref={containerRef}
>
<div className="mt-auto flex justify-center">
{/* placeholder to shift the message to the bottom */}
{viewingConversation ? '' : 'Send a message to start'}
</div>
{viewingConversation?.messages.map((msg) => (
<ChatMessage key={msg.id} msg={msg} scrollToBottom={scrollToBottom} />
))}
{pendingMsg && (
<ChatMessage
msg={pendingMsg}
scrollToBottom={scrollToBottom}
isPending
id="pending-msg"
/>
)}
</div>
{/* chat input */}
<div className="flex flex-row items-center mt-8 mb-6">
<textarea
className="textarea textarea-bordered w-full"
placeholder="Type a message (Shift+Enter to add a new line)"
value={inputMsg}
onChange={(e) => setInputMsg(e.target.value)}
onKeyDown={(e) => {
if (e.key === 'Enter' && e.shiftKey) return;
if (e.key === 'Enter' && !e.shiftKey) {
e.preventDefault();
sendNewMessage();
}
}}
id="msg-input"
dir="auto"
></textarea>
{isGenerating(currConvId) ? (
<button
className="btn btn-neutral ml-2"
onClick={() => stopGenerating(currConvId)}
>
Stop
</button>
) : (
<button
className="btn btn-primary ml-2"
onClick={sendNewMessage}
disabled={inputMsg.trim().length === 0}
>
Send
</button>
)}
</div>
</>
);
}

View file

@ -0,0 +1,182 @@
import { useEffect, useState } from 'react';
import StorageUtils from '../utils/storage';
import { useAppContext } from '../utils/app.context';
import { classNames } from '../utils/misc';
import daisyuiThemes from 'daisyui/src/theming/themes';
import { THEMES } from '../Config';
import { useNavigate } from 'react-router';
import SettingDialog from './SettingDialog';
export default function Header() {
const navigate = useNavigate();
const [selectedTheme, setSelectedTheme] = useState(StorageUtils.getTheme());
const [showSettingDialog, setShowSettingDialog] = useState(false);
const setTheme = (theme: string) => {
StorageUtils.setTheme(theme);
setSelectedTheme(theme);
};
useEffect(() => {
document.body.setAttribute('data-theme', selectedTheme);
document.body.setAttribute(
'data-color-scheme',
// @ts-expect-error daisyuiThemes complains about index type, but it should work
daisyuiThemes[selectedTheme]?.['color-scheme'] ?? 'auto'
);
}, [selectedTheme]);
const { isGenerating, viewingConversation } = useAppContext();
const isCurrConvGenerating = isGenerating(viewingConversation?.id ?? '');
const removeConversation = () => {
if (isCurrConvGenerating || !viewingConversation) return;
const convId = viewingConversation.id;
if (window.confirm('Are you sure to delete this conversation?')) {
StorageUtils.remove(convId);
navigate('/');
}
};
const downloadConversation = () => {
if (isCurrConvGenerating || !viewingConversation) return;
const convId = viewingConversation.id;
const conversationJson = JSON.stringify(viewingConversation, null, 2);
const blob = new Blob([conversationJson], { type: 'application/json' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = `conversation_${convId}.json`;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
URL.revokeObjectURL(url);
};
return (
<div className="flex flex-row items-center mt-6 mb-6">
{/* open sidebar button */}
<label htmlFor="toggle-drawer" className="btn btn-ghost lg:hidden">
<svg
xmlns="http://www.w3.org/2000/svg"
width="16"
height="16"
fill="currentColor"
className="bi bi-list"
viewBox="0 0 16 16"
>
<path
fillRule="evenodd"
d="M2.5 12a.5.5 0 0 1 .5-.5h10a.5.5 0 0 1 0 1H3a.5.5 0 0 1-.5-.5m0-4a.5.5 0 0 1 .5-.5h10a.5.5 0 0 1 0 1H3a.5.5 0 0 1-.5-.5m0-4a.5.5 0 0 1 .5-.5h10a.5.5 0 0 1 0 1H3a.5.5 0 0 1-.5-.5"
/>
</svg>
</label>
<div className="grow text-2xl font-bold ml-2">llama.cpp</div>
{/* action buttons (top right) */}
<div className="flex items-center">
<div v-if="messages.length > 0" className="dropdown dropdown-end">
{/* "..." button */}
<button
tabIndex={0}
role="button"
className="btn m-1"
disabled={isCurrConvGenerating}
>
<svg
xmlns="http://www.w3.org/2000/svg"
width="16"
height="16"
fill="currentColor"
className="bi bi-three-dots-vertical"
viewBox="0 0 16 16"
>
<path d="M9.5 13a1.5 1.5 0 1 1-3 0 1.5 1.5 0 0 1 3 0m0-5a1.5 1.5 0 1 1-3 0 1.5 1.5 0 0 1 3 0m0-5a1.5 1.5 0 1 1-3 0 1.5 1.5 0 0 1 3 0" />
</svg>
</button>
{/* dropdown menu */}
<ul
tabIndex={0}
className="dropdown-content menu bg-base-100 rounded-box z-[1] w-52 p-2 shadow"
>
<li onClick={downloadConversation}>
<a>Download</a>
</li>
<li className="text-error" onClick={removeConversation}>
<a>Delete</a>
</li>
</ul>
</div>
<div className="tooltip tooltip-bottom" data-tip="Settings">
<button className="btn" onClick={() => setShowSettingDialog(true)}>
{/* settings button */}
<svg
xmlns="http://www.w3.org/2000/svg"
width="16"
height="16"
fill="currentColor"
className="bi bi-gear"
viewBox="0 0 16 16"
>
<path d="M8 4.754a3.246 3.246 0 1 0 0 6.492 3.246 3.246 0 0 0 0-6.492M5.754 8a2.246 2.246 0 1 1 4.492 0 2.246 2.246 0 0 1-4.492 0" />
<path d="M9.796 1.343c-.527-1.79-3.065-1.79-3.592 0l-.094.319a.873.873 0 0 1-1.255.52l-.292-.16c-1.64-.892-3.433.902-2.54 2.541l.159.292a.873.873 0 0 1-.52 1.255l-.319.094c-1.79.527-1.79 3.065 0 3.592l.319.094a.873.873 0 0 1 .52 1.255l-.16.292c-.892 1.64.901 3.434 2.541 2.54l.292-.159a.873.873 0 0 1 1.255.52l.094.319c.527 1.79 3.065 1.79 3.592 0l.094-.319a.873.873 0 0 1 1.255-.52l.292.16c1.64.893 3.434-.902 2.54-2.541l-.159-.292a.873.873 0 0 1 .52-1.255l.319-.094c1.79-.527 1.79-3.065 0-3.592l-.319-.094a.873.873 0 0 1-.52-1.255l.16-.292c.893-1.64-.902-3.433-2.541-2.54l-.292.159a.873.873 0 0 1-1.255-.52zm-2.633.283c.246-.835 1.428-.835 1.674 0l.094.319a1.873 1.873 0 0 0 2.693 1.115l.291-.16c.764-.415 1.6.42 1.184 1.185l-.159.292a1.873 1.873 0 0 0 1.116 2.692l.318.094c.835.246.835 1.428 0 1.674l-.319.094a1.873 1.873 0 0 0-1.115 2.693l.16.291c.415.764-.42 1.6-1.185 1.184l-.291-.159a1.873 1.873 0 0 0-2.693 1.116l-.094.318c-.246.835-1.428.835-1.674 0l-.094-.319a1.873 1.873 0 0 0-2.692-1.115l-.292.16c-.764.415-1.6-.42-1.184-1.185l.159-.291A1.873 1.873 0 0 0 1.945 8.93l-.319-.094c-.835-.246-.835-1.428 0-1.674l.319-.094A1.873 1.873 0 0 0 3.06 4.377l-.16-.292c-.415-.764.42-1.6 1.185-1.184l.292.159a1.873 1.873 0 0 0 2.692-1.115z" />
</svg>
</button>
</div>
{/* theme controller is copied from https://daisyui.com/components/theme-controller/ */}
<div className="tooltip tooltip-bottom" data-tip="Themes">
<div className="dropdown dropdown-end dropdown-bottom">
<div tabIndex={0} role="button" className="btn m-1">
<svg
xmlns="http://www.w3.org/2000/svg"
width="16"
height="16"
fill="currentColor"
className="bi bi-palette2"
viewBox="0 0 16 16"
>
<path d="M0 .5A.5.5 0 0 1 .5 0h5a.5.5 0 0 1 .5.5v5.277l4.147-4.131a.5.5 0 0 1 .707 0l3.535 3.536a.5.5 0 0 1 0 .708L10.261 10H15.5a.5.5 0 0 1 .5.5v5a.5.5 0 0 1-.5.5H3a3 3 0 0 1-2.121-.879A3 3 0 0 1 0 13.044m6-.21 7.328-7.3-2.829-2.828L6 7.188zM4.5 13a1.5 1.5 0 1 0-3 0 1.5 1.5 0 0 0 3 0M15 15v-4H9.258l-4.015 4zM0 .5v12.495zm0 12.495V13z" />
</svg>
</div>
<ul
tabIndex={0}
className="dropdown-content bg-base-300 rounded-box z-[1] w-52 p-2 shadow-2xl h-80 overflow-y-auto"
>
<li>
<button
className={classNames({
'btn btn-sm btn-block btn-ghost justify-start': true,
'btn-active': selectedTheme === 'auto',
})}
onClick={() => setTheme('auto')}
>
auto
</button>
</li>
{THEMES.map((theme) => (
<li key={theme}>
<input
type="radio"
name="theme-dropdown"
className="theme-controller btn btn-sm btn-block btn-ghost justify-start"
aria-label={theme}
value={theme}
checked={selectedTheme === theme}
onChange={(e) => e.target.checked && setTheme(theme)}
/>
</li>
))}
</ul>
</div>
</div>
</div>
<SettingDialog
show={showSettingDialog}
onClose={() => setShowSettingDialog(false)}
/>
</div>
);
}

View file

@ -0,0 +1,254 @@
import React, { useMemo, useState } from 'react';
import Markdown, { ExtraProps } from 'react-markdown';
import remarkGfm from 'remark-gfm';
import rehypeHightlight from 'rehype-highlight';
import rehypeKatex from 'rehype-katex';
import remarkMath from 'remark-math';
import remarkBreaks from 'remark-breaks';
import 'katex/dist/katex.min.css';
import { classNames, copyStr } from '../utils/misc';
import { ElementContent, Root } from 'hast';
import { visit } from 'unist-util-visit';
export default function MarkdownDisplay({ content }: { content: string }) {
const preprocessedContent = useMemo(
() => preprocessLaTeX(content),
[content]
);
return (
<Markdown
remarkPlugins={[remarkGfm, remarkMath, remarkBreaks]}
rehypePlugins={[rehypeHightlight, rehypeKatex, rehypeCustomCopyButton]}
components={{
button: (props) => (
<CopyCodeButton {...props} origContent={preprocessedContent} />
),
}}
>
{preprocessedContent}
</Markdown>
);
}
const CopyCodeButton: React.ElementType<
React.ClassAttributes<HTMLButtonElement> &
React.HTMLAttributes<HTMLButtonElement> &
ExtraProps & { origContent: string }
> = ({ node, origContent }) => {
const startOffset = node?.position?.start.offset ?? 0;
const endOffset = node?.position?.end.offset ?? 0;
const copiedContent = useMemo(
() =>
origContent
.substring(startOffset, endOffset)
.replace(/^```[^\n]+\n/g, '')
.replace(/```$/g, ''),
[origContent, startOffset, endOffset]
);
return (
<div
className={classNames({
'text-right sticky top-4 mb-2 mr-2 h-0': true,
'display-none': !node?.position,
})}
>
<CopyButton className="badge btn-mini" content={copiedContent} />
</div>
);
};
export const CopyButton = ({
content,
className,
}: {
content: string;
className?: string;
}) => {
const [copied, setCopied] = useState(false);
return (
<button
className={className}
onClick={() => {
copyStr(content);
setCopied(true);
}}
onMouseLeave={() => setCopied(false)}
>
{copied ? 'Copied!' : '📋 Copy'}
</button>
);
};
/**
* This injects the "button" element before each "pre" element.
* The actual button will be replaced with a react component in the MarkdownDisplay.
* We don't replace "pre" node directly because it will cause the node to re-render, which causes this bug: https://github.com/ggerganov/llama.cpp/issues/9608
*/
function rehypeCustomCopyButton() {
return function (tree: Root) {
visit(tree, 'element', function (node) {
if (node.tagName === 'pre' && !node.properties.visited) {
const preNode = { ...node };
// replace current node
preNode.properties.visited = 'true';
node.tagName = 'div';
node.properties = {
className: 'relative my-4',
};
// add node for button
const btnNode: ElementContent = {
type: 'element',
tagName: 'button',
properties: {},
children: [],
position: node.position,
};
node.children = [btnNode, preNode];
}
});
};
}
/**
* The part below is copied and adapted from:
* https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts
* (MIT License)
*/
// Regex to check if the processed content contains any potential LaTeX patterns
const containsLatexRegex =
/\\\(.*?\\\)|\\\[.*?\\\]|\$.*?\$|\\begin\{equation\}.*?\\end\{equation\}/;
// Regex for inline and block LaTeX expressions
const inlineLatex = new RegExp(/\\\((.+?)\\\)/, 'g');
const blockLatex = new RegExp(/\\\[(.*?[^\\])\\\]/, 'gs');
// Function to restore code blocks
const restoreCodeBlocks = (content: string, codeBlocks: string[]) => {
return content.replace(
/<<CODE_BLOCK_(\d+)>>/g,
(_, index) => codeBlocks[index]
);
};
// Regex to identify code blocks and inline code
const codeBlockRegex = /(```[\s\S]*?```|`.*?`)/g;
export const processLaTeX = (_content: string) => {
let content = _content;
// Temporarily replace code blocks and inline code with placeholders
const codeBlocks: string[] = [];
let index = 0;
content = content.replace(codeBlockRegex, (match) => {
codeBlocks[index] = match;
return `<<CODE_BLOCK_${index++}>>`;
});
// Escape dollar signs followed by a digit or space and digit
let processedContent = content.replace(/(\$)(?=\s?\d)/g, '\\$');
// If no LaTeX patterns are found, restore code blocks and return the processed content
if (!containsLatexRegex.test(processedContent)) {
return restoreCodeBlocks(processedContent, codeBlocks);
}
// Convert LaTeX expressions to a markdown compatible format
processedContent = processedContent
.replace(inlineLatex, (_: string, equation: string) => `$${equation}$`) // Convert inline LaTeX
.replace(blockLatex, (_: string, equation: string) => `$$${equation}$$`); // Convert block LaTeX
// Restore code blocks
return restoreCodeBlocks(processedContent, codeBlocks);
};
/**
* Preprocesses LaTeX content by replacing delimiters and escaping certain characters.
*
* @param content The input string containing LaTeX expressions.
* @returns The processed string with replaced delimiters and escaped characters.
*/
export function preprocessLaTeX(content: string): string {
// Step 1: Protect code blocks
const codeBlocks: string[] = [];
content = content.replace(/(```[\s\S]*?```|`[^`\n]+`)/g, (_, code) => {
codeBlocks.push(code);
return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
});
// Step 2: Protect existing LaTeX expressions
const latexExpressions: string[] = [];
// Protect block math ($$...$$), \[...\], and \(...\) as before.
content = content.replace(
/(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g,
(match) => {
latexExpressions.push(match);
return `<<LATEX_${latexExpressions.length - 1}>>`;
}
);
// Protect inline math ($...$) only if it does NOT match a currency pattern.
// We assume a currency pattern is one where the inner content is purely numeric (with optional decimals).
content = content.replace(/\$([^$]+)\$/g, (match, inner) => {
if (/^\s*\d+(?:\.\d+)?\s*$/.test(inner)) {
// This looks like a currency value (e.g. "$123" or "$12.34"),
// so don't protect it.
return match;
} else {
// Otherwise, treat it as a LaTeX expression.
latexExpressions.push(match);
return `<<LATEX_${latexExpressions.length - 1}>>`;
}
});
// Step 3: Escape dollar signs that are likely currency indicators.
// (Now that inline math is protected, this will only escape dollars not already protected)
content = content.replace(/\$(?=\d)/g, '\\$');
// Step 4: Restore LaTeX expressions
content = content.replace(
/<<LATEX_(\d+)>>/g,
(_, index) => latexExpressions[parseInt(index)]
);
// Step 5: Restore code blocks
content = content.replace(
/<<CODE_BLOCK_(\d+)>>/g,
(_, index) => codeBlocks[parseInt(index)]
);
// Step 6: Apply additional escaping functions
content = escapeBrackets(content);
content = escapeMhchem(content);
return content;
}
export function escapeBrackets(text: string): string {
const pattern =
/(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;
return text.replace(
pattern,
(
match: string,
codeBlock: string | undefined,
squareBracket: string | undefined,
roundBracket: string | undefined
): string => {
if (codeBlock != null) {
return codeBlock;
} else if (squareBracket != null) {
return `$$${squareBracket}$$`;
} else if (roundBracket != null) {
return `$${roundBracket}$`;
}
return match;
}
);
}
export function escapeMhchem(text: string) {
return text.replaceAll('$\\ce{', '$\\\\ce{').replaceAll('$\\pu{', '$\\\\pu{');
}

View file

@ -0,0 +1,306 @@
import { useState } from 'react';
import { useAppContext } from '../utils/app.context';
import { CONFIG_DEFAULT, CONFIG_INFO } from '../Config';
import { isDev } from '../Config';
import StorageUtils from '../utils/storage';
type SettKey = keyof typeof CONFIG_DEFAULT;
const COMMON_SAMPLER_KEYS: SettKey[] = [
'temperature',
'top_k',
'top_p',
'min_p',
'max_tokens',
];
const OTHER_SAMPLER_KEYS: SettKey[] = [
'dynatemp_range',
'dynatemp_exponent',
'typical_p',
'xtc_probability',
'xtc_threshold',
];
const PENALTY_KEYS: SettKey[] = [
'repeat_last_n',
'repeat_penalty',
'presence_penalty',
'frequency_penalty',
'dry_multiplier',
'dry_base',
'dry_allowed_length',
'dry_penalty_last_n',
];
export default function SettingDialog({
show,
onClose,
}: {
show: boolean;
onClose: () => void;
}) {
const { config, saveConfig } = useAppContext();
// clone the config object to prevent direct mutation
const [localConfig, setLocalConfig] = useState<typeof CONFIG_DEFAULT>(
JSON.parse(JSON.stringify(config))
);
const resetConfig = () => {
if (window.confirm('Are you sure to reset all settings?')) {
setLocalConfig(CONFIG_DEFAULT);
}
};
const handleSave = () => {
saveConfig(localConfig);
onClose();
};
const debugImportDemoConv = async () => {
const res = await fetch('/demo-conversation.json');
const demoConv = await res.json();
StorageUtils.remove(demoConv.id);
for (const msg of demoConv.messages) {
StorageUtils.appendMsg(demoConv.id, msg);
}
onClose();
};
return (
<dialog className={`modal ${show ? 'modal-open' : ''}`}>
<div className="modal-box">
<h3 className="text-lg font-bold mb-6">Settings</h3>
<div className="h-[calc(90vh-12rem)] overflow-y-auto">
<p className="opacity-40 mb-6">
Settings below are saved in browser's localStorage
</p>
<SettingsModalShortInput
configKey="apiKey"
configDefault={CONFIG_DEFAULT}
value={localConfig.apiKey}
onChange={(value) =>
setLocalConfig({ ...localConfig, apiKey: value })
}
/>
<label className="form-control mb-2">
<div className="label">
System Message (will be disabled if left empty)
</div>
<textarea
className="textarea textarea-bordered h-24"
placeholder={`Default: ${CONFIG_DEFAULT.systemMessage}`}
value={localConfig.systemMessage}
onChange={(e) =>
setLocalConfig({
...localConfig,
systemMessage: e.target.value,
})
}
/>
</label>
{COMMON_SAMPLER_KEYS.map((key) => (
<SettingsModalShortInput
key={key}
configKey={key}
configDefault={CONFIG_DEFAULT}
value={localConfig[key]}
onChange={(value) =>
setLocalConfig({ ...localConfig, [key]: value })
}
/>
))}
<details className="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
<summary className="collapse-title font-bold">
Other sampler settings
</summary>
<div className="collapse-content">
<SettingsModalShortInput
label="Samplers queue"
configKey="samplers"
configDefault={CONFIG_DEFAULT}
value={localConfig.samplers}
onChange={(value) =>
setLocalConfig({ ...localConfig, samplers: value })
}
/>
{OTHER_SAMPLER_KEYS.map((key) => (
<SettingsModalShortInput
key={key}
configKey={key}
configDefault={CONFIG_DEFAULT}
value={localConfig[key]}
onChange={(value) =>
setLocalConfig({ ...localConfig, [key]: value })
}
/>
))}
</div>
</details>
<details className="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
<summary className="collapse-title font-bold">
Penalties settings
</summary>
<div className="collapse-content">
{PENALTY_KEYS.map((key) => (
<SettingsModalShortInput
key={key}
configKey={key}
configDefault={CONFIG_DEFAULT}
value={localConfig[key]}
onChange={(value) =>
setLocalConfig({ ...localConfig, [key]: value })
}
/>
))}
</div>
</details>
<details className="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
<summary className="collapse-title font-bold">
Reasoning models
</summary>
<div className="collapse-content">
<div className="flex flex-row items-center mb-2">
<input
type="checkbox"
className="checkbox"
checked={localConfig.showThoughtInProgress}
onChange={(e) =>
setLocalConfig({
...localConfig,
showThoughtInProgress: e.target.checked,
})
}
/>
<span className="ml-4">
Expand though process by default for generating message
</span>
</div>
<div className="flex flex-row items-center mb-2">
<input
type="checkbox"
className="checkbox"
checked={localConfig.excludeThoughtOnReq}
onChange={(e) =>
setLocalConfig({
...localConfig,
excludeThoughtOnReq: e.target.checked,
})
}
/>
<span className="ml-4">
Exclude thought process when sending request to API
(Recommended for DeepSeek-R1)
</span>
</div>
</div>
</details>
<details className="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
<summary className="collapse-title font-bold">
Advanced config
</summary>
<div className="collapse-content">
{/* this button only shows in dev mode, used to import a demo conversation to test message rendering */}
{isDev && (
<div className="flex flex-row items-center mb-2">
<button className="btn" onClick={debugImportDemoConv}>
(debug) Import demo conversation
</button>
</div>
)}
<div className="flex flex-row items-center mb-2">
<input
type="checkbox"
className="checkbox"
checked={localConfig.showTokensPerSecond}
onChange={(e) =>
setLocalConfig({
...localConfig,
showTokensPerSecond: e.target.checked,
})
}
/>
<span className="ml-4">Show tokens per second</span>
</div>
<label className="form-control mb-2">
<div className="label inline">
Custom JSON config (For more info, refer to{' '}
<a
className="underline"
href="https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md"
target="_blank"
rel="noopener noreferrer"
>
server documentation
</a>
)
</div>
<textarea
className="textarea textarea-bordered h-24"
placeholder='Example: { "mirostat": 1, "min_p": 0.1 }'
value={localConfig.custom}
onChange={(e) =>
setLocalConfig({ ...localConfig, custom: e.target.value })
}
/>
</label>
</div>
</details>
</div>
<div className="modal-action">
<button className="btn" onClick={resetConfig}>
Reset to default
</button>
<button className="btn" onClick={onClose}>
Close
</button>
<button className="btn btn-primary" onClick={handleSave}>
Save
</button>
</div>
</div>
</dialog>
);
}
function SettingsModalShortInput({
configKey,
configDefault,
value,
onChange,
label,
}: {
configKey: SettKey;
configDefault: typeof CONFIG_DEFAULT;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
value: any;
onChange: (value: string) => void;
label?: string;
}) {
return (
<label className="input input-bordered join-item grow flex items-center gap-2 mb-2">
<div className="dropdown dropdown-hover">
<div tabIndex={0} role="button" className="font-bold">
{label || configKey}
</div>
<div className="dropdown-content menu bg-base-100 rounded-box z-10 w-64 p-2 shadow mt-4">
{CONFIG_INFO[configKey] ?? '(no help message available)'}
</div>
</div>
<input
type="text"
className="grow"
placeholder={`Default: ${configDefault[configKey] || 'none'}`}
value={value}
onChange={(e) => onChange(e.target.value)}
/>
</label>
);
}

View file

@ -0,0 +1,95 @@
import { useEffect, useMemo, useState } from 'react';
import { classNames } from '../utils/misc';
import { Conversation } from '../utils/types';
import StorageUtils from '../utils/storage';
import { useNavigate, useParams } from 'react-router';
export default function Sidebar() {
const params = useParams();
const navigate = useNavigate();
const currConv = useMemo(
() => StorageUtils.getOneConversation(params.convId ?? ''),
[params.convId]
);
const [conversations, setConversations] = useState<Conversation[]>([]);
useEffect(() => {
const handleConversationChange = () => {
setConversations(StorageUtils.getAllConversations());
};
StorageUtils.onConversationChanged(handleConversationChange);
handleConversationChange();
return () => {
StorageUtils.offConversationChanged(handleConversationChange);
};
}, []);
return (
<>
<input
id="toggle-drawer"
type="checkbox"
className="drawer-toggle"
defaultChecked
/>
<div className="drawer-side h-screen lg:h-screen z-50 lg:max-w-64">
<label
htmlFor="toggle-drawer"
aria-label="close sidebar"
className="drawer-overlay"
></label>
<div className="flex flex-col bg-base-200 min-h-full max-w-64 py-4 px-4">
<div className="flex flex-row items-center justify-between mb-4 mt-4">
<h2 className="font-bold ml-4">Conversations</h2>
{/* close sidebar button */}
<label htmlFor="toggle-drawer" className="btn btn-ghost lg:hidden">
<svg
xmlns="http://www.w3.org/2000/svg"
width="16"
height="16"
fill="currentColor"
className="bi bi-arrow-bar-left"
viewBox="0 0 16 16"
>
<path
fillRule="evenodd"
d="M12.5 15a.5.5 0 0 1-.5-.5v-13a.5.5 0 0 1 1 0v13a.5.5 0 0 1-.5.5M10 8a.5.5 0 0 1-.5.5H3.707l2.147 2.146a.5.5 0 0 1-.708.708l-3-3a.5.5 0 0 1 0-.708l3-3a.5.5 0 1 1 .708.708L3.707 7.5H9.5a.5.5 0 0 1 .5.5"
/>
</svg>
</label>
</div>
{/* list of conversations */}
<div
className={classNames({
'btn btn-ghost justify-start': true,
'btn-active': !currConv,
})}
onClick={() => navigate('/')}
>
+ New conversation
</div>
{conversations.map((conv) => (
<div
key={conv.id}
className={classNames({
'btn btn-ghost justify-start font-normal': true,
'btn-active': conv.id === currConv?.id,
})}
onClick={() => navigate(`/chat/${conv.id}`)}
dir="auto"
>
<span className="truncate">{conv.messages[0].content}</span>
</div>
))}
<div className="text-center text-xs opacity-40 mt-auto mx-4">
Conversations are saved to browser's localStorage
</div>
</div>
</div>
</>
);
}

View file

@ -1,60 +0,0 @@
import hljs from 'highlight.js/lib/core';
// only import commonly used languages to reduce bundle size
import python from 'highlight.js/lib/languages/python';
import javascript from 'highlight.js/lib/languages/javascript';
import json from 'highlight.js/lib/languages/json';
import bash from 'highlight.js/lib/languages/bash';
import yaml from 'highlight.js/lib/languages/yaml';
import markdown from 'highlight.js/lib/languages/markdown';
import scss from 'highlight.js/lib/languages/scss';
import xml from 'highlight.js/lib/languages/xml';
import ruby from 'highlight.js/lib/languages/ruby';
import go from 'highlight.js/lib/languages/go';
import java from 'highlight.js/lib/languages/java';
import rust from 'highlight.js/lib/languages/rust';
import scala from 'highlight.js/lib/languages/scala';
import cpp from 'highlight.js/lib/languages/cpp';
import csharp from 'highlight.js/lib/languages/csharp';
import swift from 'highlight.js/lib/languages/swift';
import dart from 'highlight.js/lib/languages/dart';
import elixir from 'highlight.js/lib/languages/elixir';
import kotlin from 'highlight.js/lib/languages/kotlin';
import lua from 'highlight.js/lib/languages/lua';
import php from 'highlight.js/lib/languages/php';
import latex from 'highlight.js/lib/languages/latex';
hljs.registerLanguage('python', python);
hljs.registerLanguage('javascript', javascript);
hljs.registerLanguage('json', json);
hljs.registerLanguage('yaml', yaml);
hljs.registerLanguage('markdown', markdown);
hljs.registerLanguage('xml', xml);
hljs.registerLanguage('ruby', ruby);
hljs.registerLanguage('go', go);
hljs.registerLanguage('java', java);
hljs.registerLanguage('rust', rust);
hljs.registerLanguage('scala', scala);
hljs.registerLanguage('csharp', csharp);
hljs.registerLanguage('swift', swift);
hljs.registerLanguage('dart', dart);
hljs.registerLanguage('elixir', elixir);
hljs.registerLanguage('kotlin', kotlin);
hljs.registerLanguage('lua', lua);
hljs.registerLanguage('php', php);
hljs.registerLanguage('latex', latex);
// reuse some languages to further reduce bundle size
hljs.registerLanguage('shell', bash);
hljs.registerLanguage('bash', bash);
hljs.registerLanguage('sh', bash);
hljs.registerLanguage('css', scss);
hljs.registerLanguage('scss', scss);
hljs.registerLanguage('c', cpp);
hljs.registerLanguage('cpp', cpp);
export default hljs;

View file

@ -1,15 +1,28 @@
@use "sass:meta";
@use 'sass:meta';
@tailwind base;
@tailwind components;
@tailwind utilities;
.markdown {
h1, h2, h3, h4, h5, h6, ul, ol, li { all: revert; }
h1,
h2,
h3,
h4,
h5,
h6,
ul,
ol,
li {
all: revert;
}
pre {
@apply whitespace-pre-wrap rounded-lg p-2;
border: 1px solid currentColor;
}
p {
@apply mb-2;
}
/* TODO: fix markdown table */
}
@ -19,7 +32,9 @@
.btn-mini {
@apply cursor-pointer hover:shadow-md;
}
.chat-screen { max-width: 900px; }
.chat-screen {
max-width: 900px;
}
.chat-bubble-base-300 {
--tw-bg-opacity: 1;
@ -46,3 +61,7 @@
background: transparent !important;
padding: 0.5em !important;
}
.katex-display {
margin: 0 0 !important;
}

View file

@ -1,66 +0,0 @@
import katex from 'katex';
// Adapted from https://github.com/SchneeHertz/markdown-it-katex-gpt
// MIT license
const defaultOptions = {
delimiters: [
{ left: '\\[', right: '\\]', display: true },
{ left: '\\(', right: '\\)', display: false },
],
};
export function renderLatexHTML(content, display = false) {
return katex.renderToString(content, {
throwOnError: false,
output: 'mathml',
displayMode: display,
});
}
function escapedBracketRule(options) {
return (state, silent) => {
const max = state.posMax;
const start = state.pos;
for (const { left, right, display } of options.delimiters) {
// Check if it starts with the left delimiter
if (!state.src.slice(start).startsWith(left)) continue;
// Skip the length of the left delimiter
let pos = start + left.length;
// Find the matching right delimiter
while (pos < max) {
if (state.src.slice(pos).startsWith(right)) {
break;
}
pos++;
}
// No matching right delimiter found, skip to the next match
if (pos >= max) continue;
// If not in silent mode, convert LaTeX formula to MathML
if (!silent) {
const content = state.src.slice(start + left.length, pos);
try {
const renderedContent = renderLatexHTML(content, display);
const token = state.push('html_inline', '', 0);
token.content = renderedContent;
} catch (e) {
console.error(e);
}
}
// Update position, skip the length of the right delimiter
state.pos = pos + right.length;
return true;
}
}
}
export default function (md, options = defaultOptions) {
md.inline.ruler.after('text', 'escaped_bracket', escapedBracketRule(options));
}

View file

@ -1,701 +0,0 @@
import './styles.scss';
import { createApp, defineComponent, shallowRef, computed, h } from 'vue/dist/vue.esm-bundler.js';
import MarkdownIt from 'markdown-it';
import TextLineStream from 'textlinestream';
// math formula rendering
import 'katex/dist/katex.min.css';
import markdownItKatexGpt from './katex-gpt';
import markdownItKatexNormal from '@vscode/markdown-it-katex';
// code highlighting
import hljs from './highlight-config';
import daisyuiThemes from 'daisyui/src/theming/themes';
// ponyfill for missing ReadableStream asyncIterator on Safari
import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';
const isDev = import.meta.env.MODE === 'development';
// types
/** @typedef {{ id: number, role: 'user' | 'assistant', content: string, timings: any }} Message */
/** @typedef {{ role: 'user' | 'assistant', content: string }} APIMessage */
/** @typedef {{ id: string, lastModified: number, messages: Array<Message> }} Conversation */
// utility functions
const isString = (x) => !!x.toLowerCase;
const isBoolean = (x) => x === true || x === false;
const isNumeric = (n) => !isString(n) && !isNaN(n) && !isBoolean(n);
const escapeAttr = (str) => str.replace(/>/g, '&gt;').replace(/"/g, '&quot;');
const copyStr = (textToCopy) => {
// Navigator clipboard api needs a secure context (https)
if (navigator.clipboard && window.isSecureContext) {
navigator.clipboard.writeText(textToCopy);
} else {
// Use the 'out of viewport hidden text area' trick
const textArea = document.createElement('textarea');
textArea.value = textToCopy;
// Move textarea out of the viewport so it's not visible
textArea.style.position = 'absolute';
textArea.style.left = '-999999px';
document.body.prepend(textArea);
textArea.select();
document.execCommand('copy');
}
};
// constants
const BASE_URL = isDev
? (localStorage.getItem('base') || 'https://localhost:8080') // for debugging
: (new URL('.', document.baseURI).href).toString().replace(/\/$/, ''); // for production
console.log({ BASE_URL });
const CONFIG_DEFAULT = {
// Note: in order not to introduce breaking changes, please keep the same data type (number, string, etc) if you want to change the default value. Do not use null or undefined for default value.
apiKey: '',
systemMessage: 'You are a helpful assistant.',
showTokensPerSecond: false,
showThoughtInProgress: false,
excludeThoughtOnReq: true,
// make sure these default values are in sync with `common.h`
samplers: 'edkypmxt',
temperature: 0.8,
dynatemp_range: 0.0,
dynatemp_exponent: 1.0,
top_k: 40,
top_p: 0.95,
min_p: 0.05,
xtc_probability: 0.0,
xtc_threshold: 0.1,
typical_p: 1.0,
repeat_last_n: 64,
repeat_penalty: 1.0,
presence_penalty: 0.0,
frequency_penalty: 0.0,
dry_multiplier: 0.0,
dry_base: 1.75,
dry_allowed_length: 2,
dry_penalty_last_n: -1,
max_tokens: -1,
custom: '', // custom json-stringified object
};
const CONFIG_INFO = {
apiKey: 'Set the API Key if you are using --api-key option for the server.',
systemMessage: 'The starting message that defines how model should behave.',
samplers: 'The order at which samplers are applied, in simplified way. Default is "dkypmxt": dry->top_k->typ_p->top_p->min_p->xtc->temperature',
temperature: 'Controls the randomness of the generated text by affecting the probability distribution of the output tokens. Higher = more random, lower = more focused.',
dynatemp_range: 'Addon for the temperature sampler. The added value to the range of dynamic temperature, which adjusts probabilities by entropy of tokens.',
dynatemp_exponent: 'Addon for the temperature sampler. Smoothes out the probability redistribution based on the most probable token.',
top_k: 'Keeps only k top tokens.',
top_p: 'Limits tokens to those that together have a cumulative probability of at least p',
min_p: 'Limits tokens based on the minimum probability for a token to be considered, relative to the probability of the most likely token.',
xtc_probability: 'XTC sampler cuts out top tokens; this parameter controls the chance of cutting tokens at all. 0 disables XTC.',
xtc_threshold: 'XTC sampler cuts out top tokens; this parameter controls the token probability that is required to cut that token.',
typical_p: 'Sorts and limits tokens based on the difference between log-probability and entropy.',
repeat_last_n: 'Last n tokens to consider for penalizing repetition',
repeat_penalty: 'Controls the repetition of token sequences in the generated text',
presence_penalty: 'Limits tokens based on whether they appear in the output or not.',
frequency_penalty: 'Limits tokens based on how often they appear in the output.',
dry_multiplier: 'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets the DRY sampling multiplier.',
dry_base: 'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets the DRY sampling base value.',
dry_allowed_length: 'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets the allowed length for DRY sampling.',
dry_penalty_last_n: 'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets DRY penalty for the last n tokens.',
max_tokens: 'The maximum number of token per output.',
custom: '', // custom json-stringified object
};
// config keys having numeric value (i.e. temperature, top_k, top_p, etc)
const CONFIG_NUMERIC_KEYS = Object.entries(CONFIG_DEFAULT).filter(e => isNumeric(e[1])).map(e => e[0]);
// list of themes supported by daisyui
const THEMES = ['light', 'dark']
// make sure light & dark are always at the beginning
.concat(Object.keys(daisyuiThemes).filter(t => t !== 'light' && t !== 'dark'));
// markdown support
const VueMarkdown = defineComponent(
(props) => {
const md = shallowRef(new MarkdownIt({
breaks: true,
highlight: function (str, lang) { // Add highlight.js
if (lang && hljs.getLanguage(lang)) {
try {
return '<pre dir="auto"><code class="hljs">' +
hljs.highlight(str, { language: lang, ignoreIllegals: true }).value +
'</code></pre>';
} catch (__) {}
}
return '<pre dir="auto"><code class="hljs">' + md.value.utils.escapeHtml(str) + '</code></pre>';
}
}));
// support latex with double dollar sign and square brackets
md.value.use(markdownItKatexGpt, {
delimiters: [
{ left: '\\[', right: '\\]', display: true },
{ left: '\\(', right: '\\)', display: false },
{ left: '$$', right: '$$', display: false },
// do not add single dollar sign here, other wise it will confused with dollar used for money symbol
],
throwOnError: false,
});
// support latex with single dollar sign
md.value.use(markdownItKatexNormal, { throwOnError: false });
// add copy button to code blocks
const origFenchRenderer = md.value.renderer.rules.fence;
md.value.renderer.rules.fence = (tokens, idx, ...args) => {
const content = tokens[idx].content;
const origRendered = origFenchRenderer(tokens, idx, ...args);
return `<div class="relative my-4">
<div class="text-right sticky top-4 mb-2 mr-2 h-0">
<button class="badge btn-mini" onclick="copyStr(${escapeAttr(JSON.stringify(content))})">📋 Copy</button>
</div>
${origRendered}
</div>`;
};
window.copyStr = copyStr;
const content = computed(() => md.value.render(props.source));
return () => h('div', { innerHTML: content.value });
},
{ props: ['source'] }
);
// input field to be used by settings modal
const SettingsModalShortInput = defineComponent({
template: document.getElementById('settings-modal-short-input').innerHTML,
props: {
label: { type: String, required: false },
configKey: String,
configDefault: Object,
configInfo: Object,
modelValue: [Object, String, Number],
},
});
// message bubble component
const MessageBubble = defineComponent({
components: {
VueMarkdown
},
template: document.getElementById('message-bubble').innerHTML,
props: {
config: Object,
msg: Object,
isGenerating: Boolean,
showThoughtInProgress: Boolean,
editUserMsgAndRegenerate: Function,
regenerateMsg: Function,
},
data() {
return {
editingContent: null,
};
},
computed: {
timings() {
if (!this.msg.timings) return null;
return {
...this.msg.timings,
prompt_per_second: this.msg.timings.prompt_n / (this.msg.timings.prompt_ms / 1000),
predicted_per_second: this.msg.timings.predicted_n / (this.msg.timings.predicted_ms / 1000),
};
},
splitMsgContent() {
const content = this.msg.content;
if (this.msg.role !== 'assistant') {
return { content };
}
let actualContent = '';
let cot = '';
let isThinking = false;
let thinkSplit = content.split('<think>', 2);
actualContent += thinkSplit[0];
while (thinkSplit[1] !== undefined) {
// <think> tag found
thinkSplit = thinkSplit[1].split('</think>', 2);
cot += thinkSplit[0];
isThinking = true;
if (thinkSplit[1] !== undefined) {
// </think> closing tag found
isThinking = false;
thinkSplit = thinkSplit[1].split('<think>', 2);
actualContent += thinkSplit[0];
}
}
return { content: actualContent, cot, isThinking };
},
},
methods: {
copyMsg() {
copyStr(this.msg.content);
},
editMsg() {
this.editUserMsgAndRegenerate({
...this.msg,
content: this.editingContent,
});
this.editingContent = null;
},
},
});
// coversations is stored in localStorage
// format: { [convId]: { id: string, lastModified: number, messages: [...] } }
// convId is a string prefixed with 'conv-'
const StorageUtils = {
/**
* manage conversations
* @returns {Array<Conversation>}
*/
getAllConversations() {
const res = [];
for (const key in localStorage) {
if (key.startsWith('conv-')) {
res.push(JSON.parse(localStorage.getItem(key)));
}
}
res.sort((a, b) => b.lastModified - a.lastModified);
return res;
},
/**
* can return null if convId does not exist
* @param {string} convId
* @returns {Conversation | null}
*/
getOneConversation(convId) {
return JSON.parse(localStorage.getItem(convId) || 'null');
},
/**
* if convId does not exist, create one
* @param {string} convId
* @param {Message} msg
*/
appendMsg(convId, msg) {
if (msg.content === null) return;
const conv = StorageUtils.getOneConversation(convId) || {
id: convId,
lastModified: Date.now(),
messages: [],
};
conv.messages.push(msg);
conv.lastModified = Date.now();
localStorage.setItem(convId, JSON.stringify(conv));
},
/**
* Get new conversation id
* @returns {string}
*/
getNewConvId() {
return `conv-${Date.now()}`;
},
/**
* remove conversation by id
* @param {string} convId
*/
remove(convId) {
localStorage.removeItem(convId);
},
/**
* remove all conversations
* @param {string} convId
*/
filterAndKeepMsgs(convId, predicate) {
const conv = StorageUtils.getOneConversation(convId);
if (!conv) return;
conv.messages = conv.messages.filter(predicate);
conv.lastModified = Date.now();
localStorage.setItem(convId, JSON.stringify(conv));
},
/**
* remove last message from conversation
* @param {string} convId
* @returns {Message | undefined}
*/
popMsg(convId) {
const conv = StorageUtils.getOneConversation(convId);
if (!conv) return;
const msg = conv.messages.pop();
conv.lastModified = Date.now();
if (conv.messages.length === 0) {
StorageUtils.remove(convId);
} else {
localStorage.setItem(convId, JSON.stringify(conv));
}
return msg;
},
// manage config
getConfig() {
const savedVal = JSON.parse(localStorage.getItem('config') || '{}');
// to prevent breaking changes in the future, we always provide default value for missing keys
return {
...CONFIG_DEFAULT,
...savedVal,
};
},
setConfig(config) {
localStorage.setItem('config', JSON.stringify(config));
},
getTheme() {
return localStorage.getItem('theme') || 'auto';
},
setTheme(theme) {
if (theme === 'auto') {
localStorage.removeItem('theme');
} else {
localStorage.setItem('theme', theme);
}
},
};
// scroll to bottom of chat messages
// if requiresNearBottom is true, only auto-scroll if user is near bottom
const chatScrollToBottom = (requiresNearBottom) => {
const msgListElem = document.getElementById('messages-list');
const spaceToBottom = msgListElem.scrollHeight - msgListElem.scrollTop - msgListElem.clientHeight;
if (!requiresNearBottom || (spaceToBottom < 100)) {
setTimeout(() => msgListElem.scrollTo({ top: msgListElem.scrollHeight }), 1);
}
};
// wrapper for SSE
async function* sendSSEPostRequest(url, fetchOptions) {
const res = await fetch(url, fetchOptions);
const lines = res.body
.pipeThrough(new TextDecoderStream())
.pipeThrough(new TextLineStream());
for await (const line of asyncIterator(lines)) {
if (isDev) console.log({line});
if (line.startsWith('data:') && !line.endsWith('[DONE]')) {
const data = JSON.parse(line.slice(5));
yield data;
} else if (line.startsWith('error:')) {
const data = JSON.parse(line.slice(6));
throw new Error(data.message || 'Unknown error');
}
}
};
const mainApp = createApp({
components: {
VueMarkdown,
SettingsModalShortInput,
MessageBubble,
},
data() {
return {
conversations: StorageUtils.getAllConversations(),
/** @type {Array<Message>} */
messages: [],
viewingConvId: StorageUtils.getNewConvId(),
inputMsg: '',
isGenerating: false,
/** @type {Array<Message> | null} */
pendingMsg: null, // the on-going message from assistant
stopGeneration: () => {},
selectedTheme: StorageUtils.getTheme(),
config: StorageUtils.getConfig(),
showConfigDialog: false,
// const
themes: THEMES,
/** @type {CONFIG_DEFAULT} */
configDefault: {...CONFIG_DEFAULT},
configInfo: {...CONFIG_INFO},
isDev,
}
},
computed: {},
mounted() {
document.getElementById('app').classList.remove('opacity-0'); // show app
// scroll to the bottom when the pending message height is updated
const pendingMsgElem = document.getElementById('pending-msg');
const resizeObserver = new ResizeObserver(() => {
if (this.isGenerating) chatScrollToBottom(true);
});
resizeObserver.observe(pendingMsgElem);
this.setSelectedTheme(this.selectedTheme);
},
watch: {
viewingConvId: function(val, oldVal) {
if (val != oldVal) {
this.fetchMessages();
chatScrollToBottom();
this.hideSidebar();
}
}
},
methods: {
hideSidebar() {
document.getElementById('toggle-drawer').checked = false;
},
setSelectedTheme(theme) {
this.selectedTheme = theme;
document.body.setAttribute('data-theme', theme);
document.body.setAttribute('data-color-scheme', daisyuiThemes[theme]?.['color-scheme'] ?? 'auto');
StorageUtils.setTheme(theme);
},
newConversation() {
if (this.isGenerating) return;
this.viewingConvId = StorageUtils.getNewConvId();
},
setViewingConv(convId) {
if (this.isGenerating) return;
this.viewingConvId = convId;
},
deleteConv(convId) {
if (this.isGenerating) return;
if (window.confirm('Are you sure to delete this conversation?')) {
StorageUtils.remove(convId);
if (this.viewingConvId === convId) {
this.viewingConvId = StorageUtils.getNewConvId();
}
this.fetchConversation();
this.fetchMessages();
}
},
downloadConv(convId) {
const conversation = StorageUtils.getOneConversation(convId);
if (!conversation) {
alert('Conversation not found.');
return;
}
const conversationJson = JSON.stringify(conversation, null, 2);
const blob = new Blob([conversationJson], { type: 'application/json' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = `conversation_${convId}.json`;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
URL.revokeObjectURL(url);
},
async sendMessage() {
if (!this.inputMsg) return;
const currConvId = this.viewingConvId;
StorageUtils.appendMsg(currConvId, {
id: Date.now(),
role: 'user',
content: this.inputMsg,
});
this.fetchConversation();
this.fetchMessages();
this.inputMsg = '';
this.generateMessage(currConvId);
chatScrollToBottom();
},
async generateMessage(currConvId) {
if (this.isGenerating) return;
this.pendingMsg = { id: Date.now()+1, role: 'assistant', content: null };
this.isGenerating = true;
try {
/** @type {CONFIG_DEFAULT} */
const config = this.config;
const abortController = new AbortController();
this.stopGeneration = () => abortController.abort();
/** @type {Array<APIMessage>} */
let messages = [
{ role: 'system', content: config.systemMessage },
...normalizeMsgsForAPI(this.messages),
];
if (config.excludeThoughtOnReq) {
messages = filterThoughtFromMsgs(messages);
}
if (isDev) console.log({messages});
const params = {
messages,
stream: true,
cache_prompt: true,
samplers: config.samplers,
temperature: config.temperature,
dynatemp_range: config.dynatemp_range,
dynatemp_exponent: config.dynatemp_exponent,
top_k: config.top_k,
top_p: config.top_p,
min_p: config.min_p,
typical_p: config.typical_p,
xtc_probability: config.xtc_probability,
xtc_threshold: config.xtc_threshold,
repeat_last_n: config.repeat_last_n,
repeat_penalty: config.repeat_penalty,
presence_penalty: config.presence_penalty,
frequency_penalty: config.frequency_penalty,
dry_multiplier: config.dry_multiplier,
dry_base: config.dry_base,
dry_allowed_length: config.dry_allowed_length,
dry_penalty_last_n: config.dry_penalty_last_n,
max_tokens: config.max_tokens,
timings_per_token: !!config.showTokensPerSecond,
...(config.custom.length ? JSON.parse(config.custom) : {}),
};
const chunks = sendSSEPostRequest(`${BASE_URL}/v1/chat/completions`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
...(config.apiKey ? {'Authorization': `Bearer ${config.apiKey}`} : {})
},
body: JSON.stringify(params),
signal: abortController.signal,
});
for await (const chunk of chunks) {
const stop = chunk.stop;
const addedContent = chunk.choices[0].delta.content;
const lastContent = this.pendingMsg.content || '';
if (addedContent) {
this.pendingMsg = {
id: this.pendingMsg.id,
role: 'assistant',
content: lastContent + addedContent,
};
}
const timings = chunk.timings;
if (timings && config.showTokensPerSecond) {
// only extract what's really needed, to save some space
this.pendingMsg.timings = {
prompt_n: timings.prompt_n,
prompt_ms: timings.prompt_ms,
predicted_n: timings.predicted_n,
predicted_ms: timings.predicted_ms,
};
}
}
StorageUtils.appendMsg(currConvId, this.pendingMsg);
this.fetchConversation();
this.fetchMessages();
setTimeout(() => document.getElementById('msg-input').focus(), 1);
} catch (error) {
if (error.name === 'AbortError') {
// user stopped the generation via stopGeneration() function
StorageUtils.appendMsg(currConvId, this.pendingMsg);
this.fetchConversation();
this.fetchMessages();
} else {
console.error(error);
alert(error);
// pop last user message
const lastUserMsg = StorageUtils.popMsg(currConvId);
this.inputMsg = lastUserMsg ? lastUserMsg.content : '';
}
}
this.pendingMsg = null;
this.isGenerating = false;
this.stopGeneration = () => {};
this.fetchMessages();
chatScrollToBottom();
},
// message actions
regenerateMsg(msg) {
if (this.isGenerating) return;
// TODO: somehow keep old history (like how ChatGPT has different "tree"). This can be done by adding "sub-conversations" with "subconv-" prefix, and new message will have a list of subconvIds
const currConvId = this.viewingConvId;
StorageUtils.filterAndKeepMsgs(currConvId, (m) => m.id < msg.id);
this.fetchConversation();
this.fetchMessages();
this.generateMessage(currConvId);
},
editUserMsgAndRegenerate(msg) {
if (this.isGenerating) return;
const currConvId = this.viewingConvId;
const newContent = msg.content;
StorageUtils.filterAndKeepMsgs(currConvId, (m) => m.id < msg.id);
StorageUtils.appendMsg(currConvId, {
id: Date.now(),
role: 'user',
content: newContent,
});
this.fetchConversation();
this.fetchMessages();
this.generateMessage(currConvId);
},
// settings dialog methods
closeAndSaveConfigDialog() {
try {
if (this.config.custom.length) JSON.parse(this.config.custom);
} catch (error) {
alert('Invalid JSON for custom config. Please either fix it or leave it empty.');
return;
}
for (const key of CONFIG_NUMERIC_KEYS) {
if (isNaN(this.config[key]) || this.config[key].toString().trim().length === 0) {
alert(`Invalid number for ${key} (expected an integer or a float)`);
return;
}
this.config[key] = parseFloat(this.config[key]);
}
this.showConfigDialog = false;
StorageUtils.setConfig(this.config);
},
closeAndDiscardConfigDialog() {
this.showConfigDialog = false;
this.config = StorageUtils.getConfig();
},
resetConfigDialog() {
if (window.confirm('Are you sure to reset all settings?')) {
this.config = {...CONFIG_DEFAULT};
}
},
// sync state functions
fetchConversation() {
this.conversations = StorageUtils.getAllConversations();
},
fetchMessages() {
this.messages = StorageUtils.getOneConversation(this.viewingConvId)?.messages ?? [];
},
// debug functions
async debugImportDemoConv() {
const res = await fetch('/demo-conversation.json');
const demoConv = await res.json();
StorageUtils.remove(demoConv.id);
for (const msg of demoConv.messages) {
StorageUtils.appendMsg(demoConv.id, msg);
}
this.fetchConversation();
}
},
});
mainApp.config.errorHandler = alert;
try {
mainApp.mount('#app');
} catch (err) {
console.error(err);
document.getElementById('app').innerHTML = `<div style="margin:2em auto">
Failed to start app. Please try clearing localStorage and try again.<br/>
<br/>
<button class="btn" onClick="localStorage.clear(); window.location.reload();">Clear localStorage</button>
</div>`;
}
/**
* filter out redundant fields upon sending to API
* @param {Array<APIMessage>} messages
* @returns {Array<APIMessage>}
*/
function normalizeMsgsForAPI(messages) {
return messages.map((msg) => {
return {
role: msg.role,
content: msg.content,
};
});
}
/**
* recommended for DeepsSeek-R1, filter out content between <think> and </think> tags
* @param {Array<APIMessage>} messages
* @returns {Array<APIMessage>}
*/
function filterThoughtFromMsgs(messages) {
return messages.map((msg) => {
return {
role: msg.role,
content: msg.role === 'assistant'
? msg.content.split('</think>').at(-1).trim()
: msg.content,
};
});
}

View file

@ -0,0 +1,10 @@
import { StrictMode } from 'react';
import { createRoot } from 'react-dom/client';
import './index.scss';
import App from './App.tsx';
createRoot(document.getElementById('root')!).render(
<StrictMode>
<App />
</StrictMode>
);

View file

@ -0,0 +1,304 @@
import React, { createContext, useContext, useEffect, useState } from 'react';
import { APIMessage, Conversation, Message, PendingMessage } from './types';
import StorageUtils from './storage';
import {
filterThoughtFromMsgs,
normalizeMsgsForAPI,
getSSEStreamAsync,
} from './misc';
import { BASE_URL, CONFIG_DEFAULT, isDev } from '../Config';
import { matchPath, useLocation } from 'react-router';
interface AppContextValue {
viewingConversation: Conversation | null;
pendingMessages: Record<Conversation['id'], PendingMessage>;
isGenerating: (convId: string) => boolean;
sendMessage: (
convId: string,
content: string,
onChunk?: CallbackGeneratedChunk
) => Promise<boolean>;
stopGenerating: (convId: string) => void;
replaceMessageAndGenerate: (
convId: string,
origMsgId: Message['id'],
content?: string,
onChunk?: CallbackGeneratedChunk
) => Promise<void>;
config: typeof CONFIG_DEFAULT;
saveConfig: (config: typeof CONFIG_DEFAULT) => void;
}
// for now, this callback is only used for scrolling to the bottom of the chat
type CallbackGeneratedChunk = () => void;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const AppContext = createContext<AppContextValue>({} as any);
export const AppContextProvider = ({
children,
}: {
children: React.ReactElement;
}) => {
const { pathname } = useLocation();
const params = matchPath('/chat/:convId', pathname);
const convId = params?.params?.convId;
const [viewingConversation, setViewingConversation] =
useState<Conversation | null>(null);
const [pendingMessages, setPendingMessages] = useState<
Record<Conversation['id'], PendingMessage>
>({});
const [aborts, setAborts] = useState<
Record<Conversation['id'], AbortController>
>({});
const [config, setConfig] = useState(StorageUtils.getConfig());
useEffect(() => {
const handleConversationChange = (changedConvId: string) => {
if (changedConvId !== convId) return;
setViewingConversation(StorageUtils.getOneConversation(convId));
};
StorageUtils.onConversationChanged(handleConversationChange);
setViewingConversation(StorageUtils.getOneConversation(convId ?? ''));
return () => {
StorageUtils.offConversationChanged(handleConversationChange);
};
}, [convId]);
const setPending = (convId: string, pendingMsg: PendingMessage | null) => {
// if pendingMsg is null, remove the key from the object
if (!pendingMsg) {
setPendingMessages((prev) => {
const newState = { ...prev };
delete newState[convId];
return newState;
});
} else {
setPendingMessages((prev) => ({ ...prev, [convId]: pendingMsg }));
}
};
const setAbort = (convId: string, controller: AbortController | null) => {
if (!controller) {
setAborts((prev) => {
const newState = { ...prev };
delete newState[convId];
return newState;
});
} else {
setAborts((prev) => ({ ...prev, [convId]: controller }));
}
};
////////////////////////////////////////////////////////////////////////
// public functions
const isGenerating = (convId: string) => !!pendingMessages[convId];
const generateMessage = async (
convId: string,
onChunk?: CallbackGeneratedChunk
) => {
if (isGenerating(convId)) return;
const config = StorageUtils.getConfig();
const currConversation = StorageUtils.getOneConversation(convId);
if (!currConversation) {
throw new Error('Current conversation is not found');
}
const abortController = new AbortController();
setAbort(convId, abortController);
let pendingMsg: PendingMessage = {
id: Date.now() + 1,
role: 'assistant',
content: null,
};
setPending(convId, pendingMsg);
try {
// prepare messages for API
let messages: APIMessage[] = [
...(config.systemMessage.length === 0
? []
: [{ role: 'system', content: config.systemMessage } as APIMessage]),
...normalizeMsgsForAPI(currConversation?.messages ?? []),
];
if (config.excludeThoughtOnReq) {
messages = filterThoughtFromMsgs(messages);
}
if (isDev) console.log({ messages });
// prepare params
const params = {
messages,
stream: true,
cache_prompt: true,
samplers: config.samplers,
temperature: config.temperature,
dynatemp_range: config.dynatemp_range,
dynatemp_exponent: config.dynatemp_exponent,
top_k: config.top_k,
top_p: config.top_p,
min_p: config.min_p,
typical_p: config.typical_p,
xtc_probability: config.xtc_probability,
xtc_threshold: config.xtc_threshold,
repeat_last_n: config.repeat_last_n,
repeat_penalty: config.repeat_penalty,
presence_penalty: config.presence_penalty,
frequency_penalty: config.frequency_penalty,
dry_multiplier: config.dry_multiplier,
dry_base: config.dry_base,
dry_allowed_length: config.dry_allowed_length,
dry_penalty_last_n: config.dry_penalty_last_n,
max_tokens: config.max_tokens,
timings_per_token: !!config.showTokensPerSecond,
...(config.custom.length ? JSON.parse(config.custom) : {}),
};
// send request
const fetchResponse = await fetch(`${BASE_URL}/v1/chat/completions`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
...(config.apiKey
? { Authorization: `Bearer ${config.apiKey}` }
: {}),
},
body: JSON.stringify(params),
signal: abortController.signal,
});
if (fetchResponse.status !== 200) {
const body = await fetchResponse.json();
throw new Error(body?.error?.message || 'Unknown error');
}
const chunks = getSSEStreamAsync(fetchResponse);
for await (const chunk of chunks) {
// const stop = chunk.stop;
if (chunk.error) {
throw new Error(chunk.error?.message || 'Unknown error');
}
const addedContent = chunk.choices[0].delta.content;
const lastContent = pendingMsg.content || '';
if (addedContent) {
pendingMsg = {
id: pendingMsg.id,
role: 'assistant',
content: lastContent + addedContent,
};
}
const timings = chunk.timings;
if (timings && config.showTokensPerSecond) {
// only extract what's really needed, to save some space
pendingMsg.timings = {
prompt_n: timings.prompt_n,
prompt_ms: timings.prompt_ms,
predicted_n: timings.predicted_n,
predicted_ms: timings.predicted_ms,
};
}
setPending(convId, pendingMsg);
onChunk?.();
}
} catch (err) {
setPending(convId, null);
if ((err as Error).name === 'AbortError') {
// user stopped the generation via stopGeneration() function
// we can safely ignore this error
} else {
console.error(err);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
alert((err as any)?.message ?? 'Unknown error');
throw err; // rethrow
}
}
if (pendingMsg.content) {
StorageUtils.appendMsg(currConversation.id, {
id: pendingMsg.id,
content: pendingMsg.content,
role: pendingMsg.role,
timings: pendingMsg.timings,
});
}
setPending(convId, null);
onChunk?.(); // trigger scroll to bottom
};
const sendMessage = async (
convId: string,
content: string,
onChunk?: CallbackGeneratedChunk
): Promise<boolean> => {
if (isGenerating(convId) || content.trim().length === 0) return false;
StorageUtils.appendMsg(convId, {
id: Date.now(),
role: 'user',
content,
});
try {
await generateMessage(convId, onChunk);
return true;
} catch (_) {
// rollback
StorageUtils.popMsg(convId);
}
return false;
};
const stopGenerating = (convId: string) => {
setPending(convId, null);
aborts[convId]?.abort();
};
// if content is undefined, we remove last assistant message
const replaceMessageAndGenerate = async (
convId: string,
origMsgId: Message['id'],
content?: string,
onChunk?: CallbackGeneratedChunk
) => {
if (isGenerating(convId)) return;
StorageUtils.filterAndKeepMsgs(convId, (msg) => msg.id < origMsgId);
if (content) {
StorageUtils.appendMsg(convId, {
id: Date.now(),
role: 'user',
content,
});
}
await generateMessage(convId, onChunk);
};
const saveConfig = (config: typeof CONFIG_DEFAULT) => {
StorageUtils.setConfig(config);
setConfig(config);
};
return (
<AppContext.Provider
value={{
isGenerating,
viewingConversation,
pendingMessages,
sendMessage,
stopGenerating,
replaceMessageAndGenerate,
config,
saveConfig,
}}
>
{children}
</AppContext.Provider>
);
};
export const useAppContext = () => useContext(AppContext);

View file

@ -0,0 +1,87 @@
// @ts-expect-error this package does not have typing
import TextLineStream from 'textlinestream';
import { APIMessage, Message } from './types';
// ponyfill for missing ReadableStream asyncIterator on Safari
import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';
import { isDev } from '../Config';
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export const isString = (x: any) => !!x.toLowerCase;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export const isBoolean = (x: any) => x === true || x === false;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export const isNumeric = (n: any) => !isString(n) && !isNaN(n) && !isBoolean(n);
export const escapeAttr = (str: string) =>
str.replace(/>/g, '&gt;').replace(/"/g, '&quot;');
// wrapper for SSE
export async function* getSSEStreamAsync(fetchResponse: Response) {
if (!fetchResponse.body) throw new Error('Response body is empty');
const lines: ReadableStream<string> = fetchResponse.body
.pipeThrough(new TextDecoderStream())
.pipeThrough(new TextLineStream());
// @ts-expect-error asyncIterator complains about type, but it should work
for await (const line of asyncIterator(lines)) {
if (isDev) console.log({ line });
if (line.startsWith('data:') && !line.endsWith('[DONE]')) {
const data = JSON.parse(line.slice(5));
yield data;
} else if (line.startsWith('error:')) {
const data = JSON.parse(line.slice(6));
throw new Error(data.message || 'Unknown error');
}
}
}
// copy text to clipboard
export const copyStr = (textToCopy: string) => {
// Navigator clipboard api needs a secure context (https)
if (navigator.clipboard && window.isSecureContext) {
navigator.clipboard.writeText(textToCopy);
} else {
// Use the 'out of viewport hidden text area' trick
const textArea = document.createElement('textarea');
textArea.value = textToCopy;
// Move textarea out of the viewport so it's not visible
textArea.style.position = 'absolute';
textArea.style.left = '-999999px';
document.body.prepend(textArea);
textArea.select();
document.execCommand('copy');
}
};
/**
* filter out redundant fields upon sending to API
*/
export function normalizeMsgsForAPI(messages: Message[]) {
return messages.map((msg) => {
return {
role: msg.role,
content: msg.content,
};
}) as APIMessage[];
}
/**
* recommended for DeepsSeek-R1, filter out content between <think> and </think> tags
*/
export function filterThoughtFromMsgs(messages: APIMessage[]) {
return messages.map((msg) => {
return {
role: msg.role,
content:
msg.role === 'assistant'
? msg.content.split('</think>').at(-1)!.trim()
: msg.content,
} as APIMessage;
});
}
export function classNames(classes: Record<string, boolean>): string {
return Object.entries(classes)
.filter(([_, value]) => value)
.map(([key, _]) => key)
.join(' ');
}

View file

@ -0,0 +1,138 @@
// coversations is stored in localStorage
// format: { [convId]: { id: string, lastModified: number, messages: [...] } }
import { CONFIG_DEFAULT } from '../Config';
import { Conversation, Message } from './types';
const event = new EventTarget();
type CallbackConversationChanged = (convId: string) => void;
let onConversationChangedHandlers: [
CallbackConversationChanged,
EventListener,
][] = [];
const dispatchConversationChange = (convId: string) => {
event.dispatchEvent(
new CustomEvent('conversationChange', { detail: { convId } })
);
};
// convId is a string prefixed with 'conv-'
const StorageUtils = {
/**
* manage conversations
*/
getAllConversations(): Conversation[] {
const res = [];
for (const key in localStorage) {
if (key.startsWith('conv-')) {
res.push(JSON.parse(localStorage.getItem(key) ?? '{}'));
}
}
res.sort((a, b) => b.lastModified - a.lastModified);
return res;
},
/**
* can return null if convId does not exist
*/
getOneConversation(convId: string): Conversation | null {
return JSON.parse(localStorage.getItem(convId) || 'null');
},
/**
* if convId does not exist, create one
*/
appendMsg(convId: string, msg: Message): void {
if (msg.content === null) return;
const conv = StorageUtils.getOneConversation(convId) || {
id: convId,
lastModified: Date.now(),
messages: [],
};
conv.messages.push(msg);
conv.lastModified = Date.now();
localStorage.setItem(convId, JSON.stringify(conv));
dispatchConversationChange(convId);
},
/**
* Get new conversation id
*/
getNewConvId(): string {
return `conv-${Date.now()}`;
},
/**
* remove conversation by id
*/
remove(convId: string): void {
localStorage.removeItem(convId);
dispatchConversationChange(convId);
},
/**
* remove all conversations
*/
filterAndKeepMsgs(
convId: string,
predicate: (msg: Message) => boolean
): void {
const conv = StorageUtils.getOneConversation(convId);
if (!conv) return;
conv.messages = conv.messages.filter(predicate);
conv.lastModified = Date.now();
localStorage.setItem(convId, JSON.stringify(conv));
dispatchConversationChange(convId);
},
/**
* remove last message from conversation
*/
popMsg(convId: string): Message | undefined {
const conv = StorageUtils.getOneConversation(convId);
if (!conv) return;
const msg = conv.messages.pop();
conv.lastModified = Date.now();
if (conv.messages.length === 0) {
StorageUtils.remove(convId);
} else {
localStorage.setItem(convId, JSON.stringify(conv));
}
dispatchConversationChange(convId);
return msg;
},
// event listeners
onConversationChanged(callback: CallbackConversationChanged) {
const fn = (e: Event) => callback((e as CustomEvent).detail.convId);
onConversationChangedHandlers.push([callback, fn]);
event.addEventListener('conversationChange', fn);
},
offConversationChanged(callback: CallbackConversationChanged) {
const fn = onConversationChangedHandlers.find(([cb, _]) => cb === callback);
if (fn) {
event.removeEventListener('conversationChange', fn[1]);
}
onConversationChangedHandlers = [];
},
// manage config
getConfig(): typeof CONFIG_DEFAULT {
const savedVal = JSON.parse(localStorage.getItem('config') || '{}');
// to prevent breaking changes in the future, we always provide default value for missing keys
return {
...CONFIG_DEFAULT,
...savedVal,
};
},
setConfig(config: typeof CONFIG_DEFAULT) {
localStorage.setItem('config', JSON.stringify(config));
},
getTheme(): string {
return localStorage.getItem('theme') || 'auto';
},
setTheme(theme: string) {
if (theme === 'auto') {
localStorage.removeItem('theme');
} else {
localStorage.setItem('theme', theme);
}
},
};
export default StorageUtils;

View file

@ -0,0 +1,25 @@
export interface TimingReport {
prompt_n: number;
prompt_ms: number;
predicted_n: number;
predicted_ms: number;
}
export interface Message {
id: number;
role: 'user' | 'assistant' | 'system';
content: string;
timings?: TimingReport;
}
export type APIMessage = Pick<Message, 'role' | 'content'>;
export interface Conversation {
id: string; // format: `conv-{timestamp}`
lastModified: number; // timestamp from Date.now()
messages: Message[];
}
export type PendingMessage = Omit<Message, 'content'> & {
content: string | null;
};

View file

@ -0,0 +1 @@
/// <reference types="vite/client" />

View file

@ -0,0 +1,26 @@
{
"compilerOptions": {
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
"target": "ES2021",
"useDefineForClassFields": true,
"lib": ["ES2021", "DOM", "DOM.Iterable"],
"module": "ESNext",
"skipLibCheck": true,
/* Bundler mode */
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"isolatedModules": true,
"moduleDetection": "force",
"noEmit": true,
"jsx": "react-jsx",
/* Linting */
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedSideEffectImports": true
},
"include": ["src"]
}

View file

@ -0,0 +1,7 @@
{
"files": [],
"references": [
{ "path": "./tsconfig.app.json" },
{ "path": "./tsconfig.node.json" }
]
}

View file

@ -0,0 +1,24 @@
{
"compilerOptions": {
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
"target": "ES2022",
"lib": ["ES2023"],
"module": "ESNext",
"skipLibCheck": true,
/* Bundler mode */
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"isolatedModules": true,
"moduleDetection": "force",
"noEmit": true,
/* Linting */
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedSideEffectImports": true
},
"include": ["vite.config.ts"]
}

View file

@ -1,8 +1,11 @@
import { defineConfig, PluginOption } from 'vite';
import react from '@vitejs/plugin-react';
import { viteSingleFile } from 'vite-plugin-singlefile';
import path from 'path';
import fs from 'fs';
import zlib from 'zlib';
import path from 'node:path';
import fs from 'node:fs';
import zlib from 'node:zlib';
/* eslint-disable */
const MAX_BUNDLE_SIZE = 1.5 * 1024 * 1024; // only increase when absolutely necessary
@ -15,20 +18,26 @@ const GUIDE_FOR_FRONTEND = `
-->
`.trim();
const FRONTEND_PLUGINS = [react()];
const BUILD_PLUGINS = [
...FRONTEND_PLUGINS,
viteSingleFile(),
(function llamaCppPlugin() {
let config;
let config: any;
return {
name: 'llamacpp:build',
apply: 'build',
async configResolved(_config) {
async configResolved(_config: any) {
config = _config;
},
writeBundle() {
const outputIndexHtml = path.join(config.build.outDir, 'index.html');
const content = GUIDE_FOR_FRONTEND + '\n' + fs.readFileSync(outputIndexHtml, 'utf-8');
const compressed = zlib.gzipSync(Buffer.from(content, 'utf-8'), { level: 9 });
const content =
GUIDE_FOR_FRONTEND + '\n' + fs.readFileSync(outputIndexHtml, 'utf-8');
const compressed = zlib.gzipSync(Buffer.from(content, 'utf-8'), {
level: 9,
});
// because gzip header contains machine-specific info, we must remove these data from the header
// timestamp
@ -42,18 +51,26 @@ const BUILD_PLUGINS = [
if (compressed.byteLength > MAX_BUNDLE_SIZE) {
throw new Error(
`Bundle size is too large (${Math.ceil(compressed.byteLength / 1024)} KB).\n` +
`Please reduce the size of the frontend or increase MAX_BUNDLE_SIZE in vite.config.js.\n`,
`Please reduce the size of the frontend or increase MAX_BUNDLE_SIZE in vite.config.js.\n`
);
}
const targetOutputFile = path.join(config.build.outDir, '../../public/index.html.gz');
const targetOutputFile = path.join(
config.build.outDir,
'../../public/index.html.gz'
);
fs.writeFileSync(targetOutputFile, compressed);
}
}
},
} satisfies PluginOption;
})(),
];
/** @type {import('vite').UserConfig} */
export default {
plugins: process.env.ANALYZE ? [] : BUILD_PLUGINS,
};
export default defineConfig({
// @ts-ignore
plugins: process.env.ANALYZE ? FRONTEND_PLUGINS : BUILD_PLUGINS,
server: {
proxy: {
'/v1': 'http://localhost:8080',
},
},
});