From bec44ebdbc2419cf03b83608bc99db274edd4e7e Mon Sep 17 00:00:00 2001
From: FSSRepo
Date: Sun, 14 May 2023 11:02:24 -0600
Subject: [PATCH] removed some whitespaces

---
 examples/server/CMakeLists.txt |  2 +-
 examples/server/README.md      |  5 +++--
 examples/server/server.cpp     | 12 ++++++------
 examples/server/server.h       |  6 +++---
 4 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
index 8b2bed23f..92079c5d0 100644
--- a/examples/server/CMakeLists.txt
+++ b/examples/server/CMakeLists.txt
@@ -5,4 +5,4 @@ target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
 if(TARGET BUILD_INFO)
   add_dependencies(${TARGET} BUILD_INFO)
-endif()
\ No newline at end of file
+endif()
diff --git a/examples/server/README.md b/examples/server/README.md
index 4e3a2ce3f..c20f316ca 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -108,8 +108,9 @@ Options:
 
 `threads`: Set the number of threads to use during computation.
 
-`context`: Set a short conversation as context. 
-Insert a array of this form: `{ role: "user", content: "Hello, Assistant." }`, where:
+`context`: Set a short conversation as context.
+
+Insert items to an array of this form: `{ role: "user", content: "Hello, Assistant." }`, where:
 
 `role` can be `system`, `assistant` and `user`.
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 4d891f27c..036a0b11c 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -44,7 +44,7 @@ bool Llama::load_context() {
 
 bool Llama::prompt_test() {
     embd_inp = ::llama_tokenize(ctx, params.prompt, true);
-    
+
     if ((int)embd_inp.size() > n_ctx - 4) {
         fprintf(stderr, "%s: error: prompt is too long (%d tokens, max %d)\n",
                 __func__, (int)embd_inp.size(), n_ctx - 4);
@@ -71,7 +71,7 @@ void Llama::setting_context() {
         fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
                 params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
     }
-    
+
     fprintf(stderr, "sampling: repeat_last_n = %d, repeat_penalty = %f, presence_penalty = %f, frequency_penalty = %f, top_k = %d, tfs_z = %f, top_p = %f, typical_p = %f, temp = %f, mirostat = %d, mirostat_lr = %f, mirostat_ent = %f\n",
             params.repeat_last_n, params.repeat_penalty, params.presence_penalty, params.frequency_penalty, params.top_k, params.tfs_z, params.top_p, params.typical_p, params.temp, params.mirostat, params.mirostat_eta, params.mirostat_tau);
     fprintf(stderr, "generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.n_batch, params.n_predict, params.n_keep);
@@ -636,7 +636,7 @@ int main(int argc, char ** argv) {
     Server svr;
 
     svr.Get("/", [](const Request &req, Response &res)
-            { 
+            {
                 res.set_content("<h1>llama.cpp server works</h1>", "text/html");
             }
     );
@@ -645,7 +645,7 @@
        if(!llama->context_config) {
            json body = json::parse(req.body);
            /*
-           Seed whould be passed by the request, but seem 
+           Seed whould be passed by the request, but seem
            the current implementation need it in the load file
            */
            if (!body["threads"].is_null())
@@ -747,7 +747,7 @@ int main(int argc, char ** argv) {
     });
 
     svr.Get("/completion", [&llama](const Request &req, Response &res)
-            { 
+            {
                 bool stream = false;
                 if (req.has_param("stream")) {
                     stream = req.get_param_value("stream") == "true";
@@ -788,7 +788,7 @@ int main(int argc, char ** argv) {
     });
 
     printf("llama.cpp HTTP Server Listening at http://%s:%i", hostname.c_str(), port);
-    
+
     // change hostname and port
     svr.listen(hostname, port);
 }
\ No newline at end of file
diff --git a/examples/server/server.h b/examples/server/server.h
index 7263ecc5a..fe5ab9607 100644
--- a/examples/server/server.h
+++ b/examples/server/server.h
@@ -4,7 +4,7 @@
 #include "common.h"
 #include "llama.h"
 
-/* 
+/*
     This isn't the best way to do this.
 
     Missing:
@@ -22,7 +22,7 @@ class Llama{
        void setting_context();
        int set_message(std::string msg);
        void release();
-        
+
        llama_token nextToken();
       std::string inference();
 
@@ -47,4 +47,4 @@ class Llama{
        // to ignore this in the completion
       std::vector<llama_token> user_tag_tokens;
       std::vector<llama_token> assistant_tag_tokens;
-};
\ No newline at end of file
+};