removed some whitespaces

FSSRepo 2023-05-14 11:02:24 -06:00
parent 0bb1ff4402
commit bec44ebdbc
4 changed files with 13 additions and 12 deletions

@@ -5,4 +5,4 @@ target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
  add_dependencies(${TARGET} BUILD_INFO)
endif()

@@ -108,8 +108,9 @@ Options:
`threads`: Set the number of threads to use during computation.
`context`: Set a short conversation as context.
-Insert a array of this form: `{ role: "user", content: "Hello, Assistant." }`, where:
+Insert items to an array of this form: `{ role: "user", content: "Hello, Assistant." }`, where:
`role` can be `system`, `assistant` and `user`.
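For illustration, the `context` payload described above could be assembled like this with nlohmann::json, the JSON library the server already uses for `json::parse`; the `{ role, content }` item shape comes from the README, while the surrounding program is just a sketch:

    #include <cstdio>
    #include <nlohmann/json.hpp>
    using json = nlohmann::json;

    int main() {
        // A short conversation for the `context` option; each item uses
        // the { role, content } shape the README describes.
        json context = json::array({
            { {"role", "system"},    {"content", "You are a helpful assistant."} },
            { {"role", "user"},      {"content", "Hello, Assistant."} },
            { {"role", "assistant"}, {"content", "Hello. How may I help you today?"} }
        });
        printf("%s\n", context.dump(2).c_str());
    }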

@@ -44,7 +44,7 @@ bool Llama::load_context() {
bool Llama::prompt_test() {
    embd_inp = ::llama_tokenize(ctx, params.prompt, true);
    if ((int)embd_inp.size() > n_ctx - 4)
    {
        fprintf(stderr, "%s: error: prompt is too long (%d tokens, max %d)\n", __func__, (int)embd_inp.size(), n_ctx - 4);
@@ -71,7 +71,7 @@ void Llama::setting_context() {
        fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
                params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
    }
    fprintf(stderr, "sampling: repeat_last_n = %d, repeat_penalty = %f, presence_penalty = %f, frequency_penalty = %f, top_k = %d, tfs_z = %f, top_p = %f, typical_p = %f, temp = %f, mirostat = %d, mirostat_lr = %f, mirostat_ent = %f\n",
            params.repeat_last_n, params.repeat_penalty, params.presence_penalty, params.frequency_penalty, params.top_k, params.tfs_z, params.top_p, params.typical_p, params.temp, params.mirostat, params.mirostat_eta, params.mirostat_tau);
    fprintf(stderr, "generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.n_batch, params.n_predict, params.n_keep);
@@ -636,7 +636,7 @@ int main(int argc, char ** argv) {
    Server svr;
    svr.Get("/", [](const Request &req, Response &res)
        {
            res.set_content("<h1>llama.cpp server works</h1>", "text/html");
        }
    );
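As a minimal, self-contained sketch of the cpp-httplib pattern the hunk above uses (a Server, a Get handler that fills the Response, then listen), with placeholder host and port rather than the example's real defaults:

    #include "httplib.h"  // cpp-httplib single-header library

    int main() {
        httplib::Server svr;

        // Same shape as the handler above: a lambda that receives the
        // Request and writes the Response body and MIME type.
        svr.Get("/", [](const httplib::Request &req, httplib::Response &res) {
            (void)req;  // unused here, as in the original handler
            res.set_content("<h1>llama.cpp server works</h1>", "text/html");
        });

        // Blocks, serving requests until the process exits.
        svr.listen("127.0.0.1", 8080);  // placeholder host/port
    }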
@@ -645,7 +645,7 @@ int main(int argc, char ** argv) {
        if(!llama->context_config) {
            json body = json::parse(req.body);
            /*
                Seed whould be passed by the request, but seem
                the current implementation need it in the load file
            */
            if (!body["threads"].is_null())
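The `is_null()` guard above is how the server reads optional fields from the request body; for comparison, nlohmann::json's `value()` does the same presence check and falls back to a default in one step (the default of 4 below is illustrative, not the server's):

    #include <cstdio>
    #include <nlohmann/json.hpp>
    using json = nlohmann::json;

    int main() {
        json body = json::parse(R"({ "threads": 8 })");

        // value() returns the field when present, else the given default,
        // collapsing the is_null() branch above into one line.
        int n_threads = body.value("threads", 4);
        printf("n_threads = %d\n", n_threads);
    }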
@@ -747,7 +747,7 @@ int main(int argc, char ** argv) {
    });
    svr.Get("/completion", [&llama](const Request &req, Response &res)
        {
            bool stream = false;
            if (req.has_param("stream")) {
                stream = req.get_param_value("stream") == "true";
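For reference, a call against this route could look like the following cpp-httplib client snippet; the `/completion` path and `stream` parameter come from the handler above, while the host and port are placeholders:

    #include "httplib.h"
    #include <cstdio>

    int main() {
        httplib::Client cli("127.0.0.1", 8080);  // placeholder host/port

        // Query the route registered above with streaming enabled.
        if (auto res = cli.Get("/completion?stream=true")) {
            printf("status: %d\n%s\n", res->status, res->body.c_str());
        }
    }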
@@ -788,7 +788,7 @@ int main(int argc, char ** argv) {
    });
    printf("llama.cpp HTTP Server Listening at http://%s:%i", hostname.c_str(), port);
    // change hostname and port
    svr.listen(hostname, port);
}

@@ -4,7 +4,7 @@
#include "common.h"
#include "llama.h"
/*
    This isn't the best way to do this.
    Missing:
@@ -22,7 +22,7 @@ class Llama{
    void setting_context();
    int set_message(std::string msg);
    void release();
    llama_token nextToken();
    std::string inference();
@@ -47,4 +47,4 @@ class Llama{
    // to ignore this in the completion
    std::vector<int> user_tag_tokens;
    std::vector<int> assistant_tag_tokens;
};
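Read together, the declarations above suggest a driver along these lines; this is only a sketch inferred from the method names and return types visible in this diff, and the call order and success conventions are assumptions, not documented behavior:

    #include <string>
    // #include "..."  // the header above; its filename isn't shown in this diff

    int main() {
        Llama llama;                          // assumes a default constructor
        if (!llama.load_context()) return 1;  // bool return per the hunk at -44
        if (!llama.prompt_test()) return 1;
        llama.setting_context();
        llama.set_message("Hello, Assistant.");
        std::string reply = llama.inference();
        llama.release();
        return 0;
    }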