removed some whitespaces

commit bec44ebdbc
parent 0bb1ff4402

4 changed files with 13 additions and 12 deletions
@@ -5,4 +5,4 @@ target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
 if(TARGET BUILD_INFO)
   add_dependencies(${TARGET} BUILD_INFO)
-endif()
+endif()
@@ -108,8 +108,9 @@ Options:
 
 `threads`: Set the number of threads to use during computation.
 
-`context`: Set a short conversation as context.
-Insert a array of this form: `{ role: "user", content: "Hello, Assistant." }`, where:
+`context`: Set a short conversation as context.
+
+Insert items to an array of this form: `{ role: "user", content: "Hello, Assistant." }`, where:
 
 `role` can be `system`, `assistant` and `user`.
 
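For clarity, here is what a `context` array of the documented form could look like when built in C++. This is a minimal sketch using nlohmann::json (the library the server code already parses request bodies with); the role/content values are illustrative, not from the commit:

    #include <nlohmann/json.hpp>
    using json = nlohmann::json;

    // A hypothetical `context` value: an array of { role, content }
    // objects, where role is "system", "assistant" or "user".
    json context = json::array({
        { {"role", "system"},    {"content", "You are a helpful assistant."} },
        { {"role", "user"},      {"content", "Hello, Assistant."} },
        { {"role", "assistant"}, {"content", "Hello. How may I help you today?"} }
    });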
@@ -44,7 +44,7 @@ bool Llama::load_context() {
 
 bool Llama::prompt_test() {
     embd_inp = ::llama_tokenize(ctx, params.prompt, true);
-
+
     if ((int)embd_inp.size() > n_ctx - 4)
     {
         fprintf(stderr, "%s: error: prompt is too long (%d tokens, max %d)\n", __func__, (int)embd_inp.size(), n_ctx - 4);
@@ -71,7 +71,7 @@ void Llama::setting_context() {
         fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
                 params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
     }
-
+
     fprintf(stderr, "sampling: repeat_last_n = %d, repeat_penalty = %f, presence_penalty = %f, frequency_penalty = %f, top_k = %d, tfs_z = %f, top_p = %f, typical_p = %f, temp = %f, mirostat = %d, mirostat_lr = %f, mirostat_ent = %f\n",
             params.repeat_last_n, params.repeat_penalty, params.presence_penalty, params.frequency_penalty, params.top_k, params.tfs_z, params.top_p, params.typical_p, params.temp, params.mirostat, params.mirostat_eta, params.mirostat_tau);
     fprintf(stderr, "generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.n_batch, params.n_predict, params.n_keep);
@@ -636,7 +636,7 @@ int main(int argc, char ** argv) {
     Server svr;
 
     svr.Get("/", [](const Request &req, Response &res)
-            {
+            {
                 res.set_content("<h1>llama.cpp server works</h1>", "text/html");
             }
     );
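As a usage sketch (an editor's aside, not part of the commit): the root endpoint registered above can be exercised with cpp-httplib's client as well. Host and port here are assumptions; pass whatever the server was started with:

    #include "httplib.h"
    #include <cstdio>

    int main() {
        // Assumed defaults for host and port.
        httplib::Client cli("127.0.0.1", 8080);
        auto res = cli.Get("/");
        if (res && res->status == 200) {
            // Prints "<h1>llama.cpp server works</h1>"
            printf("%s\n", res->body.c_str());
        }
        return 0;
    }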
@@ -645,7 +645,7 @@ int main(int argc, char ** argv) {
             if(!llama->context_config) {
                 json body = json::parse(req.body);
                 /*
-                Seed whould be passed by the request, but seem
+                Seed whould be passed by the request, but seem
                 the current implementation need it in the load file
                 */
                 if (!body["threads"].is_null())
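The pattern in this hunk — probing `body["threads"]` with `is_null()` before reading it — generalizes to any optional request field. A minimal sketch of that idea with nlohmann::json; the defaults and the `seed` field are illustrative assumptions, not the server's confirmed schema:

    #include <nlohmann/json.hpp>
    using json = nlohmann::json;

    int main() {
        json body = json::parse(R"({ "threads": 8 })");

        int n_threads = 4; // assumed fallback default
        if (!body["threads"].is_null()) {
            n_threads = body["threads"].get<int>();
        }
        // value(key, default) collapses the same null-check into one call.
        int seed = body.value("seed", -1);
        return 0;
    }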
@@ -747,7 +747,7 @@ int main(int argc, char ** argv) {
     });
 
     svr.Get("/completion", [&llama](const Request &req, Response &res)
-            {
+            {
                 bool stream = false;
                 if (req.has_param("stream")) {
                     stream = req.get_param_value("stream") == "true";
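A usage note (an assumption, not stated in the commit): with cpp-httplib, query parameters arrive through req.has_param/req.get_param_value, so the stream flag above would be toggled by a URL such as /completion?stream=true. A self-contained handler sketch with placeholder response bodies:

    #include "httplib.h"

    int main() {
        httplib::Server svr;
        svr.Get("/completion", [](const httplib::Request &req, httplib::Response &res) {
            // "?stream=true" switches the handler into streaming mode.
            bool stream = false;
            if (req.has_param("stream")) {
                stream = req.get_param_value("stream") == "true";
            }
            res.set_content(stream ? "streaming" : "blocking", "text/plain");
        });
        svr.listen("127.0.0.1", 8080);
        return 0;
    }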
@@ -788,7 +788,7 @@ int main(int argc, char ** argv) {
     });
 
     printf("llama.cpp HTTP Server Listening at http://%s:%i", hostname.c_str(), port);
-
+
     // change hostname and port
     svr.listen(hostname, port);
 }
@@ -4,7 +4,7 @@
 #include "common.h"
 #include "llama.h"
 
-/*
+/*
 This isn't the best way to do this.
 
 Missing:
@@ -22,7 +22,7 @@ class Llama{
         void setting_context();
         int set_message(std::string msg);
         void release();
-
+
         llama_token nextToken();
         std::string inference();
 
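Taken together, the declarations in this class suggest a call sequence for the wrapper. A hypothetical driver sketch, with the method semantics inferred from their names and the hunk headers (load_context and prompt_test return bool per the hunks above); the header name is an assumption:

    #include <string>
    #include "server.h" // assumed header name for the Llama class above

    int main() {
        Llama llama;
        if (!llama.load_context() || !llama.prompt_test()) {
            return 1; // model failed to load or the prompt was rejected
        }
        llama.setting_context();
        llama.set_message("Hello, Assistant."); // queue a user message
        std::string reply = llama.inference();  // run generation to completion
        llama.release();                        // free the llama context
        return 0;
    }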
@@ -47,4 +47,4 @@ class Llama{
         // to ignore this in the completion
         std::vector<int> user_tag_tokens;
         std::vector<int> assistant_tag_tokens;
-};
+};