fix linter warnings + make variables const
commit 7df316b728
parent 575cf23862

1 changed file with 33 additions and 38 deletions
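The patch below is a cleanup pass rather than a behavioral change: long signatures are wrapped, NULL becomes nullptr, read-only parameters are taken by const reference, and locals that are never reassigned are marked const. As a rough sketch of those idioms only (the names log_line and LOG_LEVEL are hypothetical and do not appear in the patch):

#include <cstdio>
#include <cstdlib>
#include <string>

// Take read-only arguments by const reference instead of by value.
static void log_line(const std::string & level, const std::string & message) {
    // Locals that are never reassigned are marked const.
    const std::string line = "[" + level + "] " + message;
    fprintf(stdout, "%.*s\n", (int) line.size(), line.data());
    fflush(stdout);
}

int main() {
    // Prefer nullptr over NULL, and initialize with a ternary instead of
    // declaring first and assigning in an if/else.
    const char * env = std::getenv("LOG_LEVEL"); // may be nullptr
    const std::string level = (env != nullptr) ? env : "INFO";

    log_line(level, "server example cleanup sketch");
    return 0;
}

Passing the json body by const reference also avoids copying the request payload on every call, and dropping the unused bool return lets the function become static void.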
@@ -67,9 +67,10 @@ static std::string tokens_to_str(llama_context * ctx, Iter begin, Iter end) {
     return ret;
 }
 
-static void server_log(const char * level, const char * function, int line, const char * message, nlohmann::ordered_json extra) {
+static void server_log(const char * level, const char * function, int line,
+                       const char * message, const nlohmann::ordered_json & extra) {
     nlohmann::ordered_json log {
-        { "timestamp", time(NULL) },
+        { "timestamp", time(nullptr) },
         { "level", level },
         { "function", function },
         { "line", line },
@@ -80,7 +81,7 @@ static void server_log(const char * level, const char * function, int line, cons
         log.merge_patch(extra);
     }
 
-    std::string str = log.dump(-1, ' ', false, json::error_handler_t::replace);
+    const std::string str = log.dump(-1, ' ', false, json::error_handler_t::replace);
     fprintf(stdout, "%.*s\n", (int)str.size(), str.data());
     fflush(stdout);
 }
@@ -105,7 +106,7 @@ static bool server_verbose = false;
 struct llama_server_context {
     bool stream = false;
     bool has_next_token = false;
-    std::string generated_text = "";
+    std::string generated_text;
 
     size_t num_tokens_predicted = 0;
     size_t n_past = 0;
@@ -150,7 +151,7 @@ struct llama_server_context {
     bool loadModel(const gpt_params & params_) {
         params = params_;
         ctx = llama_init_from_gpt_params(params);
-        if (ctx == NULL) {
+        if (ctx == nullptr) {
             LOG_ERROR("unable to load model", { { "model", params_.model } });
             return false;
         }
@@ -267,7 +268,9 @@ struct llama_server_context {
         const float mirostat_tau = params.mirostat_tau;
         const float mirostat_eta = params.mirostat_eta;
         const bool penalize_nl = params.penalize_nl;
-        llama_token id = 0; {
+        llama_token id = 0;
+
+        {
             auto * logits = llama_get_logits(ctx);
             auto n_vocab = llama_n_vocab(ctx);
 
@@ -370,9 +373,9 @@ struct llama_server_context {
     }
 
     std::string doCompletion() {
-        llama_token token = nextToken();
+        const llama_token token = nextToken();
 
-        std::string token_text = token == -1 ? "" : llama_token_to_str(ctx, token);
+        const std::string token_text = token == -1 ? "" : llama_token_to_str(ctx, token);
         generated_text += token_text;
 
         if (multibyte_pending > 0) {
@@ -546,12 +549,12 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
             std::vector<std::string> split_arg{ it, {} };
             GGML_ASSERT(split_arg.size() <= LLAMA_MAX_DEVICES);
 
-            for (size_t i = 0; i < LLAMA_MAX_DEVICES; ++i) {
-                if (i < split_arg.size()) {
-                    params.tensor_split[i] = std::stof(split_arg[i]);
+            for (size_t i_device = 0; i_device < LLAMA_MAX_DEVICES; ++i_device) {
+                if (i_device < split_arg.size()) {
+                    params.tensor_split[i_device] = std::stof(split_arg[i_device]);
                 }
                 else {
-                    params.tensor_split[i] = 0.0f;
+                    params.tensor_split[i_device] = 0.0f;
                 }
             }
 #else
@@ -663,7 +666,7 @@ static json format_tokenizer_response(const std::vector<llama_token> & tokens) {
     };
 }
 
-bool parse_options_completion(json body, llama_server_context & llama) {
+static void parse_options_completion(const json & body, llama_server_context & llama) {
     gpt_params default_params;
 
     llama.stream = body.value("stream", false);
@@ -691,7 +694,7 @@ bool parse_options_completion(json body, llama_server_context & llama) {
     }
 
     if (body["logit_bias"].is_array()) {
-        int n_vocab = llama_n_vocab(llama.ctx);
+        const int n_vocab = llama_n_vocab(llama.ctx);
         for (const auto & el : body["logit_bias"]) {
             if (el.is_array() && el.size() == 2 && el[0].is_number_integer()) {
                 llama_token tok = el[0].get<llama_token>();
@@ -715,8 +718,6 @@ bool parse_options_completion(json body, llama_server_context & llama) {
     }
 
     LOG_VERBOSE("completion parameters parsed", format_generation_settings(llama));
-
-    return true;
 }
 
 static void log_server_request(const Request & req, const Response & res) {
@@ -773,13 +774,10 @@ int main(int argc, char ** argv) {
     });
 
     svr.Post("/completion", [&llama](const Request & req, Response & res) {
-
         llama.rewind();
         llama_reset_timings(llama.ctx);
 
-        if (!parse_options_completion(json::parse(req.body), llama)) {
-            return;
-        }
+        parse_options_completion(json::parse(req.body), llama);
 
         llama.loadPrompt();
         llama.beginCompletion();
@@ -802,7 +800,7 @@ int main(int argc, char ** argv) {
                     llama.generated_text.end());
             }
 
-            json data = format_final_response(llama, llama.generated_text);
+            const json data = format_final_response(llama, llama.generated_text);
 
             llama_print_timings(llama.ctx);
 
@@ -820,7 +818,7 @@ int main(int argc, char ** argv) {
 
                 size_t pos = std::min(sent_count, llama.generated_text.size());
 
-                const char* str_test = llama.generated_text.c_str() + pos;
+                const std::string str_test = llama.generated_text.substr(pos);
                 size_t stop_pos =
                     llama.findStoppingStrings(str_test, token_text.size(), STOP_FULL);
                 if (stop_pos != std::string::npos) {
@@ -833,18 +831,15 @@ int main(int argc, char ** argv) {
                         STOP_PARTIAL);
                 }
 
-                std::string to_send = llama.generated_text.substr(pos, stop_pos);
+                const std::string to_send = llama.generated_text.substr(pos, stop_pos);
                 sent_count += to_send.size();
 
-                json data;
-                if (llama.has_next_token) {
-                    data = format_partial_response(to_send);
-                } else {
+                const json data = llama.has_next_token
+                    ? format_partial_response(to_send)
                     // Generation is done, send extra information.
-                    data = format_final_response(llama, to_send);
-                }
+                    : format_final_response(llama, to_send);
 
-                std::string str =
+                const std::string str =
                     "data: " +
                     data.dump(-1, ' ', false, json::error_handler_t::replace) +
                     "\n\n";
@@ -873,10 +868,10 @@ int main(int argc, char ** argv) {
     });
 
     svr.Post("/tokenize", [&llama](const Request & req, Response & res) {
-        json body = json::parse(req.body);
-        std::string content = body["content"].get<std::string>();
-        std::vector<llama_token> tokens = ::llama_tokenize(llama.ctx, content, false);
-        json data = format_tokenizer_response(tokens);
+        const json body = json::parse(req.body);
+        const std::string content = body["content"].get<std::string>();
+        const std::vector<llama_token> tokens = llama_tokenize(llama.ctx, content, false);
+        const json data = format_tokenizer_response(tokens);
         return res.set_content(data.dump(), "application/json");
     });
 
@@ -887,7 +882,7 @@ int main(int argc, char ** argv) {
         char buf[BUFSIZ];
         try {
             std::rethrow_exception(std::move(ep));
-        } catch (std::exception& e) {
+        } catch (std::exception & e) {
             snprintf(buf, sizeof(buf), fmt, e.what());
         } catch (...) {
             snprintf(buf, sizeof(buf), fmt, "Unknown Exception");