* don't ruin all whitespace
This commit is contained in:
parent
5805fdaae2
commit
f26c51b0d1
1 changed files with 640 additions and 661 deletions
|
@ -724,8 +724,7 @@ struct llama_server_context
|
||||||
if (data.count("__oaicompat") != 0) {
|
if (data.count("__oaicompat") != 0) {
|
||||||
slot->oaicompat = true;
|
slot->oaicompat = true;
|
||||||
slot->oaicompat_model = json_value(data, "model", std::string(DEFAULT_OAICOMPAT_MODEL));
|
slot->oaicompat_model = json_value(data, "model", std::string(DEFAULT_OAICOMPAT_MODEL));
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
slot->oaicompat = false;
|
slot->oaicompat = false;
|
||||||
slot->oaicompat_model = "";
|
slot->oaicompat_model = "";
|
||||||
}
|
}
|
||||||
|
@ -914,8 +913,7 @@ struct llama_server_context
|
||||||
slot->images.clear();
|
slot->images.clear();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
} catch (const std::invalid_argument& e) {
|
||||||
catch (const std::invalid_argument& e) {
|
|
||||||
LOG_TEE("Invalid image number id in prompt\n");
|
LOG_TEE("Invalid image number id in prompt\n");
|
||||||
slot->images.clear();
|
slot->images.clear();
|
||||||
return false;
|
return false;
|
||||||
|
@ -2421,8 +2419,7 @@ static void server_params_parse(int argc, char** argv, server_params& sparams,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
params.n_parallel = std::stoi(argv[i]);
|
params.n_parallel = std::stoi(argv[i]);
|
||||||
}
|
} else if (arg == "-n" || arg == "--n-predict")
|
||||||
else if (arg == "-n" || arg == "--n-predict")
|
|
||||||
{
|
{
|
||||||
if (++i >= argc)
|
if (++i >= argc)
|
||||||
{
|
{
|
||||||
|
@ -2430,8 +2427,7 @@ static void server_params_parse(int argc, char** argv, server_params& sparams,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
params.n_predict = std::stoi(argv[i]);
|
params.n_predict = std::stoi(argv[i]);
|
||||||
}
|
} else if (arg == "-spf" || arg == "--system-prompt-file")
|
||||||
else if (arg == "-spf" || arg == "--system-prompt-file")
|
|
||||||
{
|
{
|
||||||
if (++i >= argc)
|
if (++i >= argc)
|
||||||
{
|
{
|
||||||
|
@ -2486,28 +2482,23 @@ static void server_params_parse(int argc, char** argv, server_params& sparams,
|
||||||
sep += 4;
|
sep += 4;
|
||||||
kvo.tag = LLAMA_KV_OVERRIDE_INT;
|
kvo.tag = LLAMA_KV_OVERRIDE_INT;
|
||||||
kvo.int_value = std::atol(sep);
|
kvo.int_value = std::atol(sep);
|
||||||
}
|
} else if (strncmp(sep, "float:", 6) == 0) {
|
||||||
else if (strncmp(sep, "float:", 6) == 0) {
|
|
||||||
sep += 6;
|
sep += 6;
|
||||||
kvo.tag = LLAMA_KV_OVERRIDE_FLOAT;
|
kvo.tag = LLAMA_KV_OVERRIDE_FLOAT;
|
||||||
kvo.float_value = std::atof(sep);
|
kvo.float_value = std::atof(sep);
|
||||||
}
|
} else if (strncmp(sep, "bool:", 5) == 0) {
|
||||||
else if (strncmp(sep, "bool:", 5) == 0) {
|
|
||||||
sep += 5;
|
sep += 5;
|
||||||
kvo.tag = LLAMA_KV_OVERRIDE_BOOL;
|
kvo.tag = LLAMA_KV_OVERRIDE_BOOL;
|
||||||
if (std::strcmp(sep, "true") == 0) {
|
if (std::strcmp(sep, "true") == 0) {
|
||||||
kvo.bool_value = true;
|
kvo.bool_value = true;
|
||||||
}
|
} else if (std::strcmp(sep, "false") == 0) {
|
||||||
else if (std::strcmp(sep, "false") == 0) {
|
|
||||||
kvo.bool_value = false;
|
kvo.bool_value = false;
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]);
|
fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]);
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
|
fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
|
@ -2617,8 +2608,7 @@ json oaicompat_completion_params_parse(
|
||||||
// Handle 'stop' field
|
// Handle 'stop' field
|
||||||
if (body.contains("stop") && body["stop"].is_string()) {
|
if (body.contains("stop") && body["stop"].is_string()) {
|
||||||
llama_params["stop"] = json::array({body["stop"].get<std::string>()});
|
llama_params["stop"] = json::array({body["stop"].get<std::string>()});
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
llama_params["stop"] = json_value(body, "stop", json::array());
|
llama_params["stop"] = json_value(body, "stop", json::array());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2709,15 +2699,13 @@ static std::vector<json> format_partial_response_oaicompat(const task_result& re
|
||||||
choices = json::array({json{{"finish_reason", finish_reason},
|
choices = json::array({json{{"finish_reason", finish_reason},
|
||||||
{"index", 0},
|
{"index", 0},
|
||||||
{"delta", json::object()}}});
|
{"delta", json::object()}}});
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
if (first) {
|
if (first) {
|
||||||
if (content.empty()) {
|
if (content.empty()) {
|
||||||
choices = json::array({json{{"finish_reason", nullptr},
|
choices = json::array({json{{"finish_reason", nullptr},
|
||||||
{"index", 0},
|
{"index", 0},
|
||||||
{"delta", json{{"role", "assistant"}}}}});
|
{"delta", json{{"role", "assistant"}}}}});
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
// We have to send this as two updates to conform to openai behavior
|
// We have to send this as two updates to conform to openai behavior
|
||||||
json initial_ret = json{{"choices", json::array({json{
|
json initial_ret = json{{"choices", json::array({json{
|
||||||
{"finish_reason", nullptr},
|
{"finish_reason", nullptr},
|
||||||
|
@ -2743,8 +2731,7 @@ static std::vector<json> format_partial_response_oaicompat(const task_result& re
|
||||||
|
|
||||||
return std::vector<json>({initial_ret, second_ret});
|
return std::vector<json>({initial_ret, second_ret});
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
// Some idiosyncrasy in task processing logic makes several trailing calls
|
// Some idiosyncrasy in task processing logic makes several trailing calls
|
||||||
// with empty content, we ignore these at the callee site.
|
// with empty content, we ignore these at the callee site.
|
||||||
if (content.empty()) {
|
if (content.empty()) {
|
||||||
|
@ -2967,8 +2954,7 @@ int main(int argc, char** argv)
|
||||||
|
|
||||||
if (sparams.api_keys.size() == 1) {
|
if (sparams.api_keys.size() == 1) {
|
||||||
log_data["api_key"] = "api_key: ****" + sparams.api_keys[0].substr(sparams.api_keys[0].length() - 4);
|
log_data["api_key"] = "api_key: ****" + sparams.api_keys[0].substr(sparams.api_keys[0].length() - 4);
|
||||||
}
|
} else if (sparams.api_keys.size() > 1) {
|
||||||
else if (sparams.api_keys.size() > 1) {
|
|
||||||
log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
|
log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2990,8 +2976,7 @@ int main(int argc, char** argv)
|
||||||
{
|
{
|
||||||
state.store(SERVER_STATE_ERROR);
|
state.store(SERVER_STATE_ERROR);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
llama.initialize();
|
llama.initialize();
|
||||||
state.store(SERVER_STATE_READY);
|
state.store(SERVER_STATE_READY);
|
||||||
LOG_INFO("model loaded", {});
|
LOG_INFO("model loaded", {});
|
||||||
|
@ -3081,8 +3066,7 @@ int main(int argc, char** argv)
|
||||||
res.set_content(result.result_json["content"], "text/plain; charset=utf-8");
|
res.set_content(result.result_json["content"], "text/plain; charset=utf-8");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink & sink)
|
const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink & sink)
|
||||||
{
|
{
|
||||||
while (true)
|
while (true)
|
||||||
|
@ -3103,8 +3087,7 @@ int main(int argc, char** argv)
|
||||||
if (result.stop) {
|
if (result.stop) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
const std::string str =
|
const std::string str =
|
||||||
"error: " +
|
"error: " +
|
||||||
result.result_json.dump(-1, ' ', false, json::error_handler_t::replace) +
|
result.result_json.dump(-1, ' ', false, json::error_handler_t::replace) +
|
||||||
|
@ -3175,14 +3158,12 @@ int main(int argc, char** argv)
|
||||||
res.set_content(oaicompat_result.dump(-1, ' ', false,
|
res.set_content(oaicompat_result.dump(-1, ' ', false,
|
||||||
json::error_handler_t::replace),
|
json::error_handler_t::replace),
|
||||||
"application/json; charset=utf-8");
|
"application/json; charset=utf-8");
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
res.status = 500;
|
res.status = 500;
|
||||||
res.set_content(result.result_json["content"], "text/plain; charset=utf-8");
|
res.set_content(result.result_json["content"], "text/plain; charset=utf-8");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink &sink) {
|
const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink &sink) {
|
||||||
while (true) {
|
while (true) {
|
||||||
task_result llama_result = llama.next_result(task_id);
|
task_result llama_result = llama.next_result(task_id);
|
||||||
|
@ -3205,8 +3186,7 @@ int main(int argc, char** argv)
|
||||||
if (llama_result.stop) {
|
if (llama_result.stop) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
const std::string str =
|
const std::string str =
|
||||||
"error: " +
|
"error: " +
|
||||||
llama_result.result_json.dump(-1, ' ', false,
|
llama_result.result_json.dump(-1, ' ', false,
|
||||||
|
@ -3253,8 +3233,7 @@ int main(int argc, char** argv)
|
||||||
res.set_content(result.result_json["content"], "text/plain; charset=utf-8");
|
res.set_content(result.result_json["content"], "text/plain; charset=utf-8");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink & sink) {
|
const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink & sink) {
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue