From 7635b13ad740793a10dc0f375a501d78e59f14e3 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 5 Mar 2024 17:22:28 +0200
Subject: [PATCH] server : minor

---
 examples/server/README.md  |  2 +-
 examples/server/server.cpp | 38 +++++++++++++++++++-------------------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/examples/server/README.md b/examples/server/README.md
index 21da7a0a0..591f748f8 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -436,7 +436,7 @@ Notice that each `probs` is an array of length `n_probs`.
     "next_token": {
         "has_next_token": true,
         "n_remain": -1,
-        "num_tokens_predicted": 0,
+        "n_decoded": 0,
         "stopped_eos": false,
         "stopped_limit": false,
         "stopped_word": false,
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index d6bab439a..898364617 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -841,15 +841,15 @@ struct llama_server_context {
         }

         LOG_VERBOSE("next token", {
-            {"token",                 result.tok},
-            {"token_text",            tokens_to_output_formatted_string(ctx, result.tok)},
-            {"has_next_token",        slot.has_next_token},
-            {"n_remain",              slot.n_remaining},
-            {"num_tokens_predicted",  slot.n_decoded},
-            {"stopped_eos",           slot.stopped_eos},
-            {"stopped_word",          slot.stopped_word},
-            {"stopped_limit",         slot.stopped_limit},
-            {"stopping_word",         slot.stopping_word},
+            {"token",          result.tok},
+            {"token_text",     tokens_to_output_formatted_string(ctx, result.tok)},
+            {"has_next_token", slot.has_next_token},
+            {"n_remain",       slot.n_remaining},
+            {"n_decoded",      slot.n_decoded},
+            {"stopped_eos",    slot.stopped_eos},
+            {"stopped_word",   slot.stopped_word},
+            {"stopped_limit",  slot.stopped_limit},
+            {"stopping_word",  slot.stopping_word},
         });

         return slot.has_next_token; // continue
@@ -1183,13 +1183,13 @@ struct llama_server_context {
                 slot_data["state"] = slot.state;
                 slot_data["prompt"] = slot.prompt;
                 slot_data["next_token"] = {
-                    {"has_next_token",       slot.has_next_token},
-                    {"n_remain",             slot.n_remaining},
-                    {"num_tokens_predicted", slot.n_decoded},
-                    {"stopped_eos",          slot.stopped_eos},
-                    {"stopped_word",         slot.stopped_word},
-                    {"stopped_limit",        slot.stopped_limit},
-                    {"stopping_word",        slot.stopping_word},
+                    {"has_next_token", slot.has_next_token},
+                    {"n_remain",       slot.n_remaining},
+                    {"n_decoded",      slot.n_decoded},
+                    {"stopped_eos",    slot.stopped_eos},
+                    {"stopped_word",   slot.stopped_word},
+                    {"stopped_limit",  slot.stopped_limit},
+                    {"stopping_word",  slot.stopping_word},
                 };

                 if (slot_data["state"] == IDLE) {
@@ -2083,8 +2083,8 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
        } else {
            std::string value(argv[i]);
            /**/ if (value == "distribute" || value == "" ) { params.numa = GGML_NUMA_STRATEGY_DISTRIBUTE; }
-            else if (value == "isolate") { params.numa = GGML_NUMA_STRATEGY_ISOLATE; }
-            else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
+            else if (value == "isolate")                    { params.numa = GGML_NUMA_STRATEGY_ISOLATE; }
+            else if (value == "numactl")                    { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
            else { invalid_param = true; break; }
        }
    } else if (arg == "--embedding") {
@@ -2913,9 +2913,9 @@ int main(int argc, char ** argv) {
            json prompt;
            if (body.count("input") != 0) {
                prompt = body["input"];
-                // batch
                if (prompt.is_array()) {
                    json data = json::array();
+                    int i = 0;
                    for (const json & elem : prompt) {
                        const int id_task = llama.queue_tasks.get_new_id();