From 177868e68a135d03d4654a55bc2aad57c9a39e7a Mon Sep 17 00:00:00 2001
From: digiwombat
Date: Sun, 28 May 2023 06:29:11 -0400
Subject: [PATCH] Changed to params/args

Seed is now set per request, defaults to -1 if no seed is set.
Threads and batch size are now properly launch parameters.
---
 examples/server/server.cpp | 43 +++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index c95226e66..2ab532763 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -293,7 +293,7 @@ void server_print_usage(int /*argc*/, char **argv, const gpt_params &params)
     fprintf(stderr, "\n");
     fprintf(stderr, "options:\n");
     fprintf(stderr, "  -h, --help            show this help message and exit\n");
-    fprintf(stderr, "  -s SEED, --seed SEED  RNG seed (default: -1, use random seed for < 0)\n");
+    fprintf(stderr, "  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads);
     fprintf(stderr, "  --memory_f32          use f32 instead of f16 for memory key+value\n");
     fprintf(stderr, "  --embedding           enable embedding mode\n");
     fprintf(stderr, "  --keep                number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
@@ -343,18 +343,6 @@ bool server_params_parse(int argc, char **argv, server_params &sparams, gpt_para
            }
            sparams.hostname = argv[i];
        }
-       else if (arg == "-s" || arg == "--seed")
-       {
-#if defined(GGML_USE_CUBLAS)
-           fprintf(stderr, "WARNING: when using cuBLAS generation results are NOT guaranteed to be reproducible.\n");
-#endif
-           if (++i >= argc)
-           {
-               invalid_param = true;
-               break;
-           }
-           params.seed = std::stoi(argv[i]);
-       }
        else if (arg == "-m" || arg == "--model")
        {
            if (++i >= argc)
@@ -386,6 +374,23 @@ bool server_params_parse(int argc, char **argv, server_params &sparams, gpt_para
        {
            params.memory_f16 = false;
        }
+       else if (arg == "--threads" || arg == "-t")
+       {
+           if (++i >= argc) {
+               invalid_param = true;
+               break;
+           }
+           params.n_threads = std::stoi(argv[i]);
+       }
+       else if (arg == "-b" || arg == "--batch-size")
+       {
+           if (++i >= argc) {
+               invalid_param = true;
+               break;
+           }
+           params.n_batch = std::stoi(argv[i]);
+           params.n_batch = std::min(512, params.n_batch);
+       }
        else if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers")
        {
            if (++i >= argc)
@@ -491,14 +496,18 @@ bool parse_options_completion(json body, llama_server_context& llama, Response &
    {
        llama.params.penalize_nl = body["penalize_nl"].get<bool>();
    }
-   if (!body["batch_size"].is_null())
-   {
-       llama.params.n_batch = body["batch_size"].get<int>();
-   }
    if (!body["n_keep"].is_null())
    {
        llama.params.n_keep = body["n_keep"].get<int>();
    }
+   if (!body["seed"].is_null())
+   {
+       llama.params.seed = body["seed"].get<int>();
+   }
+   else
+   {
+       llama.params.seed = -1;
+   }
    if (!body["prompt"].is_null())
    {
        llama.params.prompt = body["prompt"].get<std::string>();
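
Usage sketch (illustrative only; the binary name, model path, and request values below are assumptions, not taken from the patch). With these changes, threads and batch size are supplied when the server is launched, for example:

    ./server -m models/7B/ggml-model.bin -t 8 -b 256

while the RNG seed travels with each completion request in its JSON body, for example:

    {"prompt": "Hello", "seed": 42}

If "seed" is omitted from the request body, the server falls back to -1 (random seed), and the -b/--batch-size value given at launch is clamped to at most 512 by the new parsing code.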