Changed to params/args

Seed is now set by the CLI and defaults to -1 if no seed is given.
Threads and batch size are now proper launch parameters.
digiwombat 2023-05-28 06:29:11 -04:00
parent 549291fe61
commit 177868e68a
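
For illustration, a minimal sketch of launching the example server with the new launch parameters; the binary name, model path, and values are assumptions, not part of the commit:

# Hypothetical invocation: seed, thread count, and batch size set at launch.
# Binary name and model path are placeholders for your build.
./server -m models/7B/ggml-model.bin --seed 42 --threads 8 --batch-size 256

Per the new parsing code, -b/--batch-size is clamped to at most 512.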

@@ -293,7 +293,7 @@ void server_print_usage(int /*argc*/, char **argv, const gpt_params &params)
    fprintf(stderr, "\n");
    fprintf(stderr, "options:\n");
    fprintf(stderr, "  -h, --help            show this help message and exit\n");
    fprintf(stderr, "  -s SEED, --seed SEED  RNG seed (default: -1, use random seed for < 0)\n");
    fprintf(stderr, "  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads);
    fprintf(stderr, "  --memory_f32          use f32 instead of f16 for memory key+value\n");
    fprintf(stderr, "  --embedding           enable embedding mode\n");
    fprintf(stderr, "  --keep                number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
@@ -343,18 +343,6 @@ bool server_params_parse(int argc, char **argv, server_params &sparams, gpt_para
            }
            sparams.hostname = argv[i];
        }
        else if (arg == "-s" || arg == "--seed")
        {
#if defined(GGML_USE_CUBLAS)
            fprintf(stderr, "WARNING: when using cuBLAS generation results are NOT guaranteed to be reproducible.\n");
#endif
            if (++i >= argc)
            {
                invalid_param = true;
                break;
            }
            params.seed = std::stoi(argv[i]);
        }
        else if (arg == "-m" || arg == "--model")
        {
            if (++i >= argc)
@@ -386,6 +374,23 @@ bool server_params_parse(int argc, char **argv, server_params &sparams, gpt_para
        {
            params.memory_f16 = false;
        }
        else if (arg == "--threads" || arg == "-t")
        {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.n_threads = std::stoi(argv[i]);
        }
        else if (arg == "-b" || arg == "--batch-size")
        {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.n_batch = std::stoi(argv[i]);
            params.n_batch = std::min(512, params.n_batch);
        }
        else if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers")
        {
            if (++i >= argc)
@@ -491,14 +496,18 @@ bool parse_options_completion(json body, llama_server_context& llama, Response &
    {
        llama.params.penalize_nl = body["penalize_nl"].get<float>();
    }
    if (!body["batch_size"].is_null())
    {
        llama.params.n_batch = body["batch_size"].get<int>();
    }
    if (!body["n_keep"].is_null())
    {
        llama.params.n_keep = body["n_keep"].get<int>();
    }
    if (!body["seed"].is_null())
    {
        llama.params.seed = body["seed"].get<int>();
    }
    else
    {
        llama.params.seed = -1;
    }
    if (!body["prompt"].is_null())
    {
        llama.params.prompt = body["prompt"].get<std::string>();
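
A minimal sketch of the per-request behavior added in parse_options_completion, assuming the server's default address and a /completion endpoint (both assumptions): batch_size, n_keep, and seed can be supplied in the request body, and a request that omits seed now resets it to -1 (random) instead of reusing a previous value.

# Hypothetical request; host, port, and endpoint path are assumptions.
# Omitting "seed" resets the server-side seed to -1 (random).
curl -s -X POST http://localhost:8080/completion \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Hello", "batch_size": 256, "n_keep": 64, "seed": 42}'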