parallel : rename hot-plug to continuous-batching

commit a1327c71c6
parent e1067efbfa

3 changed files with 11 additions and 11 deletions
common/common.cpp

@@ -372,8 +372,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             params.multiline_input = true;
         } else if (arg == "--simple-io") {
             params.simple_io = true;
-        } else if (arg == "--hot-plug") {
-            params.hot_plug = true;
+        } else if (arg == "-cb" || arg == "--cont-batching") {
+            params.cont_batching = true;
         } else if (arg == "--color") {
             params.use_color = true;
         } else if (arg == "--mlock") {
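A note on the hunk above: gpt_params_parse walks argv and flips boolean fields on gpt_params by plain string comparison, so this commit is a pure rename of one such flag, from --hot-plug / params.hot_plug to -cb / --cont-batching / params.cont_batching. Below is a minimal self-contained C++ sketch of that parsing pattern; the params_sketch struct and main() are illustrative stand-ins, not llama.cpp code.

// Sketch of the string-comparison flag parsing used in gpt_params_parse.
// params_sketch is a hypothetical stand-in for gpt_params.
#include <cstdio>
#include <string>

struct params_sketch {
    bool simple_io     = false;
    bool cont_batching = false; // renamed from hot_plug in this commit
    bool use_color     = false;
};

int main(int argc, char ** argv) {
    params_sketch params;
    for (int i = 1; i < argc; i++) {
        const std::string arg = argv[i];
        if (arg == "--simple-io") {
            params.simple_io = true;
        } else if (arg == "-cb" || arg == "--cont-batching") {
            // short and long forms both enable continuous batching
            params.cont_batching = true;
        } else if (arg == "--color") {
            params.use_color = true;
        } else {
            fprintf(stderr, "unknown argument: %s\n", arg.c_str());
            return 1;
        }
    }
    printf("cont_batching: %s\n", params.cont_batching ? "true" : "false");
    return 0;
}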
@@ -675,7 +675,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf("  --chunks N            max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
     printf("  -np N, --parallel N   number of parallel sequences to decode (default: %d)\n", params.n_parallel);
     printf("  -ns N, --sequences N  number of sequences to decode (default: %d)\n", params.n_sequences);
-    printf("  --hot-plug            enable hot-plugging of new sequences for decoding (default: disabled)\n");
+    printf("  -cb, --cont-batching  enable continuous batching (a.k.a dynamic batching) (default: disabled)\n");
     if (llama_mlock_supported()) {
         printf("  --mlock               force system to keep model in RAM rather than swapping or compressing\n");
     }
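Usage note: per the help text above, continuous batching is enabled from the command line with either the short or the long form of the renamed flag. The binary name and model path below are illustrative, not taken from this commit:

    ./parallel -m models/model.gguf -np 4 -ns 16 -cb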
@@ -1270,7 +1270,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
     fprintf(stream, "rope_freq_scale: %f # default: 1.0\n", params.rope_freq_scale);
     fprintf(stream, "seed: %d # default: -1 (random seed)\n", params.seed);
     fprintf(stream, "simple_io: %s # default: false\n", params.simple_io ? "true" : "false");
-    fprintf(stream, "hot_plug: %s # default: false\n", params.hot_plug ? "true" : "false");
+    fprintf(stream, "cont_batching: %s # default: false\n", params.cont_batching ? "true" : "false");
     fprintf(stream, "temp: %f # default: 0.8\n", params.temp);

     const std::vector<float> tensor_split_vector(params.tensor_split, params.tensor_split + LLAMA_MAX_DEVICES);
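For reference, the hunk above means the YAML dump now emits the renamed key. Assuming -cb was passed, the relevant line of the dump would read (illustrative output, derived directly from the fprintf shown):

    cont_batching: true # default: false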