removed server changes
This commit is contained in:
parent
f5a23928c7
commit
2736688af4
3 changed files with 3 additions and 10 deletions
|
@ -691,7 +691,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
[](gpt_params & params) {
|
[](gpt_params & params) {
|
||||||
params.ctx_shift = false;
|
params.ctx_shift = false;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}));
|
).set_examples({LLAMA_EXAMPLE_MAIN}));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--chunks"}, "N",
|
{"--chunks"}, "N",
|
||||||
format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks),
|
format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks),
|
||||||
|
|
|
@ -161,7 +161,7 @@ A value of -1 will enable infinite text generation, even though we have a finite
|
||||||
|
|
||||||
If the pause is undesirable, a value of -2 will stop generation immediately when the context is filled.
|
If the pause is undesirable, a value of -2 will stop generation immediately when the context is filled.
|
||||||
|
|
||||||
The `--no-context-shift` options allows you to stop the inifinite text generation once the finite context window is full.
|
The `--no-context-shift` option allows you to stop the infinite text generation once the finite context window is full.
|
||||||
|
|
||||||
It is important to note that the generated text may be shorter than the specified number of tokens if an End-of-Sequence (EOS) token or a reverse prompt is encountered. In interactive mode, text generation will pause and control will be returned to the user. In non-interactive mode, the program will end. In both cases, the text generation may stop before reaching the specified `--predict` value. If you want the model to keep going without ever producing End-of-Sequence on its own, you can use the `--ignore-eos` parameter.
|
It is important to note that the generated text may be shorter than the specified number of tokens if an End-of-Sequence (EOS) token or a reverse prompt is encountered. In interactive mode, text generation will pause and control will be returned to the user. In non-interactive mode, the program will end. In both cases, the text generation may stop before reaching the specified `--predict` value. If you want the model to keep going without ever producing End-of-Sequence on its own, you can use the `--ignore-eos` parameter.
|
||||||
|
|
||||||
|
|
|
@ -1815,13 +1815,6 @@ struct server_context {
|
||||||
for (server_slot & slot : slots) {
|
for (server_slot & slot : slots) {
|
||||||
if (slot.ga_n == 1) {
|
if (slot.ga_n == 1) {
|
||||||
if (slot.is_processing() && (int) system_tokens.size() + slot.n_past >= slot.n_ctx - 1) {
|
if (slot.is_processing() && (int) system_tokens.size() + slot.n_past >= slot.n_ctx - 1) {
|
||||||
if (!params.ctx_shift){
|
|
||||||
slot.release();
|
|
||||||
slot.print_timings();
|
|
||||||
send_final_response(slot);
|
|
||||||
metrics.on_prediction(slot);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Shift context
|
// Shift context
|
||||||
const int n_keep = slot.params.n_keep + add_bos_token;
|
const int n_keep = slot.params.n_keep + add_bos_token;
|
||||||
const int n_left = (int) system_tokens.size() + slot.n_past - n_keep;
|
const int n_left = (int) system_tokens.size() + slot.n_past - n_keep;
|
||||||
|
@ -3175,4 +3168,4 @@ int main(int argc, char ** argv) {
|
||||||
t.join();
|
t.join();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
Loading…
Add table
Add a link
Reference in a new issue