server : remove self-extend features (#9860)

* server : remove self-extend

ggml-ci

* server : fix context limit check to use slot.n_past

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-10-12 16:06:31 +03:00 committed by GitHub
parent 95c76e8e92
commit 1bde94dd02
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 57 additions and 142 deletions

View file

@ -1163,14 +1163,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params, int value) {
params.grp_attn_n = value;
}
).set_env("LLAMA_ARG_GRP_ATTN_N"));
).set_env("LLAMA_ARG_GRP_ATTN_N").set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_PASSKEY}));
add_opt(common_arg(
{"-gaw", "--grp-attn-w"}, "N",
string_format("group-attention width (default: %.1f)", (double)params.grp_attn_w),
string_format("group-attention width (default: %d)", params.grp_attn_w),
[](common_params & params, int value) {
params.grp_attn_w = value;
}
).set_env("LLAMA_ARG_GRP_ATTN_W"));
).set_env("LLAMA_ARG_GRP_ATTN_W").set_examples({LLAMA_EXAMPLE_MAIN}));
add_opt(common_arg(
{"-dkvc", "--dump-kv-cache"},
"verbose print of the KV cache",