server : remove self-extend features (#9860)

* server : remove self-extend

ggml-ci

* server : fix context limit check to use slot.n_past

ggml-ci
2024-10-12 16:06:31 +03:00 · 2024-10-12 16:06:31 +03:00 · 1bde94dd02
commit 1bde94dd02
parent 95c76e8e92
4 changed files with 57 additions and 142 deletions
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -1163,14 +1163,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
        [](common_params & params, int value) {
            params.grp_attn_n = value;
        }
-    ).set_env("LLAMA_ARG_GRP_ATTN_N"));
+    ).set_env("LLAMA_ARG_GRP_ATTN_N").set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_PASSKEY}));
    add_opt(common_arg(
        {"-gaw", "--grp-attn-w"}, "N",
-        string_format("group-attention width (default: %.1f)", (double)params.grp_attn_w),
+        string_format("group-attention width (default: %d)", params.grp_attn_w),
        [](common_params & params, int value) {
            params.grp_attn_w = value;
        }
-    ).set_env("LLAMA_ARG_GRP_ATTN_W"));
+    ).set_env("LLAMA_ARG_GRP_ATTN_W").set_examples({LLAMA_EXAMPLE_MAIN}));
    add_opt(common_arg(
        {"-dkvc", "--dump-kv-cache"},
        "verbose print of the KV cache",