batched : add bench tool (#3545)

* batched : add bench tool

* batched : minor table fix

* batched-bench : add readme + n_kv_max is now configurable

* batched-bench : init warm-up batch

* batched-bench : pass custom set of PP, TG and PL

* batched-bench : add mmq CLI arg
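
The last bullets above describe the new tool's command line: n_kv_max becomes configurable, custom sets of PP (prompt processing batch sizes), TG (number of generated tokens) and PL (number of parallel sequences) can be passed, and an mmq argument is added. As a rough illustration of the "custom set" idea, here is a minimal sketch of turning a comma-separated argument into a list of integers; the helper name and the assumption that the sets are given as comma-separated lists are illustrative, not taken from the actual batched-bench code.

```cpp
// Hypothetical helper: parse a CLI argument such as "128,256,512" into a list
// of values (e.g. a custom PP, TG or PL set). Illustration only, not the
// actual batched-bench argument parsing.
#include <sstream>
#include <string>
#include <vector>

static std::vector<int> parse_int_list(const std::string & arg) {
    std::vector<int> values;
    std::stringstream ss(arg);
    std::string item;
    while (std::getline(ss, item, ',')) {    // split on commas
        values.push_back(std::stoi(item));   // convert each piece to an int
    }
    return values;
}

// Example: parse_int_list("128,256,512") -> {128, 256, 512}
```
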
Georgi Gerganov, 2023-10-11 21:25:33 +03:00 (committed by GitHub)
commit 8c70a5ff25, parent 24ba3d829e
7 changed files with 321 additions and 3 deletions

examples/batched/batched.cpp

@@ -66,7 +66,7 @@ int main(int argc, char ** argv) {
     ctx_params.seed = 1234;
     ctx_params.n_ctx = n_kv_req;
     ctx_params.n_batch = std::max(n_len, n_parallel);
-    ctx_params.n_threads = params.n_threads;
+    ctx_params.n_threads = params.n_threads;
     ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
     llama_context * ctx = llama_new_context_with_model(model, ctx_params);
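
For readers who only see this hunk, a minimal sketch of how these context parameters fit together may help. The helper function name and its arguments below are hypothetical; the llama_context_params fields and llama_context_default_params() are the llama.cpp API already used by the example, and the n_threads_batch line mirrors the fallback shown in the hunk.

```cpp
// Sketch, not the actual example code: building the context parameters shown
// above. make_ctx_params and its arguments are hypothetical stand-ins for the
// values the example derives from its own command-line parameters.
#include <algorithm>

#include "llama.h"

static llama_context_params make_ctx_params(int n_kv_req, int n_len, int n_parallel,
                                            int n_threads, int n_threads_batch) {
    llama_context_params ctx_params = llama_context_default_params();

    ctx_params.seed    = 1234;                         // fixed seed, as in the hunk above
    ctx_params.n_ctx   = n_kv_req;                     // KV cache sized for all sequences
    ctx_params.n_batch = std::max(n_len, n_parallel);  // largest batch that will be submitted

    // batch thread count falls back to the generation thread count when left at -1
    ctx_params.n_threads       = n_threads;
    ctx_params.n_threads_batch = n_threads_batch == -1 ? n_threads : n_threads_batch;

    return ctx_params;
}
```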