llama-bench : add pp+tg test type (#7199)
This commit is contained in:
parent
18e437665c
commit
e849648888
3 changed files with 74 additions and 15 deletions
|
@ -161,10 +161,17 @@ static const char * split_mode_str(llama_split_mode mode) {
|
|||
}
|
||||
}
|
||||
|
||||
static std::string pair_str(const std::pair<int, int> & p) {
|
||||
static char buf[32];
|
||||
snprintf(buf, sizeof(buf), "%d,%d", p.first, p.second);
|
||||
return buf;
|
||||
}
|
||||
|
||||
struct cmd_params {
|
||||
std::vector<std::string> model;
|
||||
std::vector<int> n_prompt;
|
||||
std::vector<int> n_gen;
|
||||
std::vector<std::pair<int, int>> n_pg;
|
||||
std::vector<int> n_batch;
|
||||
std::vector<int> n_ubatch;
|
||||
std::vector<ggml_type> type_k;
|
||||
|
@ -188,6 +195,7 @@ static const cmd_params cmd_params_defaults = {
|
|||
/* model */ {"models/7B/ggml-model-q4_0.gguf"},
|
||||
/* n_prompt */ {512},
|
||||
/* n_gen */ {128},
|
||||
/* n_pg */ {{512, 128}},
|
||||
/* n_batch */ {2048},
|
||||
/* n_ubatch */ {512},
|
||||
/* type_k */ {GGML_TYPE_F16},
|
||||
|
@ -215,10 +223,11 @@ static void print_usage(int /* argc */, char ** argv) {
|
|||
printf(" -m, --model <filename> (default: %s)\n", join(cmd_params_defaults.model, ",").c_str());
|
||||
printf(" -p, --n-prompt <n> (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str());
|
||||
printf(" -n, --n-gen <n> (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str());
|
||||
printf(" -pg <pp,tg> (default: %s)\n", join(transform_to_str(cmd_params_defaults.n_pg, pair_str), ",").c_str());
|
||||
printf(" -b, --batch-size <n> (default: %s)\n", join(cmd_params_defaults.n_batch, ",").c_str());
|
||||
printf(" -ub N, --ubatch-size <n> (default: %s)\n", join(cmd_params_defaults.n_ubatch, ",").c_str());
|
||||
printf(" -ctk <t>, --cache-type-k <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_k, ggml_type_name), ",").c_str());
|
||||
printf(" -ctv <t>, --cache-type-v <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_v, ggml_type_name), ",").c_str());
|
||||
printf(" -ub, --ubatch-size <n> (default: %s)\n", join(cmd_params_defaults.n_ubatch, ",").c_str());
|
||||
printf(" -ctk, --cache-type-k <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_k, ggml_type_name), ",").c_str());
|
||||
printf(" -ctv, --cache-type-v <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_v, ggml_type_name), ",").c_str());
|
||||
printf(" -t, --threads <n> (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str());
|
||||
printf(" -ngl, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
|
||||
printf(" -sm, --split-mode <none|layer|row> (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
|
||||
|
@ -304,6 +313,17 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
|||
}
|
||||
auto p = split<int>(argv[i], split_delim);
|
||||
params.n_gen.insert(params.n_gen.end(), p.begin(), p.end());
|
||||
} else if (arg == "-pg") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
auto p = split<std::string>(argv[i], ',');
|
||||
if (p.size() != 2) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.n_pg.push_back({std::stoi(p[0]), std::stoi(p[1])});
|
||||
} else if (arg == "-b" || arg == "--batch-size") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
|
@ -493,6 +513,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
|||
if (params.model.empty()) { params.model = cmd_params_defaults.model; }
|
||||
if (params.n_prompt.empty()) { params.n_prompt = cmd_params_defaults.n_prompt; }
|
||||
if (params.n_gen.empty()) { params.n_gen = cmd_params_defaults.n_gen; }
|
||||
if (params.n_pg.empty()) { params.n_pg = cmd_params_defaults.n_pg; }
|
||||
if (params.n_batch.empty()) { params.n_batch = cmd_params_defaults.n_batch; }
|
||||
if (params.n_ubatch.empty()) { params.n_ubatch = cmd_params_defaults.n_ubatch; }
|
||||
if (params.type_k.empty()) { params.type_k = cmd_params_defaults.type_k; }
|
||||
|
@ -632,6 +653,31 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
|||
};
|
||||
instances.push_back(instance);
|
||||
}
|
||||
|
||||
for (const auto & n_pg : params.n_pg) {
|
||||
if (n_pg.first == 0 && n_pg.second == 0) {
|
||||
continue;
|
||||
}
|
||||
cmd_params_instance instance = {
|
||||
/* .model = */ m,
|
||||
/* .n_prompt = */ n_pg.first,
|
||||
/* .n_gen = */ n_pg.second,
|
||||
/* .n_batch = */ nb,
|
||||
/* .n_ubatch = */ nub,
|
||||
/* .type_k = */ tk,
|
||||
/* .type_v = */ tv,
|
||||
/* .n_threads = */ nt,
|
||||
/* .n_gpu_layers = */ nl,
|
||||
/* .split_mode = */ sm,
|
||||
/* .main_gpu = */ mg,
|
||||
/* .no_kv_offload= */ nkvo,
|
||||
/* .flash_attn = */ fa,
|
||||
/* .tensor_split = */ ts,
|
||||
/* .use_mmap = */ mmp,
|
||||
/* .embeddings = */ embd,
|
||||
};
|
||||
instances.push_back(instance);
|
||||
}
|
||||
}
|
||||
|
||||
return instances;
|
||||
|
@ -965,6 +1011,9 @@ struct markdown_printer : public printer {
|
|||
if (field == "n_gpu_layers") {
|
||||
return 3;
|
||||
}
|
||||
if (field == "test") {
|
||||
return 13;
|
||||
}
|
||||
|
||||
int width = std::max((int)field.length(), 10);
|
||||
|
||||
|
@ -1091,12 +1140,11 @@ struct markdown_printer : public printer {
|
|||
value = test::get_backend();
|
||||
} else if (field == "test") {
|
||||
if (t.n_prompt > 0 && t.n_gen == 0) {
|
||||
snprintf(buf, sizeof(buf), "pp %d", t.n_prompt);
|
||||
snprintf(buf, sizeof(buf), "pp%d", t.n_prompt);
|
||||
} else if (t.n_gen > 0 && t.n_prompt == 0) {
|
||||
snprintf(buf, sizeof(buf), "tg %d", t.n_gen);
|
||||
snprintf(buf, sizeof(buf), "tg%d", t.n_gen);
|
||||
} else {
|
||||
assert(false);
|
||||
exit(1);
|
||||
snprintf(buf, sizeof(buf), "pp%d+tg%d", t.n_prompt, t.n_gen);
|
||||
}
|
||||
value = buf;
|
||||
} else if (field == "t/s") {
|
||||
|
@ -1297,6 +1345,7 @@ int main(int argc, char ** argv) {
|
|||
llama_kv_cache_clear(ctx);
|
||||
|
||||
uint64_t t_start = get_time_ns();
|
||||
|
||||
if (t.n_prompt > 0) {
|
||||
test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue