batched : add bench tool (#3545)

* batched : add bench tool

* batched : minor fix table

* batched-bench : add readme + n_kv_max is now configurable

* batched-bench : init warm-up batch

* batched-bench : pass custom set of PP, TG and PL

* batched-bench : add mmq CLI arg
This commit is contained in:
Georgi Gerganov 2023-10-11 21:25:33 +03:00 committed by GitHub
parent 24ba3d829e
commit 8c70a5ff25
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 321 additions and 3 deletions

View file

@ -1,8 +1,14 @@
# Define the default target now so that it is always the first target
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative infill benchmark-matmult parallel finetune export-lora tests/test-c.o
BUILD_TARGETS = \
main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
simple batched batched-bench save-load-state server embd-input-test gguf llama-bench baby-llama beam-search \
speculative infill benchmark-matmult parallel finetune export-lora tests/test-c.o
# Binaries only useful for tests
TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe
TEST_TARGETS = \
tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe
# Code coverage output files
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@ -557,6 +563,9 @@ simple: examples/simple/simple.cpp build-info.h ggml.
batched: examples/batched/batched.cpp build-info.h ggml.o llama.o common.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
batched-bench: examples/batched-bench/batched-bench.cpp build-info.h ggml.o llama.o common.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
quantize: examples/quantize/quantize.cpp build-info.h ggml.o llama.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)