batched : add bench tool (#3545)
* batched : add bench tool * batched : minor fix table * batched-bench : add readme + n_kv_max is now configurable * batched-bench : init warm-up batch * batched-bench : pass custom set of PP, TG and PL * batched-bench : add mmq CLI arg
This commit is contained in:
parent
24ba3d829e
commit
8c70a5ff25
7 changed files with 321 additions and 3 deletions
13
Makefile
13
Makefile
|
@ -1,8 +1,14 @@
|
|||
# Define the default target now so that it is always the first target
|
||||
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative infill benchmark-matmult parallel finetune export-lora tests/test-c.o
|
||||
BUILD_TARGETS = \
|
||||
main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
|
||||
simple batched batched-bench save-load-state server embd-input-test gguf llama-bench baby-llama beam-search \
|
||||
speculative infill benchmark-matmult parallel finetune export-lora tests/test-c.o
|
||||
|
||||
# Binaries only useful for tests
|
||||
TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe
|
||||
TEST_TARGETS = \
|
||||
tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
|
||||
tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
|
||||
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe
|
||||
|
||||
# Code coverage output files
|
||||
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
|
||||
|
@ -557,6 +563,9 @@ simple: examples/simple/simple.cpp build-info.h ggml.
|
|||
batched: examples/batched/batched.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
|
||||
batched-bench: examples/batched-bench/batched-bench.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
|
||||
quantize: examples/quantize/quantize.cpp build-info.h ggml.o llama.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue