server: Add "tokens per second" information in the backend (#10548)

* add cmake rvv support

* add timings

* remove space

* update readme

* fix

* fix code

* remove empty line

* add test

---------

Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
This commit is contained in:
haopeng 2024-12-02 21:45:54 +08:00 committed by GitHub
parent 991f8aabee
commit 64ed2091b2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 44 additions and 1 deletions

View file

@ -133,6 +133,7 @@ struct common_params_sampling {
bool penalize_nl = false; // consider newlines as a repeatable token
bool ignore_eos = false;
bool no_perf = false; // disable performance metrics
bool timing_per_token = false;
std::vector<std::string> dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY