common : add command-line arg to disable KV cache offloading

This commit is contained in:
Georgi Gerganov 2023-12-03 20:31:01 +02:00
parent c80b8a2bff
commit e262947d43
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
4 changed files with 65 additions and 48 deletions

View file

@@ -123,6 +123,7 @@ struct gpt_params {
bool verbose_prompt = false; // print prompt tokens before generation
bool infill = false; // use infill mode
bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
bool no_kv_offload = false; // disable KV cache offloading
// multimodal models (see examples/llava)
std::string mmproj = ""; // path to multimodal projector