llama : unified KV cache + batch inference API

2023-09-18 10:08:22 +03:00 · 2023-09-18 10:08:22 +03:00 · d29e76937c
commit d29e76937c
parent fad56936d4
10 changed files with 315 additions and 236 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -111,7 +111,6 @@ struct gpt_params {
    bool use_mmap          = true;  // use mmap for faster loads
    bool use_mlock         = false; // use mlock to keep model in memory
    bool numa              = false; // attempt optimizations that help on some NUMA systems
-    bool export_cgraph     = false; // export the computation graph
    bool verbose_prompt    = false; // print prompt tokens before generation
 };