llama : flash_attn cparam + fix defrag

2024-04-17 12:00:35 +03:00 · 2024-04-17 12:00:35 +03:00 · 599ce84a71
commit 599ce84a71
parent 2c41180e88
4 changed files with 198 additions and 163 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -148,6 +148,7 @@ struct gpt_params {
    bool multiline_input   = false; // reverse the usage of `\`
    bool simple_io         = false; // improves compatibility with subprocesses and limited consoles
    bool cont_batching     = true;  // insert new sequences for decoding on-the-fly
+    bool flash_attn        = false; // flash attention

    bool input_prefix_bos  = false; // prefix BOS to user inputs, preceding input_prefix
    bool ignore_eos        = false; // ignore generated EOS tokens