llama : flash_attn cparam + fix defrag

This commit is contained in:
Georgi Gerganov 2024-04-17 12:00:35 +03:00
parent 2c41180e88
commit 599ce84a71
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
4 changed files with 198 additions and 163 deletions

View file

@@ -148,6 +148,7 @@ struct gpt_params {
bool multiline_input = false; // reverse the usage of `\`
bool simple_io = false; // improves compatibility with subprocesses and limited consoles
bool cont_batching = true; // insert new sequences for decoding on-the-fly
bool flash_attn = false; // flash attention
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
bool ignore_eos = false; // ignore generated EOS tokens