main : add self-extend support (#4815)

* examples : add passkey test

* passkey : better prints

* passkey : select pass key pos from CLI

* passkey : simplify n_past logic

* llama : "self-extend"-like context extension

* passkey : add comment

* main : add Self-Extend support

* llama : add comment about llama_kv_cache_seq_div
This commit is contained in:
Georgi Gerganov 2024-01-08 11:18:32 +02:00 committed by GitHub
parent b0034d93ce
commit 52531fdff8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 87 additions and 24 deletions

View file

@ -62,6 +62,8 @@ struct gpt_params {
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs
int32_t n_beams = 0; // if non-zero then use beam search of given width.
int32_t grp_attn_n = 1; // group-attention factor
int32_t grp_attn_w = 512; // group-attention width
float rope_freq_base = 0.0f; // RoPE base frequency
float rope_freq_scale = 0.0f; // RoPE frequency scaling factor
float yarn_ext_factor = -1.0f; // YaRN extrapolation mix factor