examples : add passkey test (#3856)

* examples : add passkey test

* passkey : better prints

* passkey : select pass key pos from CLI

* passkey : simplify n_past logic

* make : add passkey target

* passkey : add "self-extend"-like context extension (#4810)

* llama : "self-extend"-like context extension

* passkey : add comment

* passkey : add readme
This commit is contained in:
Georgi Gerganov 2024-01-08 11:14:04 +02:00 committed by GitHub
parent b7e7982953
commit b0034d93ce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 361 additions and 1 deletions

View file

@ -1903,6 +1903,28 @@ static void llama_kv_cache_seq_shift(
cache.head = new_head != cache.size ? new_head : 0;
}
// Integer-divide the positions of cached cells by `d`.
//
// Affects every cell that belongs to `seq_id` and whose position lies in the
// half-open range [p0, p1). A negative p0 is treated as 0 and a negative p1 as
// "no upper bound". For each affected cell the change in position (new - old)
// is accumulated into the cell's `delta`, and `cache.has_shift` is set.
//
// d == 0 is rejected up front: integer division by zero is undefined
// behaviour, so the cache is left untouched in that case.
static void llama_kv_cache_seq_div(
        struct llama_kv_cache & cache,
                 llama_seq_id   seq_id,
                    llama_pos   p0,
                    llama_pos   p1,
                          int   d) {
    if (d == 0) {
        // nothing sensible can be computed - do not touch the cache
        return;
    }

    if (p0 < 0) p0 = 0;
    if (p1 < 0) p1 = std::numeric_limits<llama_pos>::max();

    for (uint32_t i = 0; i < cache.size; ++i) {
        if (cache.cells[i].has_seq_id(seq_id) && cache.cells[i].pos >= p0 && cache.cells[i].pos < p1) {
            cache.has_shift = true;

            {
                // remember the old position so that the applied change can be recorded in delta
                llama_pos p_old = cache.cells[i].pos;
                cache.cells[i].pos   /= d;
                cache.cells[i].delta += cache.cells[i].pos - p_old;
            }
        }
    }
}
//
// model loading and saving
//
@ -10140,9 +10162,21 @@ void llama_kv_cache_seq_keep(struct llama_context * ctx, llama_seq_id seq_id) {
}
// Public entry point: forwards to the internal llama_kv_cache_seq_shift overload
// operating on the context's own KV cache (ctx->kv_self).
// A delta of 0 is a no-op and the cache is not touched at all.
void llama_kv_cache_seq_shift(struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos delta) {
    const bool needs_shift = delta != 0;

    if (needs_shift) {
        llama_kv_cache_seq_shift(ctx->kv_self, seq_id, p0, p1, delta);
    }
}
// Public entry point: forwards to the internal llama_kv_cache_seq_div overload
// operating on the context's own KV cache (ctx->kv_self).
// A divisor of 1 is skipped - dividing by 1 would leave every position unchanged.
void llama_kv_cache_seq_div(struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, int d) {
    const bool needs_div = d != 1;

    if (needs_div) {
        llama_kv_cache_seq_div(ctx->kv_self, seq_id, p0, p1, d);
    }
}
// Returns the *maximum* size of the state
size_t llama_get_state_size(const struct llama_context * ctx) {
// we don't know size of rng until we actually serialize it. so reserve more than enough memory for its serialized state.