fix restoring zero cell count
commit bf1d4932f8
parent a71ec3db7b

1 changed file with 18 additions and 14 deletions:

 llama.cpp | 32 ++++++++++++++++++--------------
--- a/llama.cpp
+++ b/llama.cpp
@@ -15283,7 +15283,7 @@ size_t llama_state_seq_set_data(struct llama_context * ctx, const uint8_t * src, llama_seq_id dest_seq_id)
     }
 
     // Allocate the new cells for the slot
-    {
+    if (cell_count) {
         llama_batch batch = llama_batch_init(cell_count, 0, 1);
         batch.n_tokens = cell_count;
         for (uint32_t i = 0; i < cell_count; ++i) {
@@ -15327,9 +15327,11 @@ size_t llama_state_seq_set_data(struct llama_context * ctx, const uint8_t * src, llama_seq_id dest_seq_id)
             return 0;
         }
 
-        // Read and set the keys for the whole cell range
-        ggml_backend_tensor_set(kv_self.k_l[il], inp, kv_head * k_size_row, cell_count * k_size_row);
-        inp += cell_count * k_size_row;
+        if (cell_count) {
+            // Read and set the keys for the whole cell range
+            ggml_backend_tensor_set(kv_self.k_l[il], inp, kv_head * k_size_row, cell_count * k_size_row);
+            inp += cell_count * k_size_row;
+        }
     }
 
     // For each layer, read the values for each cell (transposed)
@@ -15339,17 +15341,19 @@ size_t llama_state_seq_set_data(struct llama_context * ctx, const uint8_t * src, llama_seq_id dest_seq_id)
         memcpy(&v_size_el_ref, inp, sizeof(v_size_el_ref));
         inp += sizeof(v_size_el_ref);
 
-        const size_t v_size_el = ggml_type_size(kv_self.v_l[il]->type);
-        if (v_size_el != v_size_el_ref) {
-            llama_kv_cache_seq_rm(kv_self, dest_seq_id, -1, -1);
-            return 0;
-        }
+        if (cell_count) {
+            const size_t v_size_el = ggml_type_size(kv_self.v_l[il]->type);
+            if (v_size_el != v_size_el_ref) {
+                llama_kv_cache_seq_rm(kv_self, dest_seq_id, -1, -1);
+                return 0;
+            }
 
-        // For each row in the transposed matrix, read the values for the whole cell range
-        for (uint32_t j = 0; j < n_embd_v_gqa; ++j) {
-            const size_t dst_offset = (kv_head + j * kv_size) * v_size_el;
-            ggml_backend_tensor_set(kv_self.v_l[il], inp, dst_offset, cell_count * v_size_el);
-            inp += cell_count * v_size_el;
+            // For each row in the transposed matrix, read the values for the whole cell range
+            for (uint32_t j = 0; j < n_embd_v_gqa; ++j) {
+                const size_t dst_offset = (kv_head + j * kv_size) * v_size_el;
+                ggml_backend_tensor_set(kv_self.v_l[il], inp, dst_offset, cell_count * v_size_el);
+                inp += cell_count * v_size_el;
+            }
         }
     }
 
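All three restore paths that touch the KV cache are now guarded by if (cell_count), so deserializing a sequence that holds no cells skips the batch allocation and the key/value tensor writes instead of operating on an empty range. Below is a minimal sketch of the case this fixes, assuming the three-argument llama_state_seq_* API declared in llama.h around this commit; copy_empty_seq is a hypothetical helper written for illustration, not part of the library.

// Sketch: save and restore the state of a sequence that currently holds no
// KV cells, so the serialized blob carries cell_count == 0.
#include <cstdint>
#include <vector>

#include "llama.h"

// Hypothetical helper, for illustration only.
static bool copy_empty_seq(struct llama_context * ctx, llama_seq_id src_id, llama_seq_id dst_id) {
    const size_t size = llama_state_seq_get_size(ctx, src_id);
    std::vector<uint8_t> buf(size);
    if (llama_state_seq_get_data(ctx, buf.data(), src_id) != size) {
        return false; // serialization wrote fewer bytes than expected
    }
    // llama_state_seq_set_data() returns 0 on failure and the number of bytes
    // consumed on success. With this fix, a blob whose cell_count is 0
    // restores cleanly, because the batch allocation and the
    // ggml_backend_tensor_set() writes are skipped.
    return llama_state_seq_set_data(ctx, buf.data(), dst_id) != 0;
}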