diff --git a/llama.cpp b/llama.cpp index ffbac7382..f708d690c 100644 --- a/llama.cpp +++ b/llama.cpp @@ -15283,7 +15283,7 @@ size_t llama_state_seq_set_data(struct llama_context * ctx, const uint8_t * src, } // Allocate the new cells for the slot - { + if (cell_count) { llama_batch batch = llama_batch_init(cell_count, 0, 1); batch.n_tokens = cell_count; for (uint32_t i = 0; i < cell_count; ++i) { @@ -15327,9 +15327,11 @@ size_t llama_state_seq_set_data(struct llama_context * ctx, const uint8_t * src, return 0; } - // Read and set the keys for the whole cell range - ggml_backend_tensor_set(kv_self.k_l[il], inp, kv_head * k_size_row, cell_count * k_size_row); - inp += cell_count * k_size_row; + if (cell_count) { + // Read and set the keys for the whole cell range + ggml_backend_tensor_set(kv_self.k_l[il], inp, kv_head * k_size_row, cell_count * k_size_row); + inp += cell_count * k_size_row; + } } // For each layer, read the values for each cell (transposed) @@ -15339,17 +15341,19 @@ size_t llama_state_seq_set_data(struct llama_context * ctx, const uint8_t * src, memcpy(&v_size_el_ref, inp, sizeof(v_size_el_ref)); inp += sizeof(v_size_el_ref); - const size_t v_size_el = ggml_type_size(kv_self.v_l[il]->type); - if (v_size_el != v_size_el_ref) { - llama_kv_cache_seq_rm(kv_self, dest_seq_id, -1, -1); - return 0; - } + if (cell_count) { + const size_t v_size_el = ggml_type_size(kv_self.v_l[il]->type); + if (v_size_el != v_size_el_ref) { + llama_kv_cache_seq_rm(kv_self, dest_seq_id, -1, -1); + return 0; + } - // For each row in the transposed matrix, read the values for the whole cell range - for (uint32_t j = 0; j < n_embd_v_gqa; ++j) { - const size_t dst_offset = (kv_head + j * kv_size) * v_size_el; - ggml_backend_tensor_set(kv_self.v_l[il], inp, dst_offset, cell_count * v_size_el); - inp += cell_count * v_size_el; + // For each row in the transposed matrix, read the values for the whole cell range + for (uint32_t j = 0; j < n_embd_v_gqa; ++j) { + const size_t dst_offset = (kv_head + j * kv_size) * v_size_el; + ggml_backend_tensor_set(kv_self.v_l[il], inp, dst_offset, cell_count * v_size_el); + inp += cell_count * v_size_el; + } } }