From a2b48b95f59fd96007fd5a59c52744671a0f7c49 Mon Sep 17 00:00:00 2001 From: Jan Boon Date: Thu, 28 Mar 2024 01:11:07 +0800 Subject: [PATCH] cleanup error cases --- examples/server/server.cpp | 2 +- llama.cpp | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index a86a20ae6..c39b59a83 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1693,7 +1693,7 @@ struct server_context { size_t nread = llama_set_seq_data(ctx, state_data.data(), slot->id + 1); if (nread == 0) { - send_error(task, "Unable to restore slot, no available space in KV cache", ERROR_TYPE_INVALID_REQUEST); + send_error(task, "Unable to restore slot, no available space in KV cache or invalid slot save file", ERROR_TYPE_INVALID_REQUEST); break; } GGML_ASSERT(nread <= state_data.size()); diff --git a/llama.cpp b/llama.cpp index 6987a5344..8151b24e6 100644 --- a/llama.cpp +++ b/llama.cpp @@ -15290,8 +15290,6 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama const uint32_t kv_size = kv_self.size; const uint32_t kv_head = kv_self.head; - GGML_ASSERT(n_layer == n_layer_ref); - GGML_ASSERT(n_embd_v_gqa == n_embd_v_gqa_ref); // For each layer, read the keys for each cell, one row is one cell, read as one contiguous blo for (int il = 0; il < (int)n_layer; ++il) { @@ -15300,7 +15298,10 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama memcpy(&k_size_row_ref, inp, sizeof(k_size_row_ref)); inp += sizeof(k_size_row_ref); const size_t k_size_row = ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa); - GGML_ASSERT(k_size_row == k_size_row_ref); + if (k_size_row != k_size_row_ref) { + llama_kv_cache_seq_rm(kv_self, dest_seq_id, -1, -1); + return 0; + } // Read and set the keys for the whole cell range ggml_backend_tensor_set(kv_self.k_l[il], inp, kv_head * k_size_row, cell_count * k_size_row); @@ -15315,7 +15316,10 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama inp += sizeof(v_size_el_ref); const size_t v_size_el = ggml_type_size(kv_self.v_l[il]->type); - GGML_ASSERT(v_size_el == v_size_el_ref); + if (v_size_el != v_size_el_ref) { + llama_kv_cache_seq_rm(kv_self, dest_seq_id, -1, -1); + return 0; + } // For each row in the transposed matrix, read the values for the whole cell range for (uint32_t j = 0; j < n_embd_v_gqa; ++j) {