diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index fdd6ff01f..adcfa79f9 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1745,10 +1745,7 @@ struct server_context {
 
                     // Erase token cache
                     const size_t n_erased = slot->cache_tokens.size();
-                    if (!llama_kv_cache_seq_rm(ctx, slot->id + 1, -1, -1)) {
-                        send_error(task, "Failed to erase slot KV cache", ERROR_TYPE_INVALID_REQUEST);
-                        break;
-                    }
+                    llama_kv_cache_seq_rm(ctx, slot->id + 1, -1, -1);
                     slot->cache_tokens.clear();
 
                     server_task_result result;
diff --git a/llama.cpp b/llama.cpp
index ac8703ca2..145942078 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15243,9 +15243,7 @@ size_t llama_state_seq_set_data(struct llama_context * ctx, const uint8_t * src,
     GGML_ASSERT(!kv_self.recurrent); // not implemented
 
     // Wipe the slot
-    if (!llama_kv_cache_seq_rm(kv_self, dest_seq_id, -1, -1)) {
-        return 0;
-    }
+    llama_kv_cache_seq_rm(kv_self, dest_seq_id, -1, -1);
 
     const uint8_t * inp = src;
 
diff --git a/llama.h b/llama.h
index 3c313b884..0473f726a 100644
--- a/llama.h
+++ b/llama.h
@@ -523,6 +523,7 @@ extern "C" {
             struct llama_context * ctx);
 
     // Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
+    // Returns false if a partial sequence cannot be removed. Removing a whole sequence never fails
     // seq_id < 0 : match any sequence
     // p0 < 0 : [0, p1]
     // p1 < 0 : [p0, inf)