diff --git a/src/llama.cpp b/src/llama.cpp
index 607f27861..290015766 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -8623,6 +8623,12 @@ static int llama_decode_impl(
     }
 
     while (lctx.sbatch.n_tokens > 0) {
+        // If aborted, break out
+        if (lctx.abort_callback != nullptr && lctx.abort_callback(lctx.abort_callback_data)) {
+            LLAMA_LOG_ERROR("%s: token decode aborted\n", __func__);
+            return -1;
+        }
+
         llama_ubatch ubatch;
         {
             const int ret = llama_prepare_ubatch(lctx, kv_slot_restorer, ubatch, n_outputs, batch.n_tokens);
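
For context, a minimal caller-side sketch of how the abort callback consulted by this new check might be wired up. llama_set_abort_callback() is the existing public API in llama.h; the deadline/flag logic, the abort_state struct, and the decode_with_timeout() helper below are illustrative assumptions, not part of this patch.

// Sketch: registering an abort callback so the check added in the decode loop
// can cut a long llama_decode() call short (assumed setup, not from the patch).
#include <atomic>
#include <chrono>

#include "llama.h"

struct abort_state {
    std::chrono::steady_clock::time_point deadline;  // hypothetical wall-clock budget
    std::atomic<bool> cancelled{false};               // set from another thread to cancel
};

// Return true to request an abort; the decode loop polls this callback
// once per ubatch with the user data pointer passed at registration time.
static bool should_abort(void * data) {
    auto * st = static_cast<abort_state *>(data);
    return st->cancelled.load() || std::chrono::steady_clock::now() > st->deadline;
}

static int decode_with_timeout(llama_context * ctx, llama_batch batch, abort_state & st) {
    llama_set_abort_callback(ctx, should_abort, &st);

    // With this patch, an abort detected at the top of the ubatch loop makes
    // llama_decode() return -1, so the caller should treat a negative result
    // as "stopped early" rather than silently continuing to sample.
    return llama_decode(ctx, batch);
}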