diff --git a/examples/duo/duo.cpp b/examples/duo/duo.cpp
index 75abf2467..4c65769de 100644
--- a/examples/duo/duo.cpp
+++ b/examples/duo/duo.cpp
@@ -136,7 +136,7 @@ static int speculation(
         }
         if (wait)
         {
-            std::this_thread::sleep_for(std::chrono::milliseconds{10});
+            std::this_thread::sleep_for(std::chrono::milliseconds{5});
             continue;
         }
 
@@ -158,7 +158,6 @@ static int speculation(
         auto& shared = spec_ctx->candidate;
         bool match = true;
         match_len = local.size() - 1;
-        fprintf(stderr, "spec #%d: %zu | %zu\n", active, shared.size(), local.size());
         for (size_t i = 0; i < std::min(shared.size(), local.size()); i++)
         {
             if (shared[i] != local[i])
@@ -167,7 +166,7 @@ static int speculation(
                 match_len = i;
                 // here we need to clear both contexts
                 llama_kv_cache_seq_rm(ctx[0], 0, i, -1);
-                llama_kv_cache_seq_rm(ctx[1], 0, i, -1);
+                //llama_kv_cache_seq_rm(ctx[1], 0, i, -1);
                 break;
             }
         }
@@ -318,17 +317,20 @@ static int target(
             break;
         }
-        fprintf(stderr, "tgt: input_seq.size() = %zu\n", input_seq.size());
+        fprintf(stderr, "\ntgt: input_seq.size() = %zu\n", input_seq.size());
         llama_batch_clear(batch);
         for (size_t i = 0; i < input_seq.size(); i++)
         {
             llama_batch_add(batch, input_seq[i], n_cur - 1 + i, { 0 }, true);
         }
 
+        auto s_us = ggml_time_us();
         if (llama_decode(ctx, batch))
         {
             fprintf(stderr, "%s : failed to eval, return code %d\n", __func__, 1);
             return 1;
         }
+        auto eval_us = ggml_time_us() - s_us;
+        fprintf(stderr, "eval_time: %lld", eval_us);
         logits_from = 0;
         logits_to = input_seq.size();
     }
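
Note on the timing added in the last hunk: ggml_time_us() returns an int64_t, and the new fprintf passes it straight to %lld with no trailing newline, which can misprint on platforms where int64_t is not long long and runs into the next log line. A minimal sketch of the same stopwatch pattern with both issues addressed (assuming ctx and batch are set up as in duo.cpp; the tokens/s line is illustrative, not part of the patch):

    const int64_t s_us = ggml_time_us();
    if (llama_decode(ctx, batch)) {
        fprintf(stderr, "%s : failed to eval, return code %d\n", __func__, 1);
        return 1;
    }
    const int64_t eval_us = ggml_time_us() - s_us;
    // cast explicitly: int64_t is not guaranteed to be long long everywhere
    fprintf(stderr, "eval_time: %lld us (%.2f tokens/s)\n",
            (long long) eval_us,
            eval_us > 0 ? 1e6 * batch.n_tokens / (double) eval_us : 0.0);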