diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index e049927d0..fc382c68d 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1955,11 +1955,6 @@ struct server_context {
                             // reuse any previously computed tokens that are common with the new prompt
                             slot.n_past = longest_common_prefix(slot.cache_tokens, prompt_tokens);
 
-                            // push the prompt into the sampling context (do not apply grammar)
-                            for (int i = 0; i < slot.n_past; ++i) {
-                                common_sampler_accept(slot.smpl, slot.cache_tokens[i], false);
-                            }
-
                             // reuse chunks from the cached prompt by shifting their KV cache in the new position
                             if (params.n_cache_reuse > 0) {
                                 size_t head_c = slot.n_past; // cache
@@ -1991,9 +1986,6 @@ struct server_context {
 
                                         for (size_t i = 0; i < n_match; i++) {
                                             slot.cache_tokens[head_p + i] = slot.cache_tokens[head_c + i];
-
-                                            common_sampler_accept(slot.smpl, slot.cache_tokens[head_p + i], false);
-
                                             slot.n_past++;
                                         }
 
@@ -2074,6 +2066,11 @@ struct server_context {
 
                     GGML_ASSERT(batch.n_tokens > 0);
 
+                    // push the entire prompt into the sampling context (do not apply grammar)
+                    for (int i = 0; i < slot.n_prompt_tokens; ++i) {
+                        common_sampler_accept(slot.smpl, slot.prompt_tokens[i], false);
+                    }
+
                     // extract the logits only for the last token
                     batch.logits[batch.n_tokens - 1] = true;