diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index a073279d8..0b84a3c1c 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -138,6 +138,7 @@ struct slot_params {
 
         return json {
             {"n_predict",         n_predict},     // Server configured n_predict
+            {"seed",              sampling.seed},
             {"temperature",       sampling.temp},
             {"dynatemp_range",    sampling.dynatemp_range},
             {"dynatemp_exponent", sampling.dynatemp_exponent},
@@ -1381,7 +1382,6 @@ struct server_context {
         }
 
         default_generation_settings_for_props = slots[0].to_json();
-        default_generation_settings_for_props["seed"] = -1;
 
         // the update_slots() logic will always submit a maximum of n_batch or n_parallel tokens
         // note that n_batch can be > n_ctx (e.g. for non-causal attention models such as BERT where the KV cache is not used)
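Net effect: the seed reported in a slot's JSON (and therefore in the `default_generation_settings` served by the `/props` endpoint) now reflects the slot's actual `sampling.seed` instead of being overwritten with a hardcoded `-1`. Below is a minimal before/after sketch, not part of the patch, assuming nlohmann::json as used by server.cpp; `configured_seed` is a hypothetical value standing in for whatever seed the slot was configured with.

```cpp
// Minimal sketch (not part of the patch) of the reporting change,
// assuming nlohmann::json as used by server.cpp.
#include <cstdint>
#include <iostream>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    // Hypothetical: the seed the slot's sampling params were configured with.
    const uint32_t configured_seed = 42;

    // Before: to_json() omitted the seed and the /props setup overwrote it
    // with -1, so clients never saw the seed actually in use.
    json before = {{"n_predict", -1}};
    before["seed"] = -1;

    // After: to_json() emits the slot's own sampling.seed, and the override is gone.
    json after = {{"n_predict", -1}, {"seed", configured_seed}};

    std::cout << before.dump() << '\n';  // {"n_predict":-1,"seed":-1}
    std::cout << after.dump()  << '\n';  // {"n_predict":-1,"seed":42}
}
```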