diff --git a/common/common.h b/common/common.h
index 875e012a2..9659aa045 100644
--- a/common/common.h
+++ b/common/common.h
@@ -51,7 +51,7 @@ struct gpt_params {
     int32_t n_ctx                           = 512;   // context size
     int32_t n_batch                         = 512;   // batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_keep                          = 0;     // number of tokens to keep from initial prompt
-    int32_t n_draft                         = 16;    // number of tokens to draft during speculative decoding
+    int32_t n_draft                         = 8;     // number of tokens to draft during speculative decoding
     int32_t n_chunks                        = -1;    // max number of chunks to process (-1 = unlimited)
     int32_t n_parallel                      = 1;     // number of parallel sequences to decode
     int32_t n_sequences                     = 1;     // number of sequences to decode
diff --git a/examples/lookup/lookup.cpp b/examples/lookup/lookup.cpp
index 6b4eb957a..ab1be0a32 100644
--- a/examples/lookup/lookup.cpp
+++ b/examples/lookup/lookup.cpp
@@ -17,7 +17,7 @@ int main(int argc, char ** argv){
     const int max_ngram_size = 3;
 
     // length of the candidate / draft sequence, if match is found
-    const int n_draft = 10;
+    const int n_draft = params.n_draft;
 
     const bool dump_kv_cache = params.dump_kv_cache;