lookup : use n_draft from CLI params

2023-12-17 20:06:41 +02:00 · 2023-12-17 20:06:41 +02:00 · d8ed670c6c
commit d8ed670c6c
parent 5b27975479
2 changed files with 2 additions and 2 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -51,7 +51,7 @@ struct gpt_params {
    int32_t n_ctx                           = 512;   // context size
    int32_t n_batch                         = 512;   // batch size for prompt processing (must be >=32 to use BLAS)
    int32_t n_keep                          = 0;     // number of tokens to keep from initial prompt
-    int32_t n_draft                         = 16;    // number of tokens to draft during speculative decoding
+    int32_t n_draft                         = 8;     // number of tokens to draft during speculative decoding
    int32_t n_chunks                        = -1;    // max number of chunks to process (-1 = unlimited)
    int32_t n_parallel                      = 1;     // number of parallel sequences to decode
    int32_t n_sequences                     = 1;     // number of sequences to decode
--- a/examples/lookup/lookup.cpp
+++ b/examples/lookup/lookup.cpp
@@ -17,7 +17,7 @@ int main(int argc, char ** argv){
    const int max_ngram_size = 3;

    // length of the candidate / draft sequence, if match is found
-    const int n_draft = 10;
+    const int n_draft = params.n_draft;

    const bool dump_kv_cache = params.dump_kv_cache;