lookup : use n_draft from CLI params

This commit is contained in:
Georgi Gerganov 2023-12-17 20:06:41 +02:00
parent 5b27975479
commit d8ed670c6c
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
2 changed files with 2 additions and 2 deletions

View file

@@ -51,7 +51,7 @@ struct gpt_params {
int32_t n_ctx = 512; // context size
int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
int32_t n_keep = 0; // number of tokens to keep from initial prompt
-int32_t n_draft = 16; // number of tokens to draft during speculative decoding
+int32_t n_draft = 8; // number of tokens to draft during speculative decoding
int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
int32_t n_parallel = 1; // number of parallel sequences to decode
int32_t n_sequences = 1; // number of sequences to decode

View file

@@ -17,7 +17,7 @@ int main(int argc, char ** argv){
const int max_ngram_size = 3;
// length of the candidate / draft sequence, if match is found
-const int n_draft = 10;
+const int n_draft = params.n_draft;
const bool dump_kv_cache = params.dump_kv_cache;