lookup : use n_draft from CLI params
This commit is contained in:
parent
5b27975479
commit
d8ed670c6c
2 changed files with 2 additions and 2 deletions
|
@@ -51,7 +51,7 @@ struct gpt_params {
|
|||
int32_t n_ctx = 512; // context size
|
||||
int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
|
||||
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||
-int32_t n_draft = 16; // number of tokens to draft during speculative decoding
|
||||
+int32_t n_draft = 8; // number of tokens to draft during speculative decoding
|
||||
int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
|
||||
int32_t n_parallel = 1; // number of parallel sequences to decode
|
||||
int32_t n_sequences = 1; // number of sequences to decode
|
||||
|
|
|
@@ -17,7 +17,7 @@ int main(int argc, char ** argv){
|
|||
const int max_ngram_size = 3;
|
||||
|
||||
// length of the candidate / draft sequence, if match is found
|
||||
-const int n_draft = 10;
|
||||
+const int n_draft = params.n_draft;
|
||||
|
||||
const bool dump_kv_cache = params.dump_kv_cache;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue