reduce default n_batch to 2048
This commit is contained in:
parent
1f564815a3
commit
976176d0dd
3 changed files with 3 additions and 3 deletions
|
@ -51,7 +51,7 @@ struct gpt_params {
|
||||||
int32_t n_threads_batch_draft = -1;
|
int32_t n_threads_batch_draft = -1;
|
||||||
int32_t n_predict = -1; // new tokens to predict
|
int32_t n_predict = -1; // new tokens to predict
|
||||||
int32_t n_ctx = 512; // context size
|
int32_t n_ctx = 512; // context size
|
||||||
-    int32_t n_batch = 4096; // logical batch size for prompt processing (must be >=32 to use BLAS)
+    int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
|
||||||
int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS)
|
int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS)
|
||||||
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||||
int32_t n_draft = 5; // number of tokens to draft during speculative decoding
|
int32_t n_draft = 5; // number of tokens to draft during speculative decoding
|
||||||
|
|
|
@ -184,7 +184,7 @@ static const cmd_params cmd_params_defaults = {
|
||||||
/* model */ {"models/7B/ggml-model-q4_0.gguf"},
|
/* model */ {"models/7B/ggml-model-q4_0.gguf"},
|
||||||
/* n_prompt */ {512},
|
/* n_prompt */ {512},
|
||||||
/* n_gen */ {128},
|
/* n_gen */ {128},
|
||||||
-    /* n_batch     */ {4096},
+    /* n_batch     */ {2048},
|
||||||
/* n_ubatch */ {512},
|
/* n_ubatch */ {512},
|
||||||
/* type_k */ {GGML_TYPE_F16},
|
/* type_k */ {GGML_TYPE_F16},
|
||||||
/* type_v */ {GGML_TYPE_F16},
|
/* type_v */ {GGML_TYPE_F16},
|
||||||
|
|
|
@ -12579,7 +12579,7 @@ struct llama_context_params llama_context_default_params() {
|
||||||
struct llama_context_params result = {
|
struct llama_context_params result = {
|
||||||
/*.seed =*/ LLAMA_DEFAULT_SEED,
|
/*.seed =*/ LLAMA_DEFAULT_SEED,
|
||||||
/*.n_ctx =*/ 512,
|
/*.n_ctx =*/ 512,
|
||||||
-        /*.n_batch                   =*/ 4096,
+        /*.n_batch                   =*/ 2048,
|
||||||
/*.n_ubatch =*/ 512,
|
/*.n_ubatch =*/ 512,
|
||||||
/*.n_seq_max =*/ 1,
|
/*.n_seq_max =*/ 1,
|
||||||
/*.n_threads =*/ GGML_DEFAULT_N_THREADS, // TODO: better default
|
/*.n_threads =*/ GGML_DEFAULT_N_THREADS, // TODO: better default
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue