reduce default n_batch to 2048

This commit is contained in:
slaren 2024-03-13 18:37:17 +01:00
parent 1f564815a3
commit 976176d0dd
3 changed files with 3 additions and 3 deletions

View file

@@ -51,7 +51,7 @@ struct gpt_params {
int32_t n_threads_batch_draft = -1; int32_t n_threads_batch_draft = -1;
int32_t n_predict = -1; // new tokens to predict int32_t n_predict = -1; // new tokens to predict
int32_t n_ctx = 512; // context size int32_t n_ctx = 512; // context size
int32_t n_batch = 4096; // logical batch size for prompt processing (must be >=32 to use BLAS) int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS) int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS)
int32_t n_keep = 0; // number of tokens to keep from initial prompt int32_t n_keep = 0; // number of tokens to keep from initial prompt
int32_t n_draft = 5; // number of tokens to draft during speculative decoding int32_t n_draft = 5; // number of tokens to draft during speculative decoding

View file

@@ -184,7 +184,7 @@ static const cmd_params cmd_params_defaults = {
/* model */ {"models/7B/ggml-model-q4_0.gguf"}, /* model */ {"models/7B/ggml-model-q4_0.gguf"},
/* n_prompt */ {512}, /* n_prompt */ {512},
/* n_gen */ {128}, /* n_gen */ {128},
/* n_batch */ {4096}, /* n_batch */ {2048},
/* n_ubatch */ {512}, /* n_ubatch */ {512},
/* type_k */ {GGML_TYPE_F16}, /* type_k */ {GGML_TYPE_F16},
/* type_v */ {GGML_TYPE_F16}, /* type_v */ {GGML_TYPE_F16},

View file

@@ -12579,7 +12579,7 @@ struct llama_context_params llama_context_default_params() {
struct llama_context_params result = { struct llama_context_params result = {
/*.seed =*/ LLAMA_DEFAULT_SEED, /*.seed =*/ LLAMA_DEFAULT_SEED,
/*.n_ctx =*/ 512, /*.n_ctx =*/ 512,
/*.n_batch =*/ 4096, /*.n_batch =*/ 2048,
/*.n_ubatch =*/ 512, /*.n_ubatch =*/ 512,
/*.n_seq_max =*/ 1, /*.n_seq_max =*/ 1,
/*.n_threads =*/ GGML_DEFAULT_N_THREADS, // TODO: better default /*.n_threads =*/ GGML_DEFAULT_N_THREADS, // TODO: better default