llama : adjust default context size + print warnings (#10136)
* llama : adjust default context size + print warnings (ggml-ci)
* ggml-ci : add missing gpu-layers + adjust context sizes
This commit is contained in:
parent: b634f8a26f
commit: 1926d6e39d
3 changed files with 103 additions and 89 deletions
@@ -155,7 +155,7 @@ struct common_sampler_params {

 struct common_params {
     int32_t n_predict = -1;   // new tokens to predict
-    int32_t n_ctx     = 0;    // context size
+    int32_t n_ctx     = 4096; // context size
     int32_t n_batch   = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_ubatch  = 512;  // physical batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_keep    = 0;    // number of tokens to keep from initial prompt
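The visible hunk only changes the default: previously n_ctx = 0 meant "fall back to the model's training context", while the new default is an explicit 4096. The warning half of the commit title is not shown in this hunk. The following is a minimal sketch, not the actual patch, of how such a default/warning interaction can work; resolve_n_ctx and n_ctx_train are hypothetical names used for illustration.

// Minimal sketch, NOT the code from this commit (the warning logic lives
// in parts of the diff not shown above). resolve_n_ctx is a hypothetical
// helper; n_ctx_train stands for the model's training context length.
#include <cstdint>
#include <cstdio>

static uint32_t resolve_n_ctx(uint32_t n_ctx_requested, uint32_t n_ctx_train) {
    if (n_ctx_requested == 0) {
        // 0 keeps the old behavior: use the model's training context
        return n_ctx_train;
    }
    if (n_ctx_requested > n_ctx_train) {
        // warn when the user (or the new 4096 default) requests more
        // context than the model was trained with
        fprintf(stderr,
                "warning: requested context size (%u) is greater than the "
                "model's training context (%u)\n",
                n_ctx_requested, n_ctx_train);
    }
    return n_ctx_requested;
}

An explicit default like 4096 makes memory usage predictable across models, at the cost of needing a warning path for models trained with a smaller context.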