llama : adjust default context size + print warnings (#10136)

* llama : adjust default context size + print warnings

ggml-ci

* ggml-ci : add missing gpu-layers + adjust context sizes
Georgi Gerganov 2024-11-02 15:18:56 +02:00 committed by GitHub
parent b634f8a26f
commit 1926d6e39d
3 changed files with 103 additions and 89 deletions

common/common.h

@@ -155,7 +155,7 @@ struct common_sampler_params {
 struct common_params {
     int32_t n_predict = -1;   // new tokens to predict
-    int32_t n_ctx     = 0;    // context size
+    int32_t n_ctx     = 4096; // context size
     int32_t n_batch   = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_ubatch  = 512;  // physical batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_keep    = 0;    // number of tokens to keep from initial prompt
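
For context: before this change, the default of 0 told llama.cpp to derive the context size from the model itself (its full training context), which for long-context models can allocate a very large KV cache by default; a fixed 4096 default avoids that, and the warnings the title refers to flag mismatches between the requested size and what the model was trained with. Below is a minimal sketch of such a check; the function name and message wording are illustrative assumptions, not the commit's literal code.

// Sketch only: in llama.cpp the training context would come from
// llama_n_ctx_train(model); the names and wording here are assumptions.
#include <cstdint>
#include <cstdio>

static void warn_if_ctx_exceeds_train(int32_t n_ctx, int32_t n_ctx_train) {
    if (n_ctx > n_ctx_train) {
        fprintf(stderr,
                "warning: requested context size (%d) is larger than the "
                "model's training context (%d)\n",
                n_ctx, n_ctx_train);
    }
}

int main() {
    // With the new 4096 default, a model trained on 2048 tokens would warn.
    warn_if_ctx_exceeds_train(4096, 2048);
    return 0;
}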