set n_batch to 512 for all cases
This commit is contained in:
parent
131159ff1b
commit
b2e8a320c8
1 changed file with 1 addition and 2 deletions
|
@@ -2,7 +2,6 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
-#include "ggml.h"
|
||||
#include "llama.h"
|
||||
|
||||
#include <string>
|
||||
|
@@ -21,7 +20,7 @@ struct gpt_params {
|
|||
int32_t repeat_last_n = 64; // last n tokens to penalize
|
||||
int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
|
||||
int32_t n_ctx = 512; // context size
|
||||
-int32_t n_batch = ggml_cpu_has_blas() ? 512 : 8; // batch size for prompt processing (must be >=32 to use BLAS)
|
||||
+int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
|
||||
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||
|
||||
// sampling parameters
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue