diff --git a/examples/common.h b/examples/common.h index d20aacd02..0470368d5 100644 --- a/examples/common.h +++ b/examples/common.h @@ -2,7 +2,6 @@ #pragma once -#include "ggml.h" #include "llama.h" #include @@ -21,7 +20,7 @@ struct gpt_params { int32_t repeat_last_n = 64; // last n tokens to penalize int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions) int32_t n_ctx = 512; // context size - int32_t n_batch = ggml_cpu_has_blas() ? 512 : 8; // batch size for prompt processing (must be >=32 to use BLAS) + int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS) int32_t n_keep = 0; // number of tokens to keep from initial prompt // sampling parameters