set n_batch to 512 for all cases

This commit is contained in:
eiery 2023-04-22 00:07:42 -04:00
parent 131159ff1b
commit b2e8a320c8

View file

@@ -2,7 +2,6 @@
#pragma once
#include "ggml.h"
#include "llama.h"
#include <string>
@@ -21,7 +20,7 @@ struct gpt_params {
int32_t repeat_last_n = 64; // last n tokens to penalize
int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
int32_t n_ctx = 512; // context size
-int32_t n_batch = ggml_cpu_has_blas() ? 512 : 8; // batch size for prompt processing (must be >=32 to use BLAS)
+int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
int32_t n_keep = 0; // number of tokens to keep from initial prompt
// sampling parameters