llama.cpp : fix MEM_REQ_SCRATCH0 reusing the value of n_ctx of the first call

slaren 2023-08-16 22:40:53 +02:00
parent 5765f90f58
commit 89a70f78e7
2 changed files with 12 additions and 11 deletions
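
The underlying C++ pitfall: a function-local static is initialized only once, on the first call, so MEM_REQ_SCRATCH0's n_ctx-dependent scratch sizes stayed pinned to whatever n_ctx the first caller passed. Below is a minimal standalone sketch of the pattern and of the fix this commit applies (return by value instead of a reference to a static local); names, keys, and byte counts are simplified stand-ins, not the actual llama.cpp tables.

#include <cstdio>
#include <map>
#include <string>

// Buggy shape: the static local is built once, on the first call, so every
// later call with a different n_ctx silently reuses the first call's sizes.
static const std::map<std::string, size_t> & scratch_req_buggy(int n_ctx) {
    static std::map<std::string, size_t> k_sizes = {
        { "7B", (size_t) n_ctx / 16 + 100 },
    };
    return k_sizes;
}

// Fixed shape (the same change this commit makes to MEM_REQ_SCRATCH0):
// drop the static and the reference, return the map by value so each call
// recomputes the sizes from its own n_ctx.
static std::map<std::string, size_t> scratch_req_fixed(int n_ctx) {
    std::map<std::string, size_t> k_sizes = {
        { "7B", (size_t) n_ctx / 16 + 100 },
    };
    return k_sizes;
}

int main() {
    size_t a = scratch_req_buggy(512).at("7B");   // first call latches n_ctx = 512
    size_t b = scratch_req_buggy(4096).at("7B");  // static map is not rebuilt
    printf("buggy: %zu %zu\n", a, b);             // buggy: 132 132

    size_t c = scratch_req_fixed(512).at("7B");
    size_t d = scratch_req_fixed(4096).at("7B");
    printf("fixed: %zu %zu\n", c, d);             // fixed: 132 356
    return 0;
}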


@@ -477,7 +477,7 @@ const bool backend_params::blas = !!ggml_cpu_has_blas();
 // benchmark params
 struct bench_params {
-    int n_prompt ;
+    int n_prompt;
     int n_gen;
     static const std::vector<std::string> & get_fields() {
@@ -630,17 +630,18 @@ struct markdown_printer : public printer {
     virtual void print_header(const cmd_params & params) {
         fields = { "model", "backend" };
-        if (backend_params::get_backend() != "CPU") {
+        bool is_cpu_backend = backend_params::get_backend() == "CPU" || backend_params::get_backend() == "BLAS";
+        if (!is_cpu_backend) {
             fields.push_back("n_gpu_layers");
         }
+        if (params.n_threads.size() > 1 || is_cpu_backend) {
+            fields.push_back("n_threads");
+        }
         if (params.n_batch.size() > 1) {
             fields.push_back("n_batch");
         }
-        if (params.n_threads.size() > 1 || backend_params::get_backend() == "CPU") {
-            fields.push_back("n_threads");
-        }
         if (params.f32_kv.size() > 1) {
-            fields.push_back("f32_kv");
+            fields.push_back("f16_kv");
         }
         if (params.main_gpu.size() > 1) {
             fields.push_back("main_gpu");
@@ -723,9 +724,9 @@ void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int
     std::vector<llama_token> tokens(n_batch, llama_token_bos());
     int n_processed = 0;
     while (n_processed < n_prompt) {
-        int n = std::min(n_prompt - n_processed, n_batch);
-        llama_eval(ctx, tokens.data(), n, n_past + n_processed, n_threads);
-        n_processed += n;
+        int n_tokens = std::min(n_prompt - n_processed, n_batch);
+        llama_eval(ctx, tokens.data(), n_tokens, n_past + n_processed, n_threads);
+        n_processed += n_tokens;
     }
 }


@@ -115,9 +115,9 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
 // memory sizes (calculated for n_batch == 512)
 //
-static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0(int n_ctx)
+static std::map<e_model, size_t> MEM_REQ_SCRATCH0(int n_ctx)
 {
-    static std::map<e_model, size_t> k_sizes = {
+    std::map<e_model, size_t> k_sizes = {
         { MODEL_3B,  ((size_t) n_ctx / 16ull +  92ull) * MB },
         { MODEL_7B,  ((size_t) n_ctx / 16ull + 100ull) * MB },
         { MODEL_13B, ((size_t) n_ctx / 12ull + 120ull) * MB },