From 2d262ea9f0cbfba6d525b94a05b51cd6dfc55464 Mon Sep 17 00:00:00 2001
From: Green Sky
Date: Thu, 23 Mar 2023 20:50:09 +0100
Subject: [PATCH] fix perplexity - it's memory needs dont grow, so we skip it

---
 llama.cpp | 2 +-
 main.cpp  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index bf86247ea..f79fa0bde 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -632,7 +632,7 @@ static bool llama_eval_internal(
     auto & mem_at_token1 = lctx.mem_at_token1;
 
     // TODO: fix this hardcoded size
-    static size_t buf_size = size_t(n_ctx)*1024*1024;
+    static size_t buf_size = size_t(n_ctx)*size_t(N)*128*1024;
     static void * buf = malloc(buf_size);
 
     const size_t C0 = mem_at_token0; // ~base
diff --git a/main.cpp b/main.cpp
index d50abdf5b..63bdce5cb 100644
--- a/main.cpp
+++ b/main.cpp
@@ -219,7 +219,7 @@ int main(int argc, char ** argv) {
     // (fill in mem_at_token0 and mem_at_token1)
     // TODO: better way to do that
     // TODO(Green-Sky): move to internal and detect first time usage
-    {
+    if (!params.perplexity) { // perplexity does not grow over time
         // we make 2 evals, of batchsize to take 2 measurements, to determine base and growth
         std::vector<llama_token> tmp(params.n_batch*2, 2);
         tmp[0] = llama_token_bos();
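
For readers following along, here is a minimal, self-contained sketch (not part of the patch) of the idea the second hunk gates off for perplexity runs: do two warm-up evals, of n_batch and 2*n_batch tokens, record the scratch usage after each (mem_at_token0, mem_at_token1), and model the per-eval requirement as a base cost plus per-token growth. Only the names mem_at_token0, mem_at_token1, C0 and n_batch come from the diff; the example numbers and the exact extrapolation formula are assumptions for illustration.

// sketch.cpp - illustration only; assumed values and formula, not llama.cpp code
#include <cstddef>
#include <cstdio>

int main() {
    // pretend measurements taken after the two warm-up evals (assumed values)
    const size_t n_batch       = 8;                  // tokens in the first eval
    const size_t mem_at_token0 = 14u * 1024 * 1024;  // scratch used after eval of n_batch tokens
    const size_t mem_at_token1 = 22u * 1024 * 1024;  // scratch used after eval of 2*n_batch tokens

    // linear model: base cost plus growth per additional token
    const size_t C0 = mem_at_token0;                             // ~base (name taken from the diff)
    const size_t C1 = (mem_at_token1 - mem_at_token0) / n_batch; // bytes per extra token (assumed)

    // estimate the scratch buffer needed for a future eval of N tokens
    const size_t N = 512;
    std::printf("estimated buf_size for N=%zu: %zu bytes\n", N, C0 + N * C1);
    return 0;
}

The patch itself still hardcodes the buffer size in the first hunk (the TODO remains), but scales it with both n_ctx and the batch size N instead of n_ctx alone; the second hunk skips the two warm-up evals when running perplexity, since, per the commit message, its memory needs do not grow over time.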