fix perplexity - its memory needs don't grow, so we skip it
This commit is contained in:
parent 424281a4fb
commit 2d262ea9f0

2 changed files with 2 additions and 2 deletions
@@ -632,7 +632,7 @@ static bool llama_eval_internal(
     auto & mem_at_token1 = lctx.mem_at_token1;
 
     // TODO: fix this hardcoded size
-    static size_t buf_size = size_t(n_ctx)*1024*1024;
+    static size_t buf_size = size_t(n_ctx)*size_t(N)*128*1024;
     static void * buf = malloc(buf_size);
 
     const size_t C0 = mem_at_token0; // ~base
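For context, a minimal sketch of what the two readings are presumably used for, assuming mem_at_token0 and mem_at_token1 hold memory measurements taken after two evals of n_batch tokens each (as the main.cpp hunk below suggests); the helper estimate_buf_size and the exact extrapolation are assumptions, not this commit's code:

#include <cstddef>

// Hypothetical sketch: with a reading C0 after the first n_batch tokens
// (~base usage) and C1 after another n_batch tokens, the per-token growth
// is (C1 - C0)/n_batch, and a buffer for up to n_ctx tokens can be sized
// by linear extrapolation instead of a hardcoded constant.
static size_t estimate_buf_size(size_t mem_at_token0, size_t mem_at_token1,
                                size_t n_batch, size_t n_ctx) {
    const size_t C0     = mem_at_token0;                             // ~base
    const size_t growth = (mem_at_token1 - mem_at_token0) / n_batch; // bytes per token
    return C0 + n_ctx * growth; // worst case: full context
}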
main.cpp
@@ -219,7 +219,7 @@ int main(int argc, char ** argv) {
     // (fill in mem_at_token0 and mem_at_token1)
     // TODO: better way to do that
     // TODO(Green-Sky): move to internal and detect first time usage
-    {
+    if (!params.perplexity) { // perplexity does not grow over time
         // we make 2 evals, of batchsize to take 2 measurements, to determine base and growth
         std::vector<llama_token> tmp(params.n_batch*2, 2);
         tmp[0] = llama_token_bos();
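A minimal self-contained sketch of the measurement pass this hunk guards, assuming the llama.cpp C API of that era (llama_eval, llama_token_bos); the helper measure_mem_usage is hypothetical, and the plumbing that records mem_at_token0/mem_at_token1 inside eval is assumed:

#include <vector>
#include "llama.h"

// Hypothetical sketch of the guarded warm-up: two evals of n_batch dummy
// tokens each, so the first reading gives the base usage and the difference
// between the two gives the growth. Perplexity evaluates fixed-size chunks
// whose memory use does not grow over time, so it skips this.
static void measure_mem_usage(llama_context * ctx, int n_batch, int n_threads,
                              bool perplexity) {
    if (!perplexity) { // perplexity does not grow over time
        std::vector<llama_token> tmp(n_batch*2, 2); // dummy token ids
        tmp[0] = llama_token_bos();                 // start with BOS

        // first eval: fills mem_at_token0 (~base)
        llama_eval(ctx, tmp.data(), n_batch, 0, n_threads);
        // second eval: fills mem_at_token1 (base + one batch of growth)
        llama_eval(ctx, tmp.data() + n_batch, n_batch, n_batch, n_threads);
    }
}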