From d6b44fb3aef1e81c4075412280e888d205eeebae Mon Sep 17 00:00:00 2001
From: KerfuffleV2
Date: Thu, 19 Oct 2023 21:14:23 -0600
Subject: [PATCH] Force measure to allocate more memory for 70Bs

---
 llama.cpp | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index ff47e6320..a4bd3932e 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3266,14 +3266,11 @@ static struct ggml_cgraph * llm_build_llama(
             } else {
                 run_layer = NULL;
             }
-        } else if (ggml_allocr_is_measure(lctx.alloc) && il == n_layer - 1) {
-            // No idea why this is needed, but otherwise we run out of space
-            // when skipping attn or mlp (but not both) on the last layer
-            run_mlp = false;
-        } else if (ggml_allocr_is_measure(lctx.alloc) && il == n_layer - 2) {
-            // No idea why this is needed, but otherwise we run out of space
-            // when skipping attn or mlp (but not both) on the last layer
-            run_attn = false;
+        } else if (ggml_allocr_is_measure(lctx.alloc)) {
+            if (il == 0 || il == n_layer - 1) run_mlp = false;
+            else if (il == 1 || il == n_layer - 2) run_attn = false;
+            else if (il & 1) run_mlp = false;
+            else run_attn = false;
         }
 
         if (!run_attn && !run_mlp) continue;