From d6b44fb3aef1e81c4075412280e888d205eeebae Mon Sep 17 00:00:00 2001
From: KerfuffleV2 <kerfliffle@keemail.me>
Date: Thu, 19 Oct 2023 21:14:23 -0600
Subject: [PATCH] Force the measure pass to allocate more memory for 70B models

---
 llama.cpp | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index ff47e6320..a4bd3932e 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3266,14 +3266,11 @@ static struct ggml_cgraph * llm_build_llama(
             } else {
                 run_layer = NULL;
             }
-        } else if (ggml_allocr_is_measure(lctx.alloc) && il == n_layer - 1) {
-            // No idea why this is needed, but otherwise we run out of space
-            // when skipping attn or mlp (but not both) on the last layer
-            run_mlp = false;
-        } else if (ggml_allocr_is_measure(lctx.alloc) && il == n_layer - 2) {
-            // No idea why this is needed, but otherwise we run out of space
-            // when skipping attn or mlp (but not both) on the last layer
-            run_attn = false;
+        } else if (ggml_allocr_is_measure(lctx.alloc)) {
+            if (il == 0 || il == n_layer - 1) run_mlp = false;
+            else if (il == 1 || il == n_layer - 2) run_attn = false;
+            else if (il & 1) run_mlp = false;
+            else run_attn = false;
         }
         if (!run_attn && !run_mlp) continue;