From 3536cf60002ddb63126e02f7ee88c5ae88004e72 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel <jared@nomic.ai>
Date: Tue, 30 Jan 2024 16:37:00 -0500
Subject: [PATCH] llama : remove obsolete set of n_threads=1

---
 llama.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index dc0e6dcb2..a490eeab2 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6878,11 +6878,6 @@ static int llama_decode_internal(
         n_threads = std::min(4, n_threads);
     }
 
-    const bool fully_offloaded = model.n_gpu_layers >= (int) hparams.n_layer + 1;
-    if ((ggml_cpu_has_cublas() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute()) && fully_offloaded) {
-        n_threads = 1;
-    }
-
 #ifdef GGML_USE_MPI
     const int64_t n_layer = hparams.n_layer;
     ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);