From d5a6e865f6d1377a6bbebcb8bb3091d202543ea9 Mon Sep 17 00:00:00 2001
From: Paul Tsochantaris <ptsochantaris@icloud.com>
Date: Thu, 8 Feb 2024 18:26:10 +0000
Subject: [PATCH] Whitespace

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index b12543298..847c25b0c 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -7285,7 +7285,7 @@ static int llama_decode_internal(
     // TODO: this is mostly important for Apple Silicon where CBLAS is still performing very well
     //       we still need some threads to process all non-mul_mat ops, but not too much to avoid interfering
     //       with the BLAS calls. need a better solution
-    // MoE Special Case: This logic applies when hparams.n_expert == 0, i.e. the model is NOT an MoE model. When an MoE is 
+    // MoE Special Case: This logic applies when hparams.n_expert == 0, i.e. the model is NOT an MoE model. When an MoE is
     //                   being processed then Accelerate/BLAS will not be involved, so capping would limit performance.
     if (n_tokens >= 32 && hparams.n_expert == 0 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas()) {
         n_threads = std::min(4, n_threads);