From 86469d15c40177676553cd6248a63635fb68db11 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Fri, 30 Jun 2023 12:40:08 +0800
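Subject: [PATCH] fix for yr-rocm, large gpu scratch

Include the CUDA and OpenCL backend headers under independent #if blocks
instead of an #ifdef/#elif chain, so that both headers are included when
GGML_USE_CUBLAS and GGML_USE_CLBLAST are both defined. Also scale the
VRAM scratch buffer size in llama.cpp by bigctxmul.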

---
 llama.cpp              | 5 +++--
 otherarch/gpt2_v3.cpp  | 4 +++-
 otherarch/gptj_v3.cpp  | 3 ++-
 otherarch/llama_v2.cpp | 5 ++++-
 otherarch/mpt_v3.cpp   | 3 ++-
 otherarch/neox_v3.cpp  | 3 ++-
 6 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/llama.cpp b/llama.cpp
index aa4ac4432..c225e2091 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -12,7 +12,8 @@
 #include "ggml.h"
 #ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
-#elif defined(GGML_USE_CLBLAST)
+#endif
+#if defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
 #endif
 
@@ -1113,7 +1114,7 @@ static void llama_model_load_internal(
             fprintf(stderr, "%s: not allocating a VRAM scratch buffer due to low VRAM option\n", __func__);
             ggml_cuda_set_scratch_size(0); // disable scratch
         } else {
-            vram_scratch = n_batch * MB;
+            vram_scratch = n_batch * MB * bigctxmul;
             ggml_cuda_set_scratch_size(vram_scratch);
             if (n_gpu_layers > 0) {
                 fprintf(stderr, "%s: allocating batch_size x 1 MB = %zd MB VRAM for the scratch buffer\n",
diff --git a/otherarch/gpt2_v3.cpp b/otherarch/gpt2_v3.cpp
index af7c7f68d..b507357c4 100644
--- a/otherarch/gpt2_v3.cpp
+++ b/otherarch/gpt2_v3.cpp
@@ -18,10 +18,12 @@
 
 #ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
-#elif defined(GGML_USE_CLBLAST)
+#endif
+#if defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
 #endif
 
+
 // load the model's weights from a file
 ModelLoadResult gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab, FileFormat file_format, int gpulayers) {
     printf("%s: loading model from '%s'\n", __func__, fname.c_str());
diff --git a/otherarch/gptj_v3.cpp b/otherarch/gptj_v3.cpp
index 3ebc3efdd..d10d8172b 100644
--- a/otherarch/gptj_v3.cpp
+++ b/otherarch/gptj_v3.cpp
@@ -18,7 +18,8 @@
 
 #ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
-#elif defined(GGML_USE_CLBLAST)
+#endif
+#if defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
 #endif
 
diff --git a/otherarch/llama_v2.cpp b/otherarch/llama_v2.cpp
index 1dee94be7..ff9f4e6f3 100644
--- a/otherarch/llama_v2.cpp
+++ b/otherarch/llama_v2.cpp
@@ -9,12 +9,15 @@
 #include "llama_v2.h"
 
 #include "ggml_v2.h"
+
 #ifdef GGML_USE_CUBLAS
 #include "ggml_v2-cuda.h"
-#elif defined(GGML_USE_CLBLAST)
+#endif
+#if defined(GGML_USE_CLBLAST)
 #include "ggml_v2-opencl.h"
 #endif
 
+
 #include <array>
 #include <ctime>
 #include <cinttypes>
diff --git a/otherarch/mpt_v3.cpp b/otherarch/mpt_v3.cpp
index cca7fc0ca..ef362a051 100644
--- a/otherarch/mpt_v3.cpp
+++ b/otherarch/mpt_v3.cpp
@@ -18,7 +18,8 @@
 
 #ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
-#elif defined(GGML_USE_CLBLAST)
+#endif
+#if defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
 #endif
 
diff --git a/otherarch/neox_v3.cpp b/otherarch/neox_v3.cpp
index cc6ef973d..3eaeccede 100644
--- a/otherarch/neox_v3.cpp
+++ b/otherarch/neox_v3.cpp
@@ -16,7 +16,8 @@
 
 #ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
-#elif defined(GGML_USE_CLBLAST)
+#endif
+#if defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
 #endif