fix for yr-rocm, large gpu scratch

2023-06-30 12:40:08 +08:00 · 2023-06-30 12:40:08 +08:00 · 86469d15c4
commit 86469d15c4
parent 1347d3acc0
6 changed files with 16 additions and 7 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -12,7 +12,8 @@
 #include "ggml.h"
 #ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
-#elif defined(GGML_USE_CLBLAST)
+#endif
+#if defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
 #endif

@@ -1113,7 +1114,7 @@ static void llama_model_load_internal(
            fprintf(stderr, "%s: not allocating a VRAM scratch buffer due to low VRAM option\n", __func__);
            ggml_cuda_set_scratch_size(0); // disable scratch
        } else {
-            vram_scratch = n_batch * MB;
+            vram_scratch = n_batch * MB * bigctxmul;
            ggml_cuda_set_scratch_size(vram_scratch);
            if (n_gpu_layers > 0) {
                fprintf(stderr, "%s: allocating batch_size x 1 MB = %zd MB VRAM for the scratch buffer\n",
--- a/otherarch/gpt2_v3.cpp
+++ b/otherarch/gpt2_v3.cpp
@@ -18,10 +18,12 @@

 #ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
-#elif defined(GGML_USE_CLBLAST)
+#endif
+#if defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
 #endif

+
 // load the model's weights from a file
 ModelLoadResult gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab, FileFormat file_format, int gpulayers) {
    printf("%s: loading model from '%s'\n", __func__, fname.c_str());
--- a/otherarch/gptj_v3.cpp
+++ b/otherarch/gptj_v3.cpp
@@ -18,7 +18,8 @@

 #ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
-#elif defined(GGML_USE_CLBLAST)
+#endif
+#if defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
 #endif

--- a/otherarch/llama_v2.cpp
+++ b/otherarch/llama_v2.cpp
@@ -9,12 +9,15 @@
 #include "llama_v2.h"

 #include "ggml_v2.h"
+
 #ifdef GGML_USE_CUBLAS
 #include "ggml_v2-cuda.h"
-#elif defined(GGML_USE_CLBLAST)
+#endif
+#if defined(GGML_USE_CLBLAST)
 #include "ggml_v2-opencl.h"
 #endif

+
 #include <array>
 #include <ctime>
 #include <cinttypes>
--- a/otherarch/mpt_v3.cpp
+++ b/otherarch/mpt_v3.cpp
@@ -18,7 +18,8 @@

 #ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
-#elif defined(GGML_USE_CLBLAST)
+#endif
+#if defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
 #endif

--- a/otherarch/neox_v3.cpp
+++ b/otherarch/neox_v3.cpp
@@ -16,7 +16,8 @@

 #ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
-#elif defined(GGML_USE_CLBLAST)
+#endif
+#if defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
 #endif