From 86469d15c40177676553cd6248a63635fb68db11 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Fri, 30 Jun 2023 12:40:08 +0800 Subject: [PATCH] fix for yr-rocm, large gpu scratch --- llama.cpp | 5 +++-- otherarch/gpt2_v3.cpp | 4 +++- otherarch/gptj_v3.cpp | 3 ++- otherarch/llama_v2.cpp | 5 ++++- otherarch/mpt_v3.cpp | 3 ++- otherarch/neox_v3.cpp | 3 ++- 6 files changed, 16 insertions(+), 7 deletions(-) diff --git a/llama.cpp b/llama.cpp index aa4ac4432..c225e2091 100644 --- a/llama.cpp +++ b/llama.cpp @@ -12,7 +12,8 @@ #include "ggml.h" #ifdef GGML_USE_CUBLAS #include "ggml-cuda.h" -#elif defined(GGML_USE_CLBLAST) +#endif +#if defined(GGML_USE_CLBLAST) #include "ggml-opencl.h" #endif @@ -1113,7 +1114,7 @@ static void llama_model_load_internal( fprintf(stderr, "%s: not allocating a VRAM scratch buffer due to low VRAM option\n", __func__); ggml_cuda_set_scratch_size(0); // disable scratch } else { - vram_scratch = n_batch * MB; + vram_scratch = n_batch * MB * bigctxmul; ggml_cuda_set_scratch_size(vram_scratch); if (n_gpu_layers > 0) { fprintf(stderr, "%s: allocating batch_size x 1 MB = %zd MB VRAM for the scratch buffer\n", diff --git a/otherarch/gpt2_v3.cpp b/otherarch/gpt2_v3.cpp index af7c7f68d..b507357c4 100644 --- a/otherarch/gpt2_v3.cpp +++ b/otherarch/gpt2_v3.cpp @@ -18,10 +18,12 @@ #ifdef GGML_USE_CUBLAS #include "ggml-cuda.h" -#elif defined(GGML_USE_CLBLAST) +#endif +#if defined(GGML_USE_CLBLAST) #include "ggml-opencl.h" #endif + // load the model's weights from a file ModelLoadResult gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab, FileFormat file_format, int gpulayers) { printf("%s: loading model from '%s'\n", __func__, fname.c_str()); diff --git a/otherarch/gptj_v3.cpp b/otherarch/gptj_v3.cpp index 3ebc3efdd..d10d8172b 100644 --- a/otherarch/gptj_v3.cpp +++ b/otherarch/gptj_v3.cpp @@ -18,7 +18,8 @@ #ifdef GGML_USE_CUBLAS #include "ggml-cuda.h" -#elif defined(GGML_USE_CLBLAST) +#endif +#if defined(GGML_USE_CLBLAST) #include "ggml-opencl.h" #endif diff --git a/otherarch/llama_v2.cpp b/otherarch/llama_v2.cpp index 1dee94be7..ff9f4e6f3 100644 --- a/otherarch/llama_v2.cpp +++ b/otherarch/llama_v2.cpp @@ -9,12 +9,15 @@ #include "llama_v2.h" #include "ggml_v2.h" + #ifdef GGML_USE_CUBLAS #include "ggml_v2-cuda.h" -#elif defined(GGML_USE_CLBLAST) +#endif +#if defined(GGML_USE_CLBLAST) #include "ggml_v2-opencl.h" #endif + #include #include #include diff --git a/otherarch/mpt_v3.cpp b/otherarch/mpt_v3.cpp index cca7fc0ca..ef362a051 100644 --- a/otherarch/mpt_v3.cpp +++ b/otherarch/mpt_v3.cpp @@ -18,7 +18,8 @@ #ifdef GGML_USE_CUBLAS #include "ggml-cuda.h" -#elif defined(GGML_USE_CLBLAST) +#endif +#if defined(GGML_USE_CLBLAST) #include "ggml-opencl.h" #endif diff --git a/otherarch/neox_v3.cpp b/otherarch/neox_v3.cpp index cc6ef973d..3eaeccede 100644 --- a/otherarch/neox_v3.cpp +++ b/otherarch/neox_v3.cpp @@ -16,7 +16,8 @@ #ifdef GGML_USE_CUBLAS #include "ggml-cuda.h" -#elif defined(GGML_USE_CLBLAST) +#endif +#if defined(GGML_USE_CLBLAST) #include "ggml-opencl.h" #endif