fix for yr-rocm, large gpu scratch
This commit is contained in:
parent
1347d3acc0
commit
86469d15c4
6 changed files with 16 additions and 7 deletions
|
@ -12,7 +12,8 @@
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
#ifdef GGML_USE_CUBLAS
|
#ifdef GGML_USE_CUBLAS
|
||||||
#include "ggml-cuda.h"
|
#include "ggml-cuda.h"
|
||||||
#elif defined(GGML_USE_CLBLAST)
|
#endif
|
||||||
|
#if defined(GGML_USE_CLBLAST)
|
||||||
#include "ggml-opencl.h"
|
#include "ggml-opencl.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1113,7 +1114,7 @@ static void llama_model_load_internal(
|
||||||
fprintf(stderr, "%s: not allocating a VRAM scratch buffer due to low VRAM option\n", __func__);
|
fprintf(stderr, "%s: not allocating a VRAM scratch buffer due to low VRAM option\n", __func__);
|
||||||
ggml_cuda_set_scratch_size(0); // disable scratch
|
ggml_cuda_set_scratch_size(0); // disable scratch
|
||||||
} else {
|
} else {
|
||||||
vram_scratch = n_batch * MB;
|
vram_scratch = n_batch * MB * bigctxmul;
|
||||||
ggml_cuda_set_scratch_size(vram_scratch);
|
ggml_cuda_set_scratch_size(vram_scratch);
|
||||||
if (n_gpu_layers > 0) {
|
if (n_gpu_layers > 0) {
|
||||||
fprintf(stderr, "%s: allocating batch_size x 1 MB = %zd MB VRAM for the scratch buffer\n",
|
fprintf(stderr, "%s: allocating batch_size x 1 MB = %zd MB VRAM for the scratch buffer\n",
|
||||||
|
|
|
@ -18,10 +18,12 @@
|
||||||
|
|
||||||
#ifdef GGML_USE_CUBLAS
|
#ifdef GGML_USE_CUBLAS
|
||||||
#include "ggml-cuda.h"
|
#include "ggml-cuda.h"
|
||||||
#elif defined(GGML_USE_CLBLAST)
|
#endif
|
||||||
|
#if defined(GGML_USE_CLBLAST)
|
||||||
#include "ggml-opencl.h"
|
#include "ggml-opencl.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
// load the model's weights from a file
|
// load the model's weights from a file
|
||||||
ModelLoadResult gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab, FileFormat file_format, int gpulayers) {
|
ModelLoadResult gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab, FileFormat file_format, int gpulayers) {
|
||||||
printf("%s: loading model from '%s'\n", __func__, fname.c_str());
|
printf("%s: loading model from '%s'\n", __func__, fname.c_str());
|
||||||
|
|
|
@ -18,7 +18,8 @@
|
||||||
|
|
||||||
#ifdef GGML_USE_CUBLAS
|
#ifdef GGML_USE_CUBLAS
|
||||||
#include "ggml-cuda.h"
|
#include "ggml-cuda.h"
|
||||||
#elif defined(GGML_USE_CLBLAST)
|
#endif
|
||||||
|
#if defined(GGML_USE_CLBLAST)
|
||||||
#include "ggml-opencl.h"
|
#include "ggml-opencl.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -9,12 +9,15 @@
|
||||||
#include "llama_v2.h"
|
#include "llama_v2.h"
|
||||||
|
|
||||||
#include "ggml_v2.h"
|
#include "ggml_v2.h"
|
||||||
|
|
||||||
#ifdef GGML_USE_CUBLAS
|
#ifdef GGML_USE_CUBLAS
|
||||||
#include "ggml_v2-cuda.h"
|
#include "ggml_v2-cuda.h"
|
||||||
#elif defined(GGML_USE_CLBLAST)
|
#endif
|
||||||
|
#if defined(GGML_USE_CLBLAST)
|
||||||
#include "ggml_v2-opencl.h"
|
#include "ggml_v2-opencl.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <ctime>
|
#include <ctime>
|
||||||
#include <cinttypes>
|
#include <cinttypes>
|
||||||
|
|
|
@ -18,7 +18,8 @@
|
||||||
|
|
||||||
#ifdef GGML_USE_CUBLAS
|
#ifdef GGML_USE_CUBLAS
|
||||||
#include "ggml-cuda.h"
|
#include "ggml-cuda.h"
|
||||||
#elif defined(GGML_USE_CLBLAST)
|
#endif
|
||||||
|
#if defined(GGML_USE_CLBLAST)
|
||||||
#include "ggml-opencl.h"
|
#include "ggml-opencl.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -16,7 +16,8 @@
|
||||||
|
|
||||||
#ifdef GGML_USE_CUBLAS
|
#ifdef GGML_USE_CUBLAS
|
||||||
#include "ggml-cuda.h"
|
#include "ggml-cuda.h"
|
||||||
#elif defined(GGML_USE_CLBLAST)
|
#endif
|
||||||
|
#if defined(GGML_USE_CLBLAST)
|
||||||
#include "ggml-opencl.h"
|
#include "ggml-opencl.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue