diff --git a/otherarch/gpt2_v3.cpp b/otherarch/gpt2_v3.cpp index 07ce68909..ba2222f99 100644 --- a/otherarch/gpt2_v3.cpp +++ b/otherarch/gpt2_v3.cpp @@ -347,6 +347,7 @@ ModelLoadResult gpt2_model_load(const std::string & fname, gpt2_model & model, g //gpu offload #if defined(GGML_USE_CLBLAST) + if(gpulayers>0) { const auto & hparams = model.hparams; size_t vram_total = 0; diff --git a/otherarch/gptj_v3.cpp b/otherarch/gptj_v3.cpp index 56e292bef..0f0f82105 100644 --- a/otherarch/gptj_v3.cpp +++ b/otherarch/gptj_v3.cpp @@ -335,6 +335,7 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g //gpu offload #if defined(GGML_USE_CLBLAST) + if(gpulayers>0) { const auto & hparams = model.hparams; size_t vram_total = 0; diff --git a/otherarch/mpt_v3.cpp b/otherarch/mpt_v3.cpp index e4f0f7719..f7ab03ec0 100644 --- a/otherarch/mpt_v3.cpp +++ b/otherarch/mpt_v3.cpp @@ -15,7 +15,9 @@ #include "model_adapter.h" - +#if defined(GGML_USE_CLBLAST) +#include "ggml-opencl.h" +#endif // load the model's weights from a file bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vocab, int gpulayers) { @@ -280,6 +282,7 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo //gpu offload #if defined(GGML_USE_CLBLAST) + if(gpulayers>0) { const auto & hparams = model.hparams; size_t vram_total = 0; diff --git a/otherarch/neox_v3.cpp b/otherarch/neox_v3.cpp index 715f13ac4..3084bbda7 100644 --- a/otherarch/neox_v3.cpp +++ b/otherarch/neox_v3.cpp @@ -13,7 +13,9 @@ #include #include - +#if defined(GGML_USE_CLBLAST) +#include "ggml-opencl.h" +#endif // load the model's weights from a file ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt_vocab & vocab, FileFormat file_format, int gpulayers) { @@ -320,6 +322,7 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model & //gpu offload #if defined(GGML_USE_CLBLAST) + if(gpulayers>0) { const auto & hparams = model.hparams; size_t vram_total = 0;