diff --git a/otherarch/gpt2_v3.cpp b/otherarch/gpt2_v3.cpp
index 07ce68909..ba2222f99 100644
--- a/otherarch/gpt2_v3.cpp
+++ b/otherarch/gpt2_v3.cpp
@@ -347,6 +347,7 @@ ModelLoadResult gpt2_model_load(const std::string & fname, gpt2_model & model, g
 
     //gpu offload
     #if defined(GGML_USE_CLBLAST)
+    if(gpulayers>0)
     {
         const auto & hparams = model.hparams;
         size_t vram_total = 0;
diff --git a/otherarch/gptj_v3.cpp b/otherarch/gptj_v3.cpp
index 56e292bef..0f0f82105 100644
--- a/otherarch/gptj_v3.cpp
+++ b/otherarch/gptj_v3.cpp
@@ -335,6 +335,7 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g
 
     //gpu offload
     #if defined(GGML_USE_CLBLAST)
+    if(gpulayers>0)
     {
         const auto & hparams = model.hparams;
         size_t vram_total = 0;
diff --git a/otherarch/mpt_v3.cpp b/otherarch/mpt_v3.cpp
index e4f0f7719..f7ab03ec0 100644
--- a/otherarch/mpt_v3.cpp
+++ b/otherarch/mpt_v3.cpp
@@ -15,7 +15,9 @@
 
 #include "model_adapter.h"
 
-
+#if defined(GGML_USE_CLBLAST)
+#include "ggml-opencl.h"
+#endif
 
 // load the model's weights from a file
 bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vocab, int gpulayers) {
@@ -280,6 +282,7 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
 
     //gpu offload
     #if defined(GGML_USE_CLBLAST)
+    if(gpulayers>0)
     {
         const auto & hparams = model.hparams;
         size_t vram_total = 0;
diff --git a/otherarch/neox_v3.cpp b/otherarch/neox_v3.cpp
index 715f13ac4..3084bbda7 100644
--- a/otherarch/neox_v3.cpp
+++ b/otherarch/neox_v3.cpp
@@ -13,7 +13,9 @@
 #include <vector>
 #include <iostream>
 
-
+#if defined(GGML_USE_CLBLAST)
+#include "ggml-opencl.h"
+#endif
 
 // load the model's weights from a file
 ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt_vocab & vocab, FileFormat file_format, int gpulayers) {
@@ -320,6 +322,7 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model &
 
     //gpu offload
     #if defined(GGML_USE_CLBLAST)
+    if(gpulayers>0)
     {
         const auto & hparams = model.hparams;
         size_t vram_total = 0;