diff --git a/ggml/src/ggml-sycl/dmmv.cpp b/ggml/src/ggml-sycl/dmmv.cpp index b8abba66f..70a94fc16 100644 --- a/ggml/src/ggml-sycl/dmmv.cpp +++ b/ggml/src/ggml-sycl/dmmv.cpp @@ -3,7 +3,6 @@ #include "dequantize.hpp" #include "presets.hpp" -int constexpr QK_WARP_SIZE = 32; static void convert_f16(const void * vx, const int ib, const int iqs, dfloat2 & v){ const sycl::half *x = (const sycl::half *)vx; diff --git a/ggml/src/ggml-sycl/presets.hpp b/ggml/src/ggml-sycl/presets.hpp index c09c75dc7..15ddcac1f 100644 --- a/ggml/src/ggml-sycl/presets.hpp +++ b/ggml/src/ggml-sycl/presets.hpp @@ -62,4 +62,5 @@ static_assert(K_QUANTS_PER_ITERATION == 1 || K_QUANTS_PER_ITERATION == 2, "K_QUA #define MUL_MAT_SRC1_COL_STRIDE 128 +#define QK_WARP_SIZE 32 #endif // GGML_SYCL_PRESETS_HPP