diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index d50bad0ae..a18198f16 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -488,6 +488,10 @@ if (GGML_SYCL)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
     if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
+        # GGML_SYCL_WARP_SIZE is read by ggml-sycl/presets.hpp, which maps it to WARP_SIZE.
+        add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
+    else()
+        add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
     endif()
 
     file(GLOB GGML_HEADERS_SYCL "ggml-sycl/*.hpp")
diff --git a/ggml/src/ggml-sycl/presets.hpp b/ggml/src/ggml-sycl/presets.hpp
index d17bed2cc..c09c75dc7 100644
--- a/ggml/src/ggml-sycl/presets.hpp
+++ b/ggml/src/ggml-sycl/presets.hpp
@@ -16,11 +16,15 @@
 #define GGML_SYCL_MAX_STREAMS 8
 #define GGML_SYCL_MAX_BUFFERS 256
 
-#ifdef GGML_SYCL_NV_ARCH
-#define WARP_SIZE 32
-#else
-#define WARP_SIZE 16
-#endif
+// The build system normally supplies GGML_SYCL_WARP_SIZE (see ggml/src/CMakeLists.txt).
+#ifndef GGML_SYCL_WARP_SIZE
+#ifdef GGML_SYCL_NV_ARCH
+#define GGML_SYCL_WARP_SIZE 32
+#else
+#define GGML_SYCL_WARP_SIZE 16
+#endif
+#endif
+#define WARP_SIZE GGML_SYCL_WARP_SIZE
 
 #define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses
 #define SYCL_GELU_BLOCK_SIZE 256