add warp_size macro

2024-07-01 09:19:49 +08:00 · 2024-07-01 09:19:49 +08:00 · e4f1516e9d
commit e4f1516e9d
parent eb0d1325af
2 changed files with 4 additions and 5 deletions
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -488,6 +488,9 @@ if (GGML_SYCL)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
    if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
+        add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
+    else()
+        add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
    endif()

    file(GLOB   GGML_HEADERS_SYCL "ggml-sycl/*.hpp")
--- a/ggml/src/ggml-sycl/presets.hpp
+++ b/ggml/src/ggml-sycl/presets.hpp
@@ -16,11 +16,7 @@
 #define GGML_SYCL_MAX_STREAMS       8
 #define GGML_SYCL_MAX_BUFFERS       256

-#ifdef GGML_SYCL_NV_ARCH
-#define WARP_SIZE 32
-#else
-#define WARP_SIZE 16
-#endif
+#define WARP_SIZE GGML_SYCL_WARP_SIZE
 #define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses

 #define SYCL_GELU_BLOCK_SIZE 256