add warp_size macro

This commit is contained in:
luoyu-intel 2024-07-01 09:19:49 +08:00
parent eb0d1325af
commit e4f1516e9d
2 changed files with 4 additions and 5 deletions

View file

@ -488,6 +488,9 @@ if (GGML_SYCL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
else()
add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
endif()
file(GLOB GGML_HEADERS_SYCL "ggml-sycl/*.hpp")

View file

@ -16,11 +16,7 @@
#define GGML_SYCL_MAX_STREAMS 8
#define GGML_SYCL_MAX_BUFFERS 256
#ifdef GGML_SYCL_NV_ARCH
#define WARP_SIZE 32
#else
#define WARP_SIZE 16
#endif
#define WARP_SIZE GGML_SYCL_WARP_SIZE
#define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses
#define SYCL_GELU_BLOCK_SIZE 256