add warp_size macro
This commit is contained in:
parent
eb0d1325af
commit
e4f1516e9d
2 changed files with 4 additions and 5 deletions
|
@@ -488,6 +488,9 @@ if (GGML_SYCL)
|
|||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
|
||||
if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
|
||||
add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
|
||||
else()
|
||||
add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
|
||||
endif()
|
||||
|
||||
file(GLOB GGML_HEADERS_SYCL "ggml-sycl/*.hpp")
|
||||
|
|
|
@@ -16,11 +16,7 @@
|
|||
#define GGML_SYCL_MAX_STREAMS 8
|
||||
#define GGML_SYCL_MAX_BUFFERS 256
|
||||
|
||||
#ifdef GGML_SYCL_NV_ARCH
|
||||
#define WARP_SIZE 32
|
||||
#else
|
||||
#define WARP_SIZE 16
|
||||
#endif
|
||||
#define WARP_SIZE GGML_SYCL_WARP_SIZE
|
||||
#define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses
|
||||
|
||||
#define SYCL_GELU_BLOCK_SIZE 256
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue