[SYCL] Fix the sub group size of Intel (#8106)

* use warp_size macro for all sycl kernels

* fix mask of permute_sub_group_by_xor

* fix rms_norm with correct warp number

* fix rms_norm_f32/group_norm_f32

* move norm to norm.cpp file

* fix quantize bug

* fix mmvq's batch size
This commit is contained in:
luoyu-intel 2024-07-02 02:16:00 +00:00 committed by GitHub
parent 5fac350b9c
commit d08c20edde
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 587 additions and 509 deletions

View file

@ -486,9 +486,11 @@ if (GGML_SYCL)
# Baseline C++ flags for the SYCL build: silence narrowing-conversion
# diagnostics and optimise at -O3.
string(APPEND CMAKE_CXX_FLAGS " -Wno-narrowing -O3")
# Put the current directory on the include path so the DPCT headers resolve.
add_compile_options(-I./)
# Choose the value of the GGML_SYCL_WARP_SIZE compile definition from the
# selected SYCL target. Every target other than "NVIDIA" (including an unset
# GGML_SYCL_TARGET) gets 16; "NVIDIA" gets 32 and is additionally compiled
# for the nvptx64-nvidia-cuda SYCL target.
if (NOT GGML_SYCL_TARGET STREQUAL "NVIDIA")
    add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
else()
    string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=nvptx64-nvidia-cuda")
    add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
endif()
# Collect every .hpp file under ggml-sycl/ into GGML_HEADERS_SYCL.
# NOTE(review): the glob is evaluated at configure time only, so a newly added
# header is not picked up until CMake is re-run.
file(GLOB GGML_HEADERS_SYCL "ggml-sycl/*.hpp")