From 5430726711ae12e17800297c15acf1495043c5ab Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Thu, 14 Nov 2024 15:22:13 +0000 Subject: [PATCH] Rename GGML_SYCL_ARCH to GGML_SYCL_DEVICE_ARCH --- docs/backend/SYCL.md | 16 ++++++++-------- ggml/CMakeLists.txt | 3 ++- ggml/src/CMakeLists.txt | 8 ++++---- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md index 2f54ed447..82192dc07 100644 --- a/docs/backend/SYCL.md +++ b/docs/backend/SYCL.md @@ -310,14 +310,14 @@ export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithCublas/include:$CPLUS_INCLUDE_ export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR # Build LLAMA with Nvidia BLAS acceleration through SYCL -# Setting GGML_SYCL_ARCH is optional but can improve performance -GGML_SYCL_ARCH=sm_80 # Example architecture +# Setting GGML_SYCL_DEVICE_ARCH is optional but can improve performance +GGML_SYCL_DEVICE_ARCH=sm_80 # Example architecture # Option 1: Use FP32 (recommended for better performance in most cases) -cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_ARCH=${GGML_SYCL_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx +cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx # Option 2: Use FP16 -cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_ARCH=${GGML_SYCL_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON +cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON # build all binary cmake --build build --config Release -j -v @@ -335,9 +335,9 @@ export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithrocBLAS/include:$CPLUS_INCLUDE ## AMD # Use FP32, FP16 is not supported -# Find your GGML_SYCL_ARCH with rocminfo, under the key 'Name:' -GGML_SYCL_ARCH=gfx90a # Example 
architecture -cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=AMD -DGGML_SYCL_ARCH=${GGML_SYCL_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx +# Find your GGML_SYCL_DEVICE_ARCH with rocminfo, under the key 'Name:' +GGML_SYCL_DEVICE_ARCH=gfx90a # Example architecture +cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=AMD -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx # build all binary cmake --build build --config Release -j -v @@ -647,7 +647,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512 |--------------------|---------------------------------------|---------------------------------------------| | GGML_SYCL | ON (mandatory) | Enable build with SYCL code path.
FP32 path - recommended for better perforemance than FP16 on quantized model| | GGML_SYCL_TARGET | INTEL *(default)* \| NVIDIA \| AMD | Set the SYCL target device type. | -| GGML_SYCL_ARCH | "" | Set the SYCL target architecture, optional except for AMD. Setting the architecture can improve the performance. See the table [here](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OffloadDesign.md#--offload-arch) for a list of valid architectures. | +| GGML_SYCL_DEVICE_ARCH | Optional (except for AMD) | Set the SYCL device architecture, optional except for AMD. Setting the device architecture can improve the performance. See the table [--offload-arch](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OffloadDesign.md#--offload-arch) for a list of valid architectures. | | GGML_SYCL_F16 | OFF *(default)* \|ON *(optional)* | Enable FP16 build with SYCL code path. | | CMAKE_C_COMPILER | `icx` *(Linux)*, `icx/cl` *(Windows)* | Set `icx` compiler for SYCL code path. | | CMAKE_CXX_COMPILER | `icpx` *(Linux)*, `icx` *(Windows)* | Set `icpx/icx` compiler for SYCL code path. 
| diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index c03e4196b..ec027e180 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -166,7 +166,8 @@ option(GGML_SYCL "ggml: use SYCL" option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF) set (GGML_SYCL_TARGET "INTEL" CACHE STRING "ggml: sycl target device") -set (GGML_SYCL_ARCH "" CACHE STRING "ggml: sycl architecture") +set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING + "ggml: sycl device architecture") # extra artifacts option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE}) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 1900f12c4..3747d1916 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -608,15 +608,15 @@ if (GGML_SYCL) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda") list(APPEND GGML_EXTRA_LIBS_PRIVATE sycl pthread m dl onemkl) elseif (GGML_SYCL_TARGET STREQUAL "AMD") - if (NOT GGML_SYCL_ARCH) - message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_ARCH has not been set.") + if (NOT GGML_SYCL_DEVICE_ARCH) + message(FATAL_ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.") endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=amdgcn-amd-amdhsa") list(APPEND GGML_EXTRA_LIBS_PRIVATE sycl pthread m dl onemkl) endif() - if (GGML_SYCL_ARCH) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xsycl-target-backend --offload-arch=${GGML_SYCL_ARCH}") + if (GGML_SYCL_DEVICE_ARCH) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}") endif() endif() endif()