enable SYCL_F16 support
This commit is contained in:
parent
f396a3b65e
commit
f008cc7b68
3 changed files with 17 additions and 7 deletions
|
@ -105,6 +105,7 @@ option(LLAMA_SYCL "llama: use SYCL"
|
||||||
option(LLAMA_MPI "llama: use MPI" OFF)
|
option(LLAMA_MPI "llama: use MPI" OFF)
|
||||||
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
|
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
|
||||||
option(LLAMA_SYCL "llama: use SYCL" OFF)
|
option(LLAMA_SYCL "llama: use SYCL" OFF)
|
||||||
|
option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
|
||||||
|
|
||||||
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
|
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
|
||||||
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
|
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
|
||||||
|
@ -455,8 +456,9 @@ if (LLAMA_SYCL)
|
||||||
#todo: AOT
|
#todo: AOT
|
||||||
|
|
||||||
find_package(IntelSYCL REQUIRED)
|
find_package(IntelSYCL REQUIRED)
|
||||||
|
if (LLAMA_SYCL_F16)
|
||||||
#add_compile_definitions(GGML_SYCL_F16)
|
add_compile_definitions(GGML_SYCL_F16)
|
||||||
|
endif()
|
||||||
add_compile_definitions(GGML_USE_SYCL)
|
add_compile_definitions(GGML_USE_SYCL)
|
||||||
|
|
||||||
add_compile_options(-I./) #include DPCT
|
add_compile_options(-I./) #include DPCT
|
||||||
|
|
|
@ -120,7 +120,7 @@ cd build
|
||||||
source /opt/intel/oneapi/setvars.sh
|
source /opt/intel/oneapi/setvars.sh
|
||||||
|
|
||||||
#for FP16
|
#for FP16
|
||||||
#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
|
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON # faster for long-prompt inference
|
||||||
|
|
||||||
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||||
|
|
||||||
|
@ -232,4 +232,4 @@ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
|
||||||
|
|
||||||
- Support building on Windows.
|
- Support building on Windows.
|
||||||
|
|
||||||
- Support multiple cards.
|
- Support multiple cards.
|
||||||
|
|
|
@ -9297,9 +9297,17 @@ inline void ggml_sycl_op_mul_mat_sycl(
|
||||||
int ldc = dst->backend == GGML_BACKEND_GPU && device_id == g_main_device ? ne0 : row_diff;
|
int ldc = dst->backend == GGML_BACKEND_GPU && device_id == g_main_device ? ne0 : row_diff;
|
||||||
|
|
||||||
const int compute_capability = g_device_caps[id].cc;
|
const int compute_capability = g_device_caps[id].cc;
|
||||||
|
#ifdef GGML_SYCL_F16
|
||||||
// if (compute_capability >= CC_VOLTA && (src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && ggml_is_contiguous(src0) && row_diff == src0->ne[1] && dst->op_params[0] == GGML_PREC_DEFAULT) {
|
bool use_fp16 = true; // TODO(Yu) SYCL capability check
|
||||||
if (compute_capability >= CC_VOLTA && (src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && ggml_is_contiguous(src0) && row_diff == src0->ne[1] && dst->op_params[0] == GGML_PREC_DEFAULT) {
|
#else
|
||||||
|
bool use_fp16 = false;
|
||||||
|
#endif
|
||||||
|
// if (compute_capability >= CC_VOLTA && (src0->type == GGML_TYPE_F16 ||
|
||||||
|
// ggml_is_quantized(src0->type)) && ggml_is_contiguous(src0) && row_diff ==
|
||||||
|
// src0->ne[1] && dst->op_params[0] == GGML_PREC_DEFAULT) {
|
||||||
|
if ((src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) &&
|
||||||
|
use_fp16 && ggml_is_contiguous(src0) && row_diff == src0->ne[1] &&
|
||||||
|
dst->op_params[0] == GGML_PREC_DEFAULT) {
|
||||||
|
|
||||||
// convert src0 and src1 to fp16, multiply as fp16, convert dst to fp32
|
// convert src0 and src1 to fp16, multiply as fp16, convert dst to fp32
|
||||||
// GGML_SYCL_DEBUG("ggml_sycl_op_mul_mat_sycl - fp16 path\n");
|
// GGML_SYCL_DEBUG("ggml_sycl_op_mul_mat_sycl - fp16 path\n");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue