Meng, Hengyu 2024-05-28 06:39:21 +08:00
parent 583c81c91c
commit abe594a058
2 changed files with 7 additions and 12 deletions

CMakeLists.txt

@@ -96,8 +96,8 @@ option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM"
 set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
 option(LLAMA_CUDA "llama: use CUDA" OFF)
 option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF)
-option(LLAMA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
-option(LLAMA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF)
+option(LLAMA_FORCE_DMMV "llama: use dmmv instead of mmvq kernels on GPU" OFF)
+option(LLAMA_FORCE_MMQ "llama: use mmq kernels instead of Math Lib" OFF)
 set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
 set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
 option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF)
@@ -628,10 +628,10 @@ if (LLAMA_SYCL)
         add_compile_definitions(GGML_SYCL_F16)
     endif()
-    if (LLAMA_SYCL_FORCE_DMMV)
+    if (LLAMA_FORCE_DMMV)
         add_compile_definitions(GGML_SYCL_FORCE_DMMV)
     endif()
-    if (LLAMA_SYCL_FORCE_MMQ)
+    if (LLAMA_FORCE_MMQ)
         add_compile_definitions(GGML_SYCL_FORCE_MMQ)
     endif()
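
Note (editor's sketch, not part of the commit): the SYCL block now consumes the shared LLAMA_FORCE_DMMV / LLAMA_FORCE_MMQ switches instead of the old LLAMA_SYCL_FORCE_* names, surfacing them to the code as the GGML_SYCL_FORCE_* compile definitions added above. A minimal C++ probe of those definitions; the file name and messages are illustrative only:

    // probe.cpp (hypothetical). The GGML_SYCL_FORCE_* macros are injected by
    // add_compile_definitions() when LLAMA_FORCE_DMMV / LLAMA_FORCE_MMQ are ON.
    #include <cstdio>

    int main() {
    #ifdef GGML_SYCL_FORCE_DMMV
        std::puts("dmmv kernels forced (LLAMA_FORCE_DMMV=ON)");
    #else
        std::puts("default: mmvq kernels on GPU");
    #endif
    #ifdef GGML_SYCL_FORCE_MMQ
        std::puts("mmq kernels forced (LLAMA_FORCE_MMQ=ON)");
    #else
        std::puts("default: library GEMM (the \"Math Lib\" in the option text)");
    #endif
        return 0;
    }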

ggml-sycl.cpp

@@ -2978,10 +2978,9 @@ static int g_work_group_size = 0;
 #define GGML_SYCL_MAX_NODES 8192 //TODO: adapt to hardwares
-//define for XMX in Intel GPU
-//TODO: currently, it's not used for XMX really.
-#define SYCL_USE_XMX
+#if !defined(GGML_SYCL_FORCE_MMQ)
+#define SYCL_USE_XMX
+#endif
 // max batch size to use MMQ kernels when tensor cores are available
 #define MMQ_MAX_BATCH_SIZE 32
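
Note (editor's sketch, not part of the commit): this hunk puts the SYCL_USE_XMX definition under a GGML_SYCL_FORCE_MMQ guard at file scope, and the hunk below removes the duplicate of that guard from inside ggml_sycl_mul_mat. A self-contained approximation of the resulting dispatch inputs — the gating and MMQ_MAX_BATCH_SIZE come from the code above; the use_mul_mat_q rule and the sample batch size are assumptions for illustration:

    // Compile with -DGGML_SYCL_FORCE_MMQ to see the forced-MMQ branch.
    #include <cstdint>
    #include <cstdio>

    // File-scope gating as introduced by this commit.
    #if !defined(GGML_SYCL_FORCE_MMQ)
    #define SYCL_USE_XMX
    #endif
    // max batch size to use MMQ kernels when tensor cores are available
    #define MMQ_MAX_BATCH_SIZE 32

    int main() {
    #ifdef SYCL_USE_XMX
        bool use_xmx = true;   // XMX (Intel matrix engines) may be used
    #else
        bool use_xmx = false;  // GGML_SYCL_FORCE_MMQ set: fall back to MMQ
    #endif
        const int64_t ne11 = 16; // hypothetical batch size (rows of src1)
        // Assumed rule, mirroring the MMQ_MAX_BATCH_SIZE comment above:
        // prefer MMQ unless XMX is available and the batch is large enough
        // for the library GEMM to win.
        const bool use_mul_mat_q = !use_xmx || ne11 <= MMQ_MAX_BATCH_SIZE;
        std::printf("use_xmx=%d use_mul_mat_q=%d\n", use_xmx, (int)use_mul_mat_q);
        return 0;
    }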
@@ -15228,10 +15227,6 @@ static void ggml_sycl_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1
         }
     }
-#if !defined(GGML_SYCL_FORCE_MMQ)
-#define SYCL_USE_XMX
-#endif
 #ifdef SYCL_USE_XMX
     bool use_xmx = true;
 #else