fix typo
This commit is contained in:
parent
583c81c91c
commit
abe594a058
2 changed files with 7 additions and 12 deletions
|
@ -96,8 +96,8 @@ option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM"
|
||||||
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
|
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
|
||||||
option(LLAMA_CUDA "llama: use CUDA" OFF)
|
option(LLAMA_CUDA "llama: use CUDA" OFF)
|
||||||
option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF)
|
option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF)
|
||||||
option(LLAMA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
|
option(LLAMA_FORCE_DMMV "llama: use dmmv instead of mmvq kernels on GPU" OFF)
|
||||||
option(LLAMA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF)
|
option(LLAMA_FORCE_MMQ "llama: use mmq kernels instead of Math Lib" OFF)
|
||||||
set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
|
set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
|
||||||
set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
|
set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
|
||||||
option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF)
|
option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF)
|
||||||
|
@ -628,10 +628,10 @@ if (LLAMA_SYCL)
|
||||||
add_compile_definitions(GGML_SYCL_F16)
|
add_compile_definitions(GGML_SYCL_F16)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (LLAMA_SYCL_FORCE_DMMV)
|
if (LLAMA_FORCE_DMMV)
|
||||||
add_compile_definitions(GGML_SYCL_FORCE_DMMV)
|
add_compile_definitions(GGML_SYCL_FORCE_DMMV)
|
||||||
endif()
|
endif()
|
||||||
if (LLAMA_SYCL_FORCE_MMQ)
|
if (LLAMA_FORCE_MMQ)
|
||||||
add_compile_definitions(GGML_SYCL_FORCE_MMQ)
|
add_compile_definitions(GGML_SYCL_FORCE_MMQ)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
|
@ -2978,10 +2978,9 @@ static int g_work_group_size = 0;
|
||||||
|
|
||||||
#define GGML_SYCL_MAX_NODES 8192 //TODO: adapt to hardwares
|
#define GGML_SYCL_MAX_NODES 8192 //TODO: adapt to hardwares
|
||||||
|
|
||||||
|
#if !defined(GGML_SYCL_FORCE_MMQ)
|
||||||
//define for XMX in Intel GPU
|
|
||||||
//TODO: currently, it's not used for XMX really.
|
|
||||||
#define SYCL_USE_XMX
|
#define SYCL_USE_XMX
|
||||||
|
#endif
|
||||||
|
|
||||||
// max batch size to use MMQ kernels when tensor cores are available
|
// max batch size to use MMQ kernels when tensor cores are available
|
||||||
#define MMQ_MAX_BATCH_SIZE 32
|
#define MMQ_MAX_BATCH_SIZE 32
|
||||||
|
@ -15228,10 +15227,6 @@ static void ggml_sycl_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(GGML_SYCL_FORCE_MMQ)
|
|
||||||
#define SYCL_USE_XMX
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef SYCL_USE_XMX
|
#ifdef SYCL_USE_XMX
|
||||||
bool use_xmx = true;
|
bool use_xmx = true;
|
||||||
#else
|
#else
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue