fix cmake build
This commit is contained in:
parent
bb0d51accd
commit
c63dfdf765
2 changed files with 24 additions and 40 deletions
|
@ -271,7 +271,6 @@ static __device__ __forceinline__ float2 warp_reduce_sum(float2 a) {
|
|||
return a;
|
||||
}
|
||||
|
||||
#ifdef GGML_CUDA_F16
|
||||
static __device__ __forceinline__ half2 warp_reduce_sum(half2 a) {
|
||||
#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
|
||||
#pragma unroll
|
||||
|
@ -284,7 +283,6 @@ static __device__ __forceinline__ half2 warp_reduce_sum(half2 a) {
|
|||
NO_DEVICE_CODE;
|
||||
#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
|
||||
}
|
||||
#endif // GGML_CUDA_F16
|
||||
|
||||
static __device__ __forceinline__ float warp_reduce_max(float x) {
|
||||
#pragma unroll
|
||||
|
@ -294,18 +292,18 @@ static __device__ __forceinline__ float warp_reduce_max(float x) {
|
|||
return x;
|
||||
}
|
||||
|
||||
//static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
|
||||
//#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
|
||||
//#pragma unroll
|
||||
// for (int mask = 16; mask > 0; mask >>= 1) {
|
||||
// x = __hmax2(x, __shfl_xor_sync(0xffffffff, x, mask, 32));
|
||||
// }
|
||||
// return x;
|
||||
//#else
|
||||
// GGML_UNUSED(x);
|
||||
// NO_DEVICE_CODE;
|
||||
//#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
|
||||
//}
|
||||
static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
|
||||
#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
|
||||
#pragma unroll
|
||||
for (int mask = 16; mask > 0; mask >>= 1) {
|
||||
x = __hmax2(x, __shfl_xor_sync(0xffffffff, x, mask, 32));
|
||||
}
|
||||
return x;
|
||||
#else
|
||||
GGML_UNUSED(x);
|
||||
NO_DEVICE_CODE;
|
||||
#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
|
||||
}
|
||||
|
||||
|
||||
#if defined(GGML_USE_HIPBLAS)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue