From 2f538b9547ec2c2c67be0d41ed96d33c141354fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?=
Date: Wed, 17 Apr 2024 16:29:28 +0200
Subject: [PATCH] Add __hgt2_mask implementation for CUDA 11

---
 ggml-cuda/common.cuh | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/ggml-cuda/common.cuh b/ggml-cuda/common.cuh
index 989780dbc..ac6de643d 100644
--- a/ggml-cuda/common.cuh
+++ b/ggml-cuda/common.cuh
@@ -306,6 +306,19 @@ static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
 #endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
 }
 
+#if CUDART_VERSION < 12000
+// Fallback used when building against a CUDA runtime older than 12.0.
+// Compares the two halves of a and b element-wise (a > b) and returns a 32 bit mask:
+// bits 0-15 are all ones if the low half of a is greater than the low half of b,
+// bits 16-31 are all ones if the high half of a is greater than the high half of b,
+// and each 16 bit group is zero otherwise (including when either operand is NaN).
+// The halves are converted to float before comparing so that the code does not rely on the
+// overloaded half comparison operators, and "unsigned int" replaces the non-standard "uint".
+static __device__ __forceinline__ unsigned int __hgt2_mask(const half2 a, const half2 b) {
+    const unsigned int mask_low  = (__half2float( __low2half(a)) > __half2float( __low2half(b))) ? 0x0000FFFFu : 0u;
+    const unsigned int mask_high = (__half2float(__high2half(a)) > __half2float(__high2half(b))) ? 0xFFFF0000u : 0u;
+    return mask_low | mask_high;
+}
+#endif // CUDART_VERSION < 12000
 
 #if defined(GGML_USE_HIPBLAS)
 #define __CUDA_ARCH__ 1300