Patch notes (commentary only; text placed before the first "diff --git" header
is not part of the patch).

File touched: ggml-cuda/common.cuh
Function touched: __hgt2_mask(const half2 a, const half2 b), which is compiled
only when CUDART_VERSION < 12000.

What the function computes: a 32-bit mask assembled from two independent
"greater than" comparisons between the halves of a and b.
    bits  0..15 are all set when the low  half of a is greater than the low  half of b
    bits 16..31 are all set when the high half of a is greater than the high half of b
Each comparison result (0 or 1) is multiplied by 0x0000FFFF or 0xFFFF0000
respectively, and the two products are OR-ed together for the return value.

What this patch changes: both comparisons now convert each extracted half to
float before applying '>', instead of applying '>' to the half values
directly. The signature, the two constants, and the return expression are
unchanged.

NOTE(review): the motivation is not stated in the patch itself; presumably the
'>' operator on half values is unavailable or ambiguous in some builds -
confirm against the commit message.
NOTE(review): the spacing inside the changed lines looks collapsed in this
copy (see the stray space in "( __low2half"); if the hunk is rejected, retry
with whitespace-insensitive matching.

diff --git a/ggml-cuda/common.cuh b/ggml-cuda/common.cuh
index e82d63e4a..ca0d85ae9 100644
--- a/ggml-cuda/common.cuh
+++ b/ggml-cuda/common.cuh
@@ -308,8 +308,8 @@ static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
 
 #if CUDART_VERSION < 12000
 static __device__ __forceinline__ uint32_t __hgt2_mask(const half2 a, const half2 b) {
-    const uint32_t mask_low = 0x0000FFFF * ( __low2half(a) > __low2half(b));
-    const uint32_t mask_high = 0xFFFF0000 * (__high2half(a) > __high2half(b));
+    const uint32_t mask_low = 0x0000FFFF * (float( __low2half(a)) > float( __low2half(b)));
+    const uint32_t mask_high = 0xFFFF0000 * (float(__high2half(a)) > float(__high2half(b)));
     return mask_low | mask_high;
 }
 #endif // CUDART_VERSION < 12000