cuda : try to fix __hgt2_mask
ggml-ci
This commit is contained in:
parent
c70bfd7bcb
commit
c129369702
1 changed files with 2 additions and 2 deletions
|
@@ -308,8 +308,8 @@ static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
|
|||
|
||||
#if CUDART_VERSION < 12000
|
||||
// Lane-wise "greater than" on a half2 pair, returned as a 32-bit mask:
//   bits  0-15 are all ones when low(a)  > low(b),  otherwise zero;
//   bits 16-31 are all ones when high(a) > high(b), otherwise zero.
//
// Each lane is widened to float before it is compared. This replaces the
// earlier direct half comparison (`__low2half(a) > __low2half(b)`), so each
// mask is declared exactly once.
// NOTE(review): the float conversion presumably avoids depending on the half
// operator> overloads; confirm against the toolchains this fallback targets.
static __device__ __forceinline__ uint32_t __hgt2_mask(const half2 a, const half2 b) {
    // The comparison yields 0 or 1; multiplying by the field pattern expands
    // that into an all-zeros or all-ones 16-bit field.
    const uint32_t mask_low  = 0x0000FFFF * (float( __low2half(a)) > float( __low2half(b)));
    const uint32_t mask_high = 0xFFFF0000 * (float(__high2half(a)) > float(__high2half(b)));
    return mask_low | mask_high;
}
|
||||
#endif // CUDART_VERSION < 12000
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue