Declare an unsigned 4-lane byte vector type (uint8x4_t) next to int8x4_t, and
mark the fixed 4-iteration per-byte comparison loop in __vcmpeq4 with #pragma unroll.

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 92f9309b7..e8ff91163 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -172,6 +172,7 @@
 #endif
 
 typedef int8_t int8x4_t __attribute__((ext_vector_type(4)));
+typedef uint8_t uint8x4_t __attribute__((ext_vector_type(4)));
 static __device__ __forceinline__ int __vsubss4(const int a, const int b) {
     const int8x4_t va = reinterpret_cast<const int8x4_t&>(a);
     const int8x4_t vb = reinterpret_cast<const int8x4_t&>(b);
@@ -201,6 +202,7 @@ static __device__ __forceinline__ unsigned int __vcmpeq4(unsigned int a, unsigne
     const uint8x4_t& vb = reinterpret_cast<const uint8x4_t&>(b);
     unsigned int c;
     uint8x4_t& vc = reinterpret_cast<uint8x4_t&>(c);
+#pragma unroll
     for (int i = 0; i < 4; ++i) {
         vc[i] = va[i] == vb[i] ? 0xff : 0x00;
     }