From 87099452ede829d57ee20b517e7ce4747def5e40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Thu, 13 Jun 2024 18:06:07 +0200 Subject: [PATCH] try CI fix --- ggml-cuda/common.cuh | 4 ++++ ggml-cuda/mmq.cuh | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/ggml-cuda/common.cuh b/ggml-cuda/common.cuh index 3f51548d0..de7c2e434 100644 --- a/ggml-cuda/common.cuh +++ b/ggml-cuda/common.cuh @@ -331,6 +331,10 @@ static __device__ __forceinline__ half2 __shfl_xor(half2 var, int laneMask, int #define FP16_AVAILABLE #endif // (defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) || __CUDA_ARCH__ >= CC_PASCAL +#if defined(FP16_AVAILABLE) && __CUDA_ARCH__ != 610 +#define FAST_FP16_AVAILABLE +#endif // defined(FP16_AVAILABLE) && __CUDA_ARCH__ != 610 + #if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_VOLTA #define FP16_MMA_AVAILABLE #endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_VOLTA diff --git a/ggml-cuda/mmq.cuh b/ggml-cuda/mmq.cuh index 594f0742d..c454f3f0a 100644 --- a/ggml-cuda/mmq.cuh +++ b/ggml-cuda/mmq.cuh @@ -839,7 +839,14 @@ template static __device__ __forceinlin } const int sc_m = bxi->scales[kqsx]; - x_dm[i*(WARP_SIZE + 1) + threadIdx.x] = bxi->dm * make_half2(sc_m & 0x0F, sc_m >> 4); +#ifdef FAST_FP16_AVAILABLE + const half2 x_dm_ik = __hmul2(bxi->dm, make_half2(sc_m & 0x0F, sc_m >> 4)); +#else + const float2 bxi_dmf = __half22float2(bxi->dm); + const half2 x_dm_ik = make_half2(bxi_dmf.x*(sc_m & 0x0F), bxi_dmf.y*(sc_m >> 4)); +#endif // FAST_FP16_AVAILABLE + + x_dm[i*(WARP_SIZE + 1) + threadIdx.x] = x_dm_ik; } }