diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 174916bc9..d28eb6889 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -47,6 +47,7 @@ #define GGML_CUDA_CC_TURING 750 #define GGML_CUDA_CC_AMPERE 800 #define GGML_CUDA_CC_OFFSET_AMD 0x1000000 +#define GGML_CUDA_CC_HOPPER 900 // GCN/CNDA, wave size is 64 #define GGML_CUDA_CC_GCN4 (GGML_CUDA_CC_OFFSET_AMD + 0x803) // Tonga, Fiji, Polaris, minimum for fast fp16 diff --git a/ggml/src/ggml-cuda/mma.cuh b/ggml/src/ggml-cuda/mma.cuh index bbc0a35ae..18c5868f6 100644 --- a/ggml/src/ggml-cuda/mma.cuh +++ b/ggml/src/ggml-cuda/mma.cuh @@ -16,7 +16,7 @@ #include "common.cuh" -#if CUDART_VERSION >= 11080 +#if (CUDART_VERSION >= 11080) && (__CUDA_ARCH__ >= GGML_CUDA_CC_HOPPER) static __device__ __forceinline__ int ggml_cuda_movmatrix(const int x) { int ret = 0;