cuda: Use common gfx8 value for GCN4

rocm_agent_enumerator returns two values, 800 and 803; the former (800) is the one used by this API.
This commit is contained in:
Jon Haus 2025-01-12 17:42:24 -05:00
parent 924518e2e5
commit 21999481ea

View file

@ -49,7 +49,7 @@
// AMD compute-capability codes: every value below is GGML_CUDA_CC_OFFSET_AMD plus an
// architecture number. Presumably the offset keeps AMD codes from colliding with
// NVIDIA compute capabilities — verify against the code that consumes these macros.
#define GGML_CUDA_CC_OFFSET_AMD 1000000
// GCN/CDNA, wave size is 64
// NOTE(review): GGML_CUDA_CC_GCN4 is defined twice below. This text appears to be a diff hunk
// with its +/- markers stripped, so the 803 line is presumably the removed side and the 800
// line the added side — confirm against the commit before treating either as current.
#define GGML_CUDA_CC_GCN4 (GGML_CUDA_CC_OFFSET_AMD + 803) // Tonga, Fiji, Polaris, minimum for fast fp16
#define GGML_CUDA_CC_GCN4 (GGML_CUDA_CC_OFFSET_AMD + 800) // Tonga, Fiji, Polaris, minimum for fast fp16
#define GGML_CUDA_CC_VEGA (GGML_CUDA_CC_OFFSET_AMD + 900) // Vega56/64, minimum for fp16 dual issue
#define GGML_CUDA_CC_VEGA20 (GGML_CUDA_CC_OFFSET_AMD + 906) // MI50/Radeon VII, minimum for dp4a
#define GGML_CUDA_CC_CDNA (GGML_CUDA_CC_OFFSET_AMD + 908) // MI100, minimum for MFMA, acc registers