Add some minimal optimizations for CDNA (#10498)

* Add some minimal optimizations for CDNA

* ggml_cuda: set launch bounds also for GCN as it helps there too
This commit is contained in:
uvos 2024-11-27 17:10:08 +01:00 committed by GitHub
parent 46c69e0e75
commit 3ad5451f3b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 36 additions and 8 deletions

View file

@ -142,7 +142,7 @@ static void mul_mat_vec_q_cuda(
int64_t nwarps = 1;
int64_t rows_per_cuda_block = 1;
if (ggml_cuda_info().devices[id].cc < CC_RDNA2) { // NVIDIA and AMD older than RDNA2
if (ggml_cuda_info().devices[id].cc < CC_CDNA || ggml_cuda_info().devices[id].cc == CC_RDNA1) { // NVIDIA and AMD older than RDNA2 but not CDNA
switch(ncols_y) {
case 1:
nwarps = 4;