Add some minimal optimizations for CDNA (#10498)

* Add some minimal optimizations for CDNA
* ggml_cuda: set launch bounds also for GCN as it helps there too
2024-11-27 17:10:08 +01:00 · 2024-11-27 17:10:08 +01:00 · 3ad5451f3b
commit 3ad5451f3b
parent 46c69e0e75
6 changed files with 36 additions and 8 deletions
--- a/ggml/src/ggml-cuda/mmvq.cu
+++ b/ggml/src/ggml-cuda/mmvq.cu
@@ -142,7 +142,7 @@ static void mul_mat_vec_q_cuda(
    int64_t nwarps = 1;
    int64_t rows_per_cuda_block = 1;

-    if (ggml_cuda_info().devices[id].cc < CC_RDNA2) { // NVIDIA and AMD older than RDNA2
+    if (ggml_cuda_info().devices[id].cc < CC_CDNA || ggml_cuda_info().devices[id].cc == CC_RDNA1) { // NVIDIA and AMD older than RDNA2 but not CDNA
        switch(ncols_y) {
            case 1:
                nwarps = 4;