CUDA: refactor and optimize IQ MMVQ (#8215)

* CUDA: refactor and optimize IQ MMVQ

* uint -> uint32_t

* __dp4a -> ggml_cuda_dp4a

* remove MIN_CC_DP4A checks

* change default

* try CI fix
This commit is contained in:
Johannes Gäßler 2024-07-01 20:39:06 +02:00 committed by GitHub
parent dae57a1ebc
commit cb5fad4c6c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 406 additions and 487 deletions

View file

@@ -106,19 +106,19 @@ typedef sycl::half2 ggml_half2;
#define QR6_K 2
#define QI2_XXS (QK_K / (4*QR2_XXS))
#define QR2_XXS 8
#define QR2_XXS 4
#define QI2_XS (QK_K / (4*QR2_XS))
#define QR2_XS 8
#define QR2_XS 4
#define QI2_S (QK_K / (4*QR2_S))
#define QR2_S 8
#define QR2_S 4
#define QI3_XXS (QK_K / (4*QR3_XXS))
#define QR3_XXS 8
#define QR3_XXS 4
#define QI3_XS (QK_K / (4*QR3_XS))
#define QR3_XS 8
#define QR3_XS 4
#define QI1_S (QK_K / (4*QR1_S))
#define QR1_S 8
@@ -130,10 +130,10 @@ typedef sycl::half2 ggml_half2;
#define QR4_NL 2
#define QI4_XS (QK_K / (4*QR4_XS))
#define QR4_XS 8
#define QR4_XS 2
#define QI3_S (QK_K / (4*QR3_S))
#define QR3_S 8
#define QR3_S 4
#endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP