CUDA: refactor mmq, dmmv, mmvq (#7716)

* CUDA: refactor mmq, dmmv, mmvq

* fix out-of-bounds write

* struct for qk, qr, qi

* fix cmake build

* mmq_type_traits
This commit is contained in:
Johannes Gäßler 2024-06-05 16:53:00 +02:00 committed by GitHub
parent 2b3389677a
commit 7d1a378b8f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
112 changed files with 1783 additions and 1767 deletions

View file

@ -123,12 +123,18 @@ typedef sycl::half2 ggml_half2;
// Paired QI*/QR* constants for the 1_S, 1_M, 4_NL, 4_XS and 3_S quantization variants.
// Each QI is derived from its QR as (block size) / (4 * QR); the block size is QK_K
// for every variant here except 4_NL, which uses QK4_NL.
// NOTE(review): presumably QR is the quantization ratio and QI the number of 32-bit
// integers per block (hence the factor 4) -- confirm against the header's own notes.
// Each QI macro mentions its QR macro one line before that QR is #defined. This is
// valid because a macro body is only expanded where the macro is used, not where it
// is defined. QK_K and QK4_NL are not defined in this span and must likewise be
// defined before any QI* macro is expanded.
#define QI1_S (QK_K / (4*QR1_S))
#define QR1_S 8
#define QI1_M (QK_K / (4*QR1_M))
#define QR1_M 8
#define QI4_NL (QK4_NL / (4*QR4_NL))
#define QR4_NL 2
#define QI4_XS (QK_K / (4*QR4_XS))
#define QR4_XS 8
#define QI3_S (QK_K / (4*QR3_S))
#define QR3_S 8
#endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP
// Defined after the #endif labelled GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP,
// i.e. outside that conditional section.
// NOTE(review): presumably the number of values in one Q4_0 block -- confirm against
// the block struct that uses it.
#define QK4_0 32