no vec for hs, no hs==256 ncols==32 for Volta

This commit is contained in:
Johannes Gäßler 2024-03-30 10:34:09 +01:00 committed by Georgi Gerganov
parent d59ac670bf
commit 81da919864
2 changed files with 37 additions and 36 deletions

View file

@@ -141,6 +141,7 @@
// Compute-capability ("CC") constants, one per named GPU architecture or feature.
// NOTE(review): values look like major*100 + minor*10 (e.g. 610 for 6.1) — confirm against the code that produces them.
#define CC_PASCAL 600
#define MIN_CC_DP4A 610 // minimum compute capability for __dp4a, an intrinsic for byte-wise dot products
#define CC_VOLTA 700
#define CC_AMPERE 800
// AMD entries are expressed as CC_OFFSET_AMD plus an architecture-specific number.
// NOTE(review): presumably the large offset keeps AMD values disjoint from the values above — confirm with callers.
#define CC_OFFSET_AMD 1000000
#define CC_RDNA1 (CC_OFFSET_AMD + 1010)
#define CC_RDNA2 (CC_OFFSET_AMD + 1030)