CUDA: Fix models with output size != 32000 (#2480)
This commit is contained in:
parent
220d931864
commit
4f6b60c776
2 changed files with 249 additions and 75 deletions
|
@ -280,8 +280,8 @@ if (LLAMA_CUBLAS)
|
|||
# 52 == lowest CUDA 12 standard
|
||||
# 60 == f16 CUDA intrinsics
|
||||
# 61 == integer CUDA intrinsics
|
||||
# 70 == (assumed) compute capability at which unrolling a loop in mul_mat_q kernels is faster
|
||||
if (LLAMA_CUDA_DMMV_F16)
|
||||
# 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
|
||||
if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
|
||||
set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
|
||||
else()
|
||||
set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue