CUDA: remove DMMV, consolidate F16 mult mat vec (#10318)

This commit is contained in:
Johannes Gäßler 2024-11-17 09:09:55 +01:00 committed by GitHub
parent 467576b6cc
commit c3ea58aca4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 246 additions and 1000 deletions

View file

@ -54,21 +54,12 @@ if (CUDAToolkit_FOUND)
target_link_libraries(ggml-cuda PRIVATE ggml-base)
target_include_directories(ggml-cuda PRIVATE . ..)
# TODO: change the definitions to this target only
add_compile_definitions(GGML_CUDA_DMMV_X=${GGML_CUDA_DMMV_X})
add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_MMV_Y})
add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER})
add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
if (GGML_CUDA_GRAPHS)
add_compile_definitions(GGML_CUDA_USE_GRAPHS)
endif()
if (GGML_CUDA_FORCE_DMMV)
add_compile_definitions(GGML_CUDA_FORCE_DMMV)
endif()
if (GGML_CUDA_FORCE_MMQ)
add_compile_definitions(GGML_CUDA_FORCE_MMQ)
endif()
@ -81,10 +72,6 @@ if (CUDAToolkit_FOUND)
add_compile_definitions(GGML_CUDA_NO_VMM)
endif()
if (DEFINED GGML_CUDA_DMMV_Y)
add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_DMMV_Y}) # for backwards compatibility
endif()
if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
add_compile_definitions(GGML_CUDA_F16)
endif()