Add hipGraph support
This commit is contained in:
parent
96f4053934
commit
580b619a07
5 changed files with 34 additions and 1 deletions
|
@ -153,6 +153,7 @@ option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashA
|
||||||
option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
|
option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
|
||||||
|
|
||||||
option(GGML_HIP "ggml: use HIP" OFF)
|
option(GGML_HIP "ggml: use HIP" OFF)
|
||||||
|
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
|
||||||
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
|
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
|
||||||
option(GGML_VULKAN "ggml: use Vulkan" OFF)
|
option(GGML_VULKAN "ggml: use Vulkan" OFF)
|
||||||
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
|
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
|
||||||
|
|
|
@ -588,7 +588,7 @@ struct ggml_tensor_extra_gpu {
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#if (CUDART_VERSION >= 12000) && defined(GGML_CUDA_USE_GRAPHS)
|
#if ((CUDART_VERSION >= 12000) && defined(GGML_CUDA_USE_GRAPHS)) || defined(GGML_HIP_GRAPHS)
|
||||||
#define USE_CUDA_GRAPH
|
#define USE_CUDA_GRAPH
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -2493,11 +2493,17 @@ static bool is_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx,
|
||||||
static void update_cuda_graph_executable(ggml_backend_cuda_context * cuda_ctx) {
|
static void update_cuda_graph_executable(ggml_backend_cuda_context * cuda_ctx) {
|
||||||
|
|
||||||
cudaGraphExecUpdateResultInfo result_info;
|
cudaGraphExecUpdateResultInfo result_info;
|
||||||
|
#ifdef __HIP_PLATFORM_AMD__
|
||||||
|
hipGraphNode_t errorNode;
|
||||||
|
hipError_t stat = hipGraphExecUpdate(cuda_ctx->cuda_graph->instance, cuda_ctx->cuda_graph->graph, &errorNode, &result_info);
|
||||||
|
#else
|
||||||
cudaError_t stat = cudaGraphExecUpdate(cuda_ctx->cuda_graph->instance, cuda_ctx->cuda_graph->graph, &result_info);
|
cudaError_t stat = cudaGraphExecUpdate(cuda_ctx->cuda_graph->instance, cuda_ctx->cuda_graph->graph, &result_info);
|
||||||
|
#endif
|
||||||
if (stat == cudaErrorGraphExecUpdateFailure) {
|
if (stat == cudaErrorGraphExecUpdateFailure) {
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
GGML_LOG_DEBUG("%s: CUDA graph update failed\n", __func__);
|
GGML_LOG_DEBUG("%s: CUDA graph update failed\n", __func__);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The pre-existing graph exec cannot be updated due to violated constraints
|
// The pre-existing graph exec cannot be updated due to violated constraints
|
||||||
// so instead clear error and re-instantiate
|
// so instead clear error and re-instantiate
|
||||||
cudaGetLastError();
|
cudaGetLastError();
|
||||||
|
|
22
ggml/src/ggml-cuda/vendors/hip.h
vendored
22
ggml/src/ggml-cuda/vendors/hip.h
vendored
|
@ -81,6 +81,28 @@
|
||||||
#define cudaStreamPerThread hipStreamPerThread
|
#define cudaStreamPerThread hipStreamPerThread
|
||||||
#define cudaStreamSynchronize hipStreamSynchronize
|
#define cudaStreamSynchronize hipStreamSynchronize
|
||||||
#define cudaStreamWaitEvent(stream, event, flags) hipStreamWaitEvent(stream, event, flags)
|
#define cudaStreamWaitEvent(stream, event, flags) hipStreamWaitEvent(stream, event, flags)
|
||||||
|
#define cudaGraphExec_t hipGraphExec_t
|
||||||
|
#define cudaGraphNode_t hipGraphNode_t
|
||||||
|
#define cudaKernelNodeParams hipKernelNodeParams
|
||||||
|
#define cudaKernelNodeParams hipKernelNodeParams
|
||||||
|
#define cudaGraphExecDestroy hipGraphExecDestroy
|
||||||
|
#define cudaGraphLaunch hipGraphLaunch
|
||||||
|
#define cudaErrorGraphExecUpdateFailure hipErrorGraphExecUpdateFailure
|
||||||
|
#define cudaGraphExecUpdateResultInfo hipGraphExecUpdateResult
|
||||||
|
#define cudaGraphNodeType hipGraphNodeType
|
||||||
|
#define cudaGraphNodeTypeKernel hipGraphNodeTypeKernel
|
||||||
|
#define cudaGraphInstantiate hipGraphInstantiate
|
||||||
|
#define cudaStreamEndCapture hipStreamEndCapture
|
||||||
|
#define cudaGraphDestroy hipGraphDestroy
|
||||||
|
#define cudaGraphKernelNodeSetParams hipGraphKernelNodeSetParams
|
||||||
|
#define cudaErrorInvalidDeviceFunction hipErrorInvalidDeviceFunction
|
||||||
|
#define cudaGraphKernelNodeGetParams hipGraphKernelNodeGetParams
|
||||||
|
#define cudaGraphNodeGetType hipGraphNodeGetType
|
||||||
|
#define cudaGraphGetNodes hipGraphGetNodes
|
||||||
|
#define cudaGraphExecUpdate hipGraphExecUpdate
|
||||||
|
#define cudaStreamCaptureModeRelaxed hipStreamCaptureModeRelaxed
|
||||||
|
#define cudaStreamBeginCapture hipStreamBeginCapture
|
||||||
|
#define cudaGraph_t hipGraph_t
|
||||||
#define cudaStream_t hipStream_t
|
#define cudaStream_t hipStream_t
|
||||||
#define cudaSuccess hipSuccess
|
#define cudaSuccess hipSuccess
|
||||||
#define __trap() do { abort(); __builtin_unreachable(); } while(0)
|
#define __trap() do { abort(); __builtin_unreachable(); } while(0)
|
||||||
|
|
|
@ -92,6 +92,10 @@ if (GGML_CUDA_NO_PEER_COPY)
|
||||||
add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
|
add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (GGML_HIP_GRAPHS)
|
||||||
|
add_compile_definitions(GGML_HIP_GRAPHS)
|
||||||
|
endif()
|
||||||
|
|
||||||
if (CXX_IS_HIPCC)
|
if (CXX_IS_HIPCC)
|
||||||
set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
|
set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
|
||||||
target_link_libraries(ggml-hip PRIVATE hip::device)
|
target_link_libraries(ggml-hip PRIVATE hip::device)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue