diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index d33f8a49b..b82167cbf 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -2495,13 +2495,9 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra
     return true;
 }
 
-GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(
-    ggml_backend_t backend,
-    ggml_cgraph * cgraph,
-    ggml_compute_threadpool_t threadpool) {
-
-    GGML_UNUSED(threadpool);
+GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
+
     ggml_cuda_set_device(cuda_ctx->device);
 
 #ifdef USE_CUDA_GRAPH
diff --git a/ggml-kompute.cpp b/ggml-kompute.cpp
index 90272d5f1..6c6058b2a 100644
--- a/ggml-kompute.cpp
+++ b/ggml-kompute.cpp
@@ -1948,12 +1948,7 @@ static ggml_backend_buffer_type_t ggml_backend_kompute_get_default_buffer_type(g
     return ggml_backend_kompute_buffer_type(ctx->device);
 }
 
-static ggml_status ggml_backend_kompute_graph_compute(
-    ggml_backend_t backend,
-    struct ggml_cgraph * cgraph,
-    ggml_compute_threadpool_t threadpool) {
-
-    GGML_UNUSED(threadpool);
+static ggml_status ggml_backend_kompute_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     auto * ctx = static_cast<ggml_kompute_context *>(backend->context);
     ggml_vk_graph_compute(ctx, cgraph);
     return GGML_STATUS_SUCCESS;
diff --git a/ggml-metal.m b/ggml-metal.m
index 051ade2fc..c9e570dbf 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -3103,12 +3103,7 @@ GGML_CALL static ggml_backend_buffer_type_t ggml_backend_metal_get_default_buffe
     UNUSED(backend);
 }
 
-GGML_CALL static enum ggml_status ggml_backend_metal_graph_compute(
-    ggml_backend_t backend,
-    struct ggml_cgraph * cgraph,
-    ggml_compute_threadpool_t threadpool) {
-
-    UNUSED(threadpool);
+GGML_CALL static enum ggml_status ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_metal_context * metal_ctx = (struct ggml_metal_context *)backend->context;
 
     return ggml_metal_graph_compute(metal_ctx, cgraph);
diff --git a/ggml-opencl.cpp b/ggml-opencl.cpp
index 079a718a1..e28566a7b 100644
--- a/ggml-opencl.cpp
+++ b/ggml-opencl.cpp
@@ -2235,12 +2235,7 @@ static ggml_backend_buffer_type_t ggml_backend_opencl_get_default_buffer_type(gg
     GGML_UNUSED(backend);
 }
 
-static ggml_status ggml_backend_opencl_graph_compute(
-    ggml_backend_t backend,
-    ggml_cgraph * graph,
-    ggml_compute_threadpool_t threadpool) {
-
-    GGML_UNUSED(threadpool);
+static ggml_status ggml_backend_opencl_graph_compute(ggml_backend_t backend, ggml_cgraph * graph) {
     for (int i = 0; i < graph->n_nodes; ++i) {
         ggml_tensor * node = graph->nodes[i];
 
diff --git a/ggml-rpc.cpp b/ggml-rpc.cpp
index f3a4fe827..cc1d3ace1 100644
--- a/ggml-rpc.cpp
+++ b/ggml-rpc.cpp
@@ -585,8 +585,7 @@ static void serialize_graph(const ggml_cgraph * cgraph, std::vector<uint8_t> & o
     memcpy(out_tensors, tensors.data(), n_tensors * sizeof(rpc_tensor));
 }
 
-GGML_CALL static enum ggml_status ggml_backend_rpc_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph, ggml_compute_threadpool * tp) {
-    UNUSED(tp);
+GGML_CALL static enum ggml_status ggml_backend_rpc_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_rpc_context * rpc_ctx = (ggml_backend_rpc_context *)backend->context;
     std::vector<uint8_t> input;
     serialize_graph(cgraph, input);
@@ -1021,7 +1020,7 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, std::vector<u
         graph->nodes[i] = create_node(nodes[i], ctx, tensor_ptrs, tensor_map);
     }
-    ggml_status status = ggml_backend_graph_compute(backend, graph, NULL);
+    ggml_status status = ggml_backend_graph_compute(backend, graph);
     // output serialization format: | status (1 byte) |
     output.resize(1, 0);
     output[0] = status;
     return true;
diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index 15d07dc7a..496ec61c3 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -17022,13 +17022,7 @@ catch (sycl::exception const &exc) {
     std::exit(1);
 }
 
-GGML_CALL static ggml_status ggml_backend_sycl_graph_compute(
-    ggml_backend_t backend,
-    ggml_cgraph * cgraph,
-    ggml_compute_threadpool_t threadpool) {
-
-    GGML_UNUSED(threadpool);
-
+GGML_CALL static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
     ggml_sycl_set_main_device(sycl_ctx->device);
 
diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp
index 1f7923d65..79ce1479f 100644
--- a/ggml-vulkan.cpp
+++ b/ggml-vulkan.cpp
@@ -6225,12 +6225,7 @@ static bool ggml_vk_is_empty(ggml_tensor * node) {
     return ggml_is_empty(node) || node->op == GGML_OP_NONE || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE;
 }
 
-GGML_CALL static ggml_status ggml_backend_vk_graph_compute(
-    ggml_backend_t backend,
-    ggml_cgraph * cgraph,
-    ggml_compute_threadpool_t threadpool) {
-
-    GGML_UNUSED(threadpool);
+GGML_CALL static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
 #ifdef GGML_VULKAN_DEBUG
     std::cerr << "ggml_backend_vk_graph_compute(" << cgraph->n_nodes << " nodes)" << std::endl;
 #endif