Merge branch 'threadpool' of https://github.com/CodeLinaro/llama.cpp into threadpool
commit 88dc99a20b

7 changed files with 9 additions and 40 deletions
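
Every hunk below makes the same edit: the ggml_compute_threadpool_t parameter (and the GGML_UNUSED/UNUSED call that silenced it) is dropped from each backend's graph_compute entry point, restoring the two-argument form. A minimal sketch of the resulting hook in the backend vtable follows; the declaration is assumed from ggml-backend-impl.h conventions, not quoted from this commit:

    // Sketch: the graph_compute hook every backend implements after this
    // merge. Types (ggml_backend_t, ggml_status, ggml_cgraph) come from
    // ggml.h / ggml-backend.h; field placement here is illustrative.
    struct ggml_backend_i {
        // ... other hooks elided ...
        enum ggml_status (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
    };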
@@ -2495,13 +2495,9 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra
     return true;
 }

-GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(
-        ggml_backend_t backend,
-        ggml_cgraph * cgraph,
-        ggml_compute_threadpool_t threadpool) {
-
-    GGML_UNUSED(threadpool);
+GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;

     ggml_cuda_set_device(cuda_ctx->device);

 #ifdef USE_CUDA_GRAPH
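
The deleted GGML_UNUSED(threadpool) line (and the UNUSED variant in the Metal and RPC hunks) existed only to silence unused-parameter warnings; with the parameter gone, it goes too. For reference, ggml defines the macro as a plain void cast:

    // As defined in ggml.h; ggml-metal.m and ggml-rpc.cpp use a local
    // UNUSED alias with the same expansion.
    #define GGML_UNUSED(x) (void)(x)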
@@ -1948,12 +1948,7 @@ static ggml_backend_buffer_type_t ggml_backend_kompute_get_default_buffer_type(g
     return ggml_backend_kompute_buffer_type(ctx->device);
 }

-static ggml_status ggml_backend_kompute_graph_compute(
-        ggml_backend_t backend,
-        struct ggml_cgraph * cgraph,
-        ggml_compute_threadpool_t threadpool) {
-
-    GGML_UNUSED(threadpool);
+static ggml_status ggml_backend_kompute_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     auto * ctx = static_cast<ggml_kompute_context *>(backend->context);
     ggml_vk_graph_compute(ctx, cgraph);
     return GGML_STATUS_SUCCESS;
@@ -3103,12 +3103,7 @@ GGML_CALL static ggml_backend_buffer_type_t ggml_backend_metal_get_default_buffe
     UNUSED(backend);
 }

-GGML_CALL static enum ggml_status ggml_backend_metal_graph_compute(
-        ggml_backend_t backend,
-        struct ggml_cgraph * cgraph,
-        ggml_compute_threadpool_t threadpool) {
-
-    UNUSED(threadpool);
+GGML_CALL static enum ggml_status ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_metal_context * metal_ctx = (struct ggml_metal_context *)backend->context;

     return ggml_metal_graph_compute(metal_ctx, cgraph);
@@ -2235,12 +2235,7 @@ static ggml_backend_buffer_type_t ggml_backend_opencl_get_default_buffer_type(gg
     GGML_UNUSED(backend);
 }

-static ggml_status ggml_backend_opencl_graph_compute(
-        ggml_backend_t backend,
-        ggml_cgraph * graph,
-        ggml_compute_threadpool_t threadpool) {
-
-    GGML_UNUSED(threadpool);
+static ggml_status ggml_backend_opencl_graph_compute(ggml_backend_t backend, ggml_cgraph * graph) {
     for (int i = 0; i < graph->n_nodes; ++i) {
         ggml_tensor * node = graph->nodes[i];

@@ -585,8 +585,7 @@ static void serialize_graph(const ggml_cgraph * cgraph, std::vector<uint8_t> & o
     memcpy(out_tensors, tensors.data(), n_tensors * sizeof(rpc_tensor));
 }

-GGML_CALL static enum ggml_status ggml_backend_rpc_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph, ggml_compute_threadpool * tp) {
-    UNUSED(tp);
+GGML_CALL static enum ggml_status ggml_backend_rpc_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_rpc_context * rpc_ctx = (ggml_backend_rpc_context *)backend->context;
     std::vector<uint8_t> input;
     serialize_graph(cgraph, input);
@@ -1021,7 +1020,7 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, std::vector<u
     for (uint32_t i = 0; i < n_nodes; i++) {
         graph->nodes[i] = create_node(nodes[i], ctx, tensor_ptrs, tensor_map);
     }
-    ggml_status status = ggml_backend_graph_compute(backend, graph, NULL);
+    ggml_status status = ggml_backend_graph_compute(backend, graph);
     // output serialization format: | status (1 byte) |
     output.resize(1, 0);
     output[0] = status;
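
The server's reply stays a single status byte, per the serialization comment above. A hypothetical client-side decode of that byte (a sketch only; the actual client code lives elsewhere in ggml-rpc.cpp and is not part of this hunk):

    #include <stddef.h>
    #include <stdint.h>
    #include "ggml.h" // enum ggml_status

    // Hypothetical helper: recover the status from the 1-byte reply.
    // The int8_t round-trip preserves negative statuses such as
    // GGML_STATUS_FAILED (-1), which output[0] = status truncated to a byte.
    static enum ggml_status decode_graph_compute_reply(const uint8_t * reply, size_t len) {
        if (len < 1) {
            return GGML_STATUS_FAILED; // malformed reply
        }
        return (enum ggml_status) (int8_t) reply[0];
    }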
@@ -17022,13 +17022,7 @@ catch (sycl::exception const &exc) {
     std::exit(1);
 }

-GGML_CALL static ggml_status ggml_backend_sycl_graph_compute(
-        ggml_backend_t backend,
-        ggml_cgraph * cgraph,
-        ggml_compute_threadpool_t threadpool) {
-
-    GGML_UNUSED(threadpool);
-
+GGML_CALL static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
     ggml_sycl_set_main_device(sycl_ctx->device);

@@ -6225,12 +6225,7 @@ static bool ggml_vk_is_empty(ggml_tensor * node) {
     return ggml_is_empty(node) || node->op == GGML_OP_NONE || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE;
 }

-GGML_CALL static ggml_status ggml_backend_vk_graph_compute(
-        ggml_backend_t backend,
-        ggml_cgraph * cgraph,
-        ggml_compute_threadpool_t threadpool) {
-
-    GGML_UNUSED(threadpool);
+GGML_CALL static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
 #ifdef GGML_VULKAN_DEBUG
     std::cerr << "ggml_backend_vk_graph_compute(" << cgraph->n_nodes << " nodes)" << std::endl;
 #endif
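
With all seven backends back on the two-argument form, callers no longer pass (or NULL out) a threadpool, as the RPC server hunk above already showed. A minimal caller sketch, assuming backend and graph were created elsewhere (backend init plus ggml_new_graph()):

    #include <stdbool.h>
    #include <stdio.h>
    #include "ggml-backend.h" // ggml_backend_graph_compute()

    // Sketch only: run a prepared graph on a prepared backend and report
    // failures; mirrors the call shape this merge settles on.
    static bool compute_graph(ggml_backend_t backend, struct ggml_cgraph * graph) {
        enum ggml_status status = ggml_backend_graph_compute(backend, graph);
        if (status != GGML_STATUS_SUCCESS) {
            fprintf(stderr, "graph compute failed: %d\n", (int) status);
            return false;
        }
        return true;
    }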