rebase to master (except ggml-cuda)
This commit is contained in:
parent
33ab185dd1
commit
9c72e7e916
5 changed files with 21 additions and 30 deletions
30
ggml-cuda.cu
30
ggml-cuda.cu
|
@ -1475,8 +1475,8 @@ static void ggml_cuda_mul_mat(ggml_cuda_context * ctx, ggml_tensor * src0, ggml_
|
|||
}
|
||||
|
||||
static void ggml_cuda_exec_node(ggml_cuda_context * ctx, ggml_tensor * node, cudaStream_t stream) {
|
||||
ggml_tensor * src0 = node->src0;
|
||||
ggml_tensor * src1 = node->src1;
|
||||
ggml_tensor * src0 = node->src[0];
|
||||
ggml_tensor * src1 = node->src[1];
|
||||
ggml_tensor * dst = node;
|
||||
|
||||
#if 0
|
||||
|
@ -1551,8 +1551,6 @@ static void ggml_cuda_exec_node(ggml_cuda_context * ctx, ggml_tensor * node, cud
|
|||
}
|
||||
}
|
||||
|
||||
static const int GGML_MAX_PARENTS = 2 + GGML_MAX_OPT;
|
||||
|
||||
static bool ggml_is_noop(ggml_tensor * t) {
|
||||
return t->op == GGML_OP_RESHAPE || t->op == GGML_OP_VIEW || t->op == GGML_OP_TRANSPOSE ||
|
||||
t->op == GGML_OP_PERMUTE || t->op == GGML_OP_NONE;
|
||||
|
@ -1581,26 +1579,20 @@ static void ggml_cuda_graph_exec_parallel(ggml_cuda_context * ctx, ggml_cgraph *
|
|||
ggml_tensor * node = gf->nodes[i];
|
||||
const bool is_noop = ggml_is_noop(node);
|
||||
|
||||
// build a list of parents
|
||||
ggml_tensor * parents[GGML_MAX_PARENTS] = { node->src0, node->src1 };
|
||||
for (int j = 0; j < GGML_MAX_OPT; j++) {
|
||||
parents[j + 2] = node->opt[j];
|
||||
}
|
||||
|
||||
// assign an stream for the node
|
||||
cudaStream_t stream = nullptr;
|
||||
|
||||
// take a stream from a parent
|
||||
for (int j = 0; j < GGML_MAX_PARENTS; j++) {
|
||||
if (parents[j] && stream_map.count(parents[j]) && stream_map[parents[j]] != nullptr) {
|
||||
stream = stream_map[parents[j]];
|
||||
stream_map.erase(parents[j]);
|
||||
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
||||
if (node->src[j] && stream_map.count(node->src[j]) && stream_map[node->src[j]] != nullptr) {
|
||||
stream = stream_map[node->src[j]];
|
||||
stream_map.erase(node->src[j]);
|
||||
|
||||
if (is_noop) {
|
||||
// if this is a noop, we can use the parent's event
|
||||
stream_map[node] = stream;
|
||||
if (event_map.count(parents[j]) > 0) {
|
||||
event_map[node] = event_map[parents[j]];
|
||||
if (event_map.count(node->src[j]) > 0) {
|
||||
event_map[node] = event_map[node->src[j]];
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -1624,9 +1616,9 @@ static void ggml_cuda_graph_exec_parallel(ggml_cuda_context * ctx, ggml_cgraph *
|
|||
|
||||
// wait on parent streams
|
||||
bool waited = false;
|
||||
for (int j = 0; j < GGML_MAX_PARENTS; j++) {
|
||||
if (parents[j] && event_map.count(parents[j]) > 0) {
|
||||
CUDA_CHECK(cudaStreamWaitEvent(stream, event_map[parents[j]], 0));
|
||||
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
||||
if (node->src[j] && event_map.count(node->src[j]) > 0) {
|
||||
CUDA_CHECK(cudaStreamWaitEvent(stream, event_map[node->src[j]], 0));
|
||||
waited = true;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue