rebase to master (except ggml-cuda)

This commit is contained in:
slaren 2023-07-16 14:36:32 +02:00
parent 33ab185dd1
commit 9c72e7e916
5 changed files with 21 additions and 30 deletions

View file

@ -1475,8 +1475,8 @@ static void ggml_cuda_mul_mat(ggml_cuda_context * ctx, ggml_tensor * src0, ggml_
}
static void ggml_cuda_exec_node(ggml_cuda_context * ctx, ggml_tensor * node, cudaStream_t stream) {
ggml_tensor * src0 = node->src0;
ggml_tensor * src1 = node->src1;
ggml_tensor * src0 = node->src[0];
ggml_tensor * src1 = node->src[1];
ggml_tensor * dst = node;
#if 0
@ -1551,8 +1551,6 @@ static void ggml_cuda_exec_node(ggml_cuda_context * ctx, ggml_tensor * node, cud
}
}
static const int GGML_MAX_PARENTS = 2 + GGML_MAX_OPT;
static bool ggml_is_noop(ggml_tensor * t) {
return t->op == GGML_OP_RESHAPE || t->op == GGML_OP_VIEW || t->op == GGML_OP_TRANSPOSE ||
t->op == GGML_OP_PERMUTE || t->op == GGML_OP_NONE;
@ -1581,26 +1579,20 @@ static void ggml_cuda_graph_exec_parallel(ggml_cuda_context * ctx, ggml_cgraph *
ggml_tensor * node = gf->nodes[i];
const bool is_noop = ggml_is_noop(node);
// build a list of parents
ggml_tensor * parents[GGML_MAX_PARENTS] = { node->src0, node->src1 };
for (int j = 0; j < GGML_MAX_OPT; j++) {
parents[j + 2] = node->opt[j];
}
// assign a stream for the node
cudaStream_t stream = nullptr;
// take a stream from a parent
for (int j = 0; j < GGML_MAX_PARENTS; j++) {
if (parents[j] && stream_map.count(parents[j]) && stream_map[parents[j]] != nullptr) {
stream = stream_map[parents[j]];
stream_map.erase(parents[j]);
for (int j = 0; j < GGML_MAX_SRC; j++) {
if (node->src[j] && stream_map.count(node->src[j]) && stream_map[node->src[j]] != nullptr) {
stream = stream_map[node->src[j]];
stream_map.erase(node->src[j]);
if (is_noop) {
// if this is a noop, we can use the parent's event
stream_map[node] = stream;
if (event_map.count(parents[j]) > 0) {
event_map[node] = event_map[parents[j]];
if (event_map.count(node->src[j]) > 0) {
event_map[node] = event_map[node->src[j]];
}
}
break;
@ -1624,9 +1616,9 @@ static void ggml_cuda_graph_exec_parallel(ggml_cuda_context * ctx, ggml_cgraph *
// wait on parent streams
bool waited = false;
for (int j = 0; j < GGML_MAX_PARENTS; j++) {
if (parents[j] && event_map.count(parents[j]) > 0) {
CUDA_CHECK(cudaStreamWaitEvent(stream, event_map[parents[j]], 0));
for (int j = 0; j < GGML_MAX_SRC; j++) {
if (node->src[j] && event_map.count(node->src[j]) > 0) {
CUDA_CHECK(cudaStreamWaitEvent(stream, event_map[node->src[j]], 0));
waited = true;
}
}