From f77882461f2485c4555c1cee5cdbfa5f251d9dc4 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 4 Jan 2024 11:43:01 +0200 Subject: [PATCH] ggml : fix do_yield logic ggml-ci --- ggml.c | 34 +++++++--------------------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/ggml.c b/ggml.c index 65c7a780e..ed3e11254 100644 --- a/ggml.c +++ b/ggml.c @@ -16301,24 +16301,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { //n_tasks = MIN(n_threads, MAX(1, nr0/128)); //printf("nr0 = %8d, nr1 = %8d, nr0*nr1 = %8d, n_tasks%d\n", nr0, nr1, nr0*nr1, n_tasks); - -#if defined(GGML_USE_CUBLAS) - if (ggml_cuda_can_mul_mat(node->src[0], node->src[1], node)) { - n_tasks = 1; // TODO: this actually is doing nothing - // the threads are still spinning - } -#elif defined(GGML_USE_CLBLAST) - if (ggml_cl_can_mul_mat(node->src[0], node->src[1], node)) { - n_tasks = 1; // TODO: this actually is doing nothing - // the threads are still spinning - } -#endif -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) - if (ggml_compute_forward_mul_mat_use_blas(node)) { - n_tasks = 1; // TODO: this actually is doing nothing - // the threads are still spinning - } -#endif } break; case GGML_OP_MUL_MAT_ID: { @@ -16564,9 +16546,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { // depending on the workload and the operating system. // since it is not clear what is the best approach, it should potentially become user-configurable // ref: https://github.com/ggerganov/ggml/issues/291 - // UPD: adding the do_yield flag seems to resolve the issue universally, though it is the opposite of - // what I expected. I would expect that when we call BLAS, the ggml threads should yield, but it - // seems that the opposite is true - when we call BLAS, we should not yield. 
+ // UPD: adding the do_yield flag seems to resolve the issue universally if (do_yield) { sched_yield(); } @@ -16595,14 +16575,14 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { ggml_compute_forward(&params, node); } -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) - do_yield = true; + do_yield = false; - // do not yield when we call BLAS - if (node->op == GGML_OP_MUL_MAT && ggml_compute_forward_mul_mat_use_blas(node)) { - do_yield = false; + // call sched_yield() for heavier ops + // TODO: might have to yield only when calling into BLAS - not sure yet + if (node->op == GGML_OP_MUL_MAT) { + //if (node->op == GGML_OP_MUL_MAT && ggml_compute_forward_mul_mat_use_blas(node)) { + do_yield = true; } -#endif } return GGML_EXIT_SUCCESS;