ggml : fix do_yield logic

ggml-ci

commit f77882461f
parent 23d9e5b6de

1 changed file with 7 additions and 27 deletions
ggml.c: 34 changed lines (+7, -27)
@@ -16301,24 +16301,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
 
                 //n_tasks = MIN(n_threads, MAX(1, nr0/128));
                 //printf("nr0 = %8d, nr1 = %8d, nr0*nr1 = %8d, n_tasks%d\n", nr0, nr1, nr0*nr1, n_tasks);
-
-#if defined(GGML_USE_CUBLAS)
-                if (ggml_cuda_can_mul_mat(node->src[0], node->src[1], node)) {
-                    n_tasks = 1; // TODO: this actually is doing nothing
-                                 //       the threads are still spinning
-                }
-#elif defined(GGML_USE_CLBLAST)
-                if (ggml_cl_can_mul_mat(node->src[0], node->src[1], node)) {
-                    n_tasks = 1; // TODO: this actually is doing nothing
-                                 //       the threads are still spinning
-                }
-#endif
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
-                if (ggml_compute_forward_mul_mat_use_blas(node)) {
-                    n_tasks = 1; // TODO: this actually is doing nothing
-                                 //       the threads are still spinning
-                }
-#endif
             } break;
        case GGML_OP_MUL_MAT_ID:
            {
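
The three deleted blocks all tried to collapse an offloaded mul_mat to a single task, and all carried the same TODO: setting n_tasks = 1 never actually idles the other workers, because they keep spin-waiting on the shared node counter. A minimal sketch of why, not ggml code: g_node_n and worker_step are invented stand-ins for the scheduler state in ggml_graph_compute_thread.

    #include <stdatomic.h>

    static atomic_int g_node_n; // stand-in for the shared "current node" counter

    // a worker whose index falls outside n_tasks has no slice to compute,
    // but it still reaches the spin-wait below - so forcing n_tasks = 1 for
    // an offloaded mul_mat only changes who computes, not who spins
    static void worker_step(int ith, int n_tasks, int last_node) {
        if (ith < n_tasks) {
            // compute_slice(ith, n_tasks); // the actual op, elided here
        }
        while (atomic_load(&g_node_n) == last_node) {
            // busy-wait: "the threads are still spinning"
        }
    }
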
@@ -16564,9 +16546,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                 //       depending on the workload and the operating system.
                 //       since it is not clear what is the best approach, it should potentially become user-configurable
                 //       ref: https://github.com/ggerganov/ggml/issues/291
-                // UPD:  adding the do_yield flag seems to resolve the issue universally, though it is the opposite of
-                //       what I expected. I would expect that when we call BLAS, the ggml threads should yield, but it
-                //       seems that the opposite is true - when we call BLAS, we should not yield.
+                // UPD:  adding the do_yield flag seems to resolve the issue universally
                 if (do_yield) {
                     sched_yield();
                 }
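
For reference, the guarded yield above boils down to this pattern: spin on an atomic until another thread publishes a new node index, optionally calling sched_yield() on each iteration. A compilable sketch assuming C11 atomics and POSIX sched_yield(); wait_for_next_node and g_node_n are invented names.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <sched.h>

    static atomic_int g_node_n; // published by the thread that finishes a node

    static int wait_for_next_node(int last, bool do_yield) {
        int node_n;
        do {
            if (do_yield) {
                sched_yield(); // give up the time slice instead of burning the core
            }
            node_n = atomic_load(&g_node_n);
        } while (node_n == last);
        return node_n;
    }
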
@@ -16595,14 +16575,14 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
             ggml_compute_forward(&params, node);
         }
 
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
-        do_yield = true;
+        do_yield = false;
 
-        // do not yield when we call BLAS
-        if (node->op == GGML_OP_MUL_MAT && ggml_compute_forward_mul_mat_use_blas(node)) {
-            do_yield = false;
+        // call sched_yield() for heavier ops
+        // TODO: might have to yield only when calling into BLAS - not sure yet
+        if (node->op == GGML_OP_MUL_MAT) {
+      //if (node->op == GGML_OP_MUL_MAT && ggml_compute_forward_mul_mat_use_blas(node)) {
+            do_yield = true;
         }
-#endif
     }
 
     return GGML_EXIT_SUCCESS;
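
Net effect of the change: the old flag logic existed only in BLAS builds, defaulted to yielding, and suppressed the yield exactly when the mul_mat was handed to BLAS; the new logic runs in every build, defaults to not yielding, and yields only after GGML_OP_MUL_MAT (the BLAS check survives only as a comment, presumably because ggml_compute_forward_mul_mat_use_blas is only defined in BLAS builds). Condensed into two predicates for comparison, a sketch where op_t stands in for the real enum ggml_op:

    #include <stdbool.h>

    typedef enum { OP_OTHER, OP_MUL_MAT } op_t; // stand-in for enum ggml_op

    // new behavior: yield only after heavy ops
    static bool should_yield_new(op_t op) {
        return op == OP_MUL_MAT;
    }

    // old behavior (BLAS builds only): yield by default, except when the
    // mul_mat was offloaded to BLAS
    static bool should_yield_old(op_t op, bool used_blas) {
        return !(op == OP_MUL_MAT && used_blas);
    }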