Reset schedule earlier to allow overlap with ggml graph computation on device (#6933)
* Reset schedule earlier to allow overlap with graph computation on device
This commit is contained in:
parent
0c4d489e29
commit
928e0b7013
2 changed files with 11 additions and 5 deletions
|
@ -11473,6 +11473,10 @@ static int llama_decode_internal(
|
|||
}
|
||||
}
|
||||
|
||||
// Reset state for the next token before backend sync, to allow the CPU activities in the reset to
|
||||
// overlap with device computation.
|
||||
ggml_backend_sched_reset(lctx.sched);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue