Reset schedule earlier to allow overlap with graph computation on device
Refs #6763
parent 637e9a86c2
commit a2beaffec8

2 changed files with 13 additions and 5 deletions
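The point of the change is ordering: the scheduler reset is pure CPU work (a few memsets over the scheduler's hash tables), so issuing it before the backend sync lets it run while the device is still finishing the graph, instead of serially after the wait. Below is a minimal standalone sketch of that pattern, assuming nothing from ggml: the pthread worker and the state buffer are stand-ins for the device backend and the scheduler state, not the real code.

/*
 * Minimal standalone sketch (not ggml code) of the idea behind this commit:
 * do the CPU-side bookkeeping (here, clearing a buffer) *before* blocking on
 * the asynchronous "device" work, so the two overlap.
 */
#include <pthread.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

#define N (64 * 1024 * 1024)

static char state[N]; // stand-in for the scheduler's hash tables

static void * device_compute(void * arg) {
    (void) arg;
    usleep(50 * 1000); // pretend the device spends 50 ms on the graph
    return NULL;
}

int main(void) {
    pthread_t dev;
    pthread_create(&dev, NULL, device_compute, NULL);

    // Before this commit (conceptually): wait first, then reset.
    //   pthread_join(dev, NULL);
    //   memset(state, 0, sizeof(state));
    //
    // After this commit (conceptually): reset first, then wait, so the
    // memset runs while the "device" is still busy.
    memset(state, 0, sizeof(state)); // CPU-side reset overlaps with device work
    pthread_join(dev, NULL);         // backend sync happens afterwards

    printf("done\n");
    return 0;
}

Built with `cc -pthread`, the memset cost in the second ordering is hidden behind the simulated device work rather than added on top of it.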
@@ -1780,12 +1780,15 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
 
 void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
     // reset state for the next run
-    size_t hash_size = sched->hash_set.size;
-    memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
-    memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
-    memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
+    if(!sched->is_reset)
+    {
+        size_t hash_size = sched->hash_set.size;
+        memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
+        memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
+        memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
 
-    sched->is_reset = true;
+        sched->is_reset = true;
+    }
 
     sched->is_alloc = false;
 }
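The is_reset guard is what makes the earlier reset safe: once the state has been cleared, further calls skip the memsets and only clear is_alloc, so resetting speculatively before the sync does not pay the cost twice. A toy sketch of that guard follows; toy_sched, toy_sched_reset, and toy_sched_alloc are illustrative stand-ins, not the real ggml structures.

/*
 * Sketch of the guard pattern added to ggml_backend_sched_reset(): the
 * expensive clearing only runs when the state is actually dirty, so a
 * repeated or early reset is a cheap no-op.
 */
#include <stdbool.h>
#include <string.h>
#include <stdio.h>

struct toy_sched {
    int  keys[1024]; // stand-in for the scheduler state
    bool is_reset;   // true while the state is already clean
    bool is_alloc;
};

static void toy_sched_reset(struct toy_sched * sched) {
    if (!sched->is_reset) {
        memset(sched->keys, 0, sizeof(sched->keys));
        sched->is_reset = true;
    }
    sched->is_alloc = false;
}

static void toy_sched_alloc(struct toy_sched * sched) {
    sched->keys[0]  = 42;   // pretend we populated the state
    sched->is_reset = false;
    sched->is_alloc = true;
}

int main(void) {
    struct toy_sched sched = {0};
    toy_sched_alloc(&sched);
    toy_sched_reset(&sched); // pays for the memset once
    toy_sched_reset(&sched); // already clean: skips the memset entirely
    printf("is_reset=%d is_alloc=%d\n", sched.is_reset, sched.is_alloc);
    return 0;
}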
@@ -16773,6 +16773,11 @@ float * llama_get_logits(struct llama_context * ctx) {
 
 float * llama_get_logits_ith(struct llama_context * ctx, int32_t i) {
     int32_t j = -1;
+
+    // Reset state for the next run before the following backend sync,
+    // to allow the CPU activities in the reset to overlap with device computation.
+    ggml_backend_sched_reset(ctx->sched);
+
     llama_synchronize(ctx);
 
     try {
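Seen from the public llama.cpp API, callers do not change anything; only the point at which the reset cost is paid moves. A hedged usage fragment follows (model, context, and batch setup omitted; read_last_logits is a hypothetical helper, and it assumes batch.logits was enabled for the last token).

#include "llama.h"

// Hypothetical helper: decode a batch and read the logits of its last token.
void read_last_logits(struct llama_context * ctx, struct llama_batch batch) {
    // llama_decode() submits the graph to the backend scheduler; on GPU
    // backends it may return while the compute is still in flight.
    if (llama_decode(ctx, batch) != 0) {
        return; // decode did not succeed
    }

    // With this commit, llama_get_logits_ith() first calls
    // ggml_backend_sched_reset() (CPU-side memsets of the scheduler state)
    // and only then llama_synchronize(), so the reset overlaps with the
    // device finishing the graph instead of running after the wait.
    float * logits = llama_get_logits_ith(ctx, batch.n_tokens - 1);
    (void) logits; // consume the logits here
}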