From 34847caa9a241f07d6e4332b55231f0404de1176 Mon Sep 17 00:00:00 2001
From: Alan Gray
Date: Fri, 26 Apr 2024 10:24:02 -0700
Subject: [PATCH] moved reset to end of llama_decode_internal

---
 llama.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 86615a3f1..b4da31e75 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -11205,6 +11205,10 @@ static int llama_decode_internal(
         }
     }
 
+    // Reset state for the next token before backend sync, to allow the CPU activities in the reset to
+    // overlap with device computation.
+    ggml_backend_sched_reset(lctx.sched);
+
     return 0;
 }
 
@@ -16773,11 +16777,6 @@ float * llama_get_logits(struct llama_context * ctx) {
 
 float * llama_get_logits_ith(struct llama_context * ctx, int32_t i) {
     int32_t j = -1;
-
-    // Reset state for the next run before the following backend sync,
-    // to allow the CPU activities in the reset to overlap with device computation.
-    ggml_backend_sched_reset(ctx->sched);
-
     llama_synchronize(ctx);
 
     try {
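
Note (illustrative, not part of the patch): a minimal sketch of the calling pattern this change targets, assuming the public llama.h API (llama_decode, llama_get_logits_ith, llama_batch); the helper name and error handling below are hypothetical. With the reset moved to the end of llama_decode_internal, the CPU-side ggml_backend_sched_reset() now runs while the device is still computing the graph, rather than after the llama_synchronize() call inside llama_get_logits_ith().

    // Illustrative sketch only, not part of the patch.
    #include "llama.h"

    // Hypothetical helper showing where the overlap happens from the caller's side.
    static const float * decode_and_read_logits(struct llama_context * ctx, struct llama_batch batch) {
        // llama_decode() submits the graph; device computation proceeds asynchronously,
        // and (with this patch) the scheduler reset runs on the CPU before it returns,
        // overlapping with that device work.
        if (llama_decode(ctx, batch) != 0) {
            return NULL; // decode failed or was deferred
        }
        // llama_get_logits_ith() synchronizes with the device internally and then
        // returns the logits for token i of the last batch.
        return llama_get_logits_ith(ctx, batch.n_tokens - 1);
    }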