Moved the ggml_backend_sched_reset call from llama_get_logits_ith to the end of llama_decode_internal

2024-04-26 10:24:02 -07:00 · 2024-04-26 10:24:02 -07:00 · 34847caa9a
commit 34847caa9a
parent a2beaffec8
1 changed file with 4 additions and 5 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -11205,6 +11205,10 @@ static int llama_decode_internal(
        }
    }

+    // Reset state for the next token before backend sync, to allow the CPU activities in the reset to
+    // overlap with device computation.
+    ggml_backend_sched_reset(lctx.sched);
+
    return 0;
 }

@@ -16773,11 +16777,6 @@ float * llama_get_logits(struct llama_context * ctx) {

 float * llama_get_logits_ith(struct llama_context * ctx, int32_t i) {
    int32_t j = -1;
-
-    // Reset state for the next run before the following backend sync,
-    // to allow the CPU activities in the reset to overlap with device computation.
-    ggml_backend_sched_reset(ctx->sched);
-
    llama_synchronize(ctx);

    try {