Moved the ggml_backend_sched_reset call from llama_get_logits_ith to the end of llama_decode_internal
This commit is contained in:
parent
a2beaffec8
commit
34847caa9a
1 changed file with 4 additions and 5 deletions
|
@ -11205,6 +11205,10 @@ static int llama_decode_internal(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reset state for the next token before backend sync, to allow the CPU activities in the reset to
|
||||||
|
// overlap with device computation.
|
||||||
|
ggml_backend_sched_reset(lctx.sched);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -16773,11 +16777,6 @@ float * llama_get_logits(struct llama_context * ctx) {
|
||||||
|
|
||||||
float * llama_get_logits_ith(struct llama_context * ctx, int32_t i) {
|
float * llama_get_logits_ith(struct llama_context * ctx, int32_t i) {
|
||||||
int32_t j = -1;
|
int32_t j = -1;
|
||||||
|
|
||||||
// Reset state for the next run before the following backend sync,
|
|
||||||
// to allow the CPU activities in the reset to overlap with device computation.
|
|
||||||
ggml_backend_sched_reset(ctx->sched);
|
|
||||||
|
|
||||||
llama_synchronize(ctx);
|
llama_synchronize(ctx);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue