diff --git a/src/llama.cpp b/src/llama.cpp index 669c5f28e..8f21c39d1 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -17170,7 +17170,7 @@ static int llama_decode_internal( // temporary allocate memory for the input batch if needed llama_batch_allocr batch_allocr(lctx, inp_batch); - llama_batch batch = batch_allocr.batch; + const llama_batch & batch = batch_allocr.batch; const uint32_t n_tokens_all = batch.n_tokens; const auto & model = lctx.model; @@ -17488,7 +17488,7 @@ static int llama_encode_internal( // temporary allocate memory for the input batch if needed llama_batch_allocr batch_allocr(lctx, inp_batch); - llama_batch batch = batch_allocr.batch; + const llama_batch & batch = batch_allocr.batch; const uint32_t n_tokens = batch.n_tokens; const auto & model = lctx.model;