Token count changes
This commit is contained in:
parent
96e80dabc6
commit
9289306e7a
2 changed files with 9 additions and 4 deletions
|
@ -485,7 +485,7 @@ int main(int argc, char ** argv) {
|
|||
while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
|
||||
// predict
|
||||
if (!embd.empty()) {
|
||||
// Note: n_ctx - 4 here is to match the logic for commandline prompt handling via
|
||||
// Note: (n_ctx - 4) here is to match the logic for commandline prompt handling via
|
||||
// --prompt or --file which uses the same value.
|
||||
int max_embd_size = n_ctx - 4;
|
||||
|
||||
|
@ -500,10 +500,11 @@ int main(int argc, char ** argv) {
|
|||
fflush(stdout);
|
||||
}
|
||||
|
||||
// infinite text generation via context swapping
|
||||
// infinite text generation via context swapping UNLESS n_predict == -2
|
||||
// if we run out of context:
|
||||
// - take the n_keep first tokens from the original prompt (via n_past)
|
||||
// - take half of the last (n_ctx - n_keep) tokens and recompute the logits in batches
|
||||
|
||||
if (n_past + (int) embd.size() + std::max<int>(0, guidance_offset) > n_ctx) {
|
||||
if (params.n_predict == -2) {
|
||||
LOG_TEE("\n\n%s: context full and n_predict == -%d => stopping\n", __func__, params.n_predict);
|
||||
|
@ -611,12 +612,16 @@ int main(int argc, char ** argv) {
|
|||
n_past += n_eval;
|
||||
|
||||
LOG("n_past = %d\n", n_past);
|
||||
// I added the next two lines on 20240110
|
||||
if (n_past % 256 == 0) {
|
||||
printf("\n\033[31mTokens consumed so far = %d / %d \033[0m\n", n_past, n_ctx);
|
||||
}
|
||||
}
|
||||
|
||||
if (!embd.empty() && !path_session.empty()) {
|
||||
session_tokens.insert(session_tokens.end(), embd.begin(), embd.end());
|
||||
n_session_consumed = session_tokens.size();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
embd.clear();
|
||||
|
|
|
@ -10881,7 +10881,7 @@ void llama_print_timings(struct llama_context * ctx) {
|
|||
__func__, timings.t_p_eval_ms, timings.n_p_eval, timings.t_p_eval_ms / timings.n_p_eval, 1e3 / timings.t_p_eval_ms * timings.n_p_eval);
|
||||
LLAMA_LOG_INFO("%s: eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
|
||||
__func__, timings.t_eval_ms, timings.n_eval, timings.t_eval_ms / timings.n_eval, 1e3 / timings.t_eval_ms * timings.n_eval);
|
||||
LLAMA_LOG_INFO("%s: total time = %10.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
|
||||
LLAMA_LOG_INFO("%s: total time = %10.2f ms / %5d tokens\n", __func__, (timings.t_end_ms - timings.t_start_ms), (timings.n_sample + timings.n_p_eval + timings.n_eval));
|
||||
}
|
||||
|
||||
void llama_reset_timings(struct llama_context * ctx) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue