From 7fe612dc7367302c818f4398a82b8cbc79b37808 Mon Sep 17 00:00:00 2001
From: l3utterfly <gc.pthzfoldr@gmail.com>
Date: Wed, 31 Jan 2024 10:47:24 +0900
Subject: [PATCH] added docs explaining the rollback op

---
 common/sampling.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/common/sampling.h b/common/sampling.h
index f31b2d900..9757f2436 100644
--- a/common/sampling.h
+++ b/common/sampling.h
@@ -121,6 +121,10 @@ void llama_sampling_accept(
         llama_token id,
         bool apply_grammar);
 
+// rolls back the most recent sampling operations by "rollback_num" tokens:
+// it simply strikes the latest "rollback_num" tokens from the "prev" vector.
+// in general, the rollback is "imperfect": the "forgotten" tokens that were dropped when the length of "prev" exceeded "n_prev" cannot be recalled after the rollback.
+// however, if `sampling_params.n_prev` >= `sampling_params.penalty_last_n` + `rollback_num`, the rollback is "perfect"
 void llama_sampling_rollback(
         struct llama_sampling_context * ctx_sampling,
         int rollback_num);