From 1e7a033f10891e502e19b19ebc7da918409fe21e Mon Sep 17 00:00:00 2001 From: Georgi Gerganov <ggerganov@gmail.com> Date: Sat, 26 Aug 2023 17:42:33 +0300 Subject: [PATCH] common : add comments --- common/common.h | 6 ++++++ llama.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/common/common.h b/common/common.h index cb1627fc6..1c1acf989 100644 --- a/common/common.h +++ b/common/common.h @@ -116,15 +116,21 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param // Vocab utils // +// tokenizes a string into a vector of tokens +// should work similarly to Python's `tokenizer.encode` std::vector<llama_token> llama_tokenize( struct llama_context * ctx, const std::string & text, bool add_bos); +// converts a token into a piece +// should work similarly to Python's `tokenizer.id_to_piece` std::string llama_token_to_piece( const struct llama_context * ctx, llama_token token); +// detokenizes a vector of tokens into a string +// should work similarly to Python's `tokenizer.decode` // removes the leading space from the first non-BOS token std::string llama_detokenize( llama_context * ctx, diff --git a/llama.h b/llama.h index f9a7300ea..b084fe23c 100644 --- a/llama.h +++ b/llama.h @@ -384,7 +384,7 @@ extern "C" { // Token Id -> Piece. // Uses the vocabulary in the provided context. // Does not write null terminator to the buffer. - // Use code is responsible to remove the leading whitespace of the first non-BOS token. + // User code is responsible for removing the leading whitespace of the first non-BOS token when decoding multiple tokens. LLAMA_API int llama_token_to_piece( const struct llama_context * ctx, llama_token token,