diff --git a/include/llama.h b/include/llama.h
index 7225874f7..4dd5348a8 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -1048,6 +1048,7 @@ extern "C" {
 
     //
     // Sampling API
+    // TODO: remove before merge
     //
 
     // TODO: llama_model should become llama_vocab
@@ -1175,6 +1176,23 @@ extern "C" {
     //
     // Sampling v2 API
     //
+    // - Constraints
+    //   The llama_constraint object works on a set of candidate tokens (llama_token_data_array), by modifying their
+    //   logits and probabilities in place. The interface is abstracted so that users can implement custom constraints.
+    //
+    // - Samplers
+    //   The llama_sampler samples a token based on the candidate token probabilities. Before the actual sampling, the
+    //   sampler can apply a sequence of constraints to the candidate tokens.
+    //
+    // The llama_sampler object contains the entire sampling information:
+    //
+    //   - RNG state (seed and generator)
+    //   - Custom set of constraints (see llama_sampler_add_constraint)
+    //   - Sampling method (greedy, dist, mirostat)
+    //   - Previous tokens
+    //
+    // In the future, it will be used to offload the sampling to the backends (e.g. GPU).
+    //
 
     // constraints
 
@@ -1182,6 +1200,7 @@ extern "C" {
 
     typedef void * llama_constraint_context_t;
 
+    // user code can implement the interface below in order to create a custom llama_constraint
     struct llama_constraint_i {
         // TODO: add name API
 
@@ -1263,9 +1282,7 @@ extern "C" {
     /// @details Get the ith accepted token
     /// @param ith [0, n_prev), ith == 0 is the last accepted token.
     /// returns LLAMA_TOKEN_NULL if ith is out of bounds
-    LLAMA_API llama_token llama_sampler_prev(
-            const struct llama_sampler * smpl,
-                               int32_t   ith);
+    LLAMA_API llama_token llama_sampler_prev(const struct llama_sampler * smpl, int32_t ith);
 
     /// @details Get the last accepted token
     /// Same as llama_sampler_prev(smpl, 0)