grammars: nits (revert const grammar sig, fix comment)

This commit is contained in:
ochafik 2024-04-28 15:34:56 +01:00
parent d3425f5cf1
commit c70037f2b3
2 changed files with 3 additions and 3 deletions

View file

@@ -13545,7 +13545,7 @@ void llama_sample_repetition_penalties(
     }
 }
 
-void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * candidates, struct llama_grammar * grammar) {
+void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * candidates, const struct llama_grammar * grammar) {
     GGML_ASSERT(ctx);
 
     const int64_t t_start_sample_us = ggml_time_us();
@@ -13557,7 +13557,7 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
         }
     }
 
-    // Store decoded codepoints when they are not cached.
+    // Store decoded codepoints when they are not cached (happens when there's a partial utf8 string prefix).
     std::vector<std::pair<std::vector<uint32_t>, llama_partial_utf8>> candidates_decoded;
     if (grammar->partial_utf8.n_remain > 0) {
         candidates_decoded.reserve(candidates->size);

View file

@@ -961,7 +961,7 @@ extern "C" {
     LLAMA_API void llama_sample_grammar(
             struct llama_context * ctx,
           llama_token_data_array * candidates,
-           struct llama_grammar * grammar);
+     const struct llama_grammar * grammar);
 
     /// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
     /// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.