From abe0829984d0a4473628b90cf4418b752470de49 Mon Sep 17 00:00:00 2001
From: Matt Pulver
Date: Fri, 25 Aug 2023 09:18:24 -0400
Subject: [PATCH] Add '// Beam search' heading to llama.{h,cpp} after
 llama_grammar_accept_token().

---
 llama.cpp | 4 ++++
 llama.h   | 5 +++++
 2 files changed, 9 insertions(+)

diff --git a/llama.cpp b/llama.cpp
index af950d3d5..1e4cf4055 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4326,6 +4326,10 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
     ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
 }
 
+//
+// Beam search
+//
+
 struct llama_beam {
     std::vector<llama_token> tokens;
     float p; // Cumulative beam probability (renormalized relative to all beams)
diff --git a/llama.h b/llama.h
index e88a45078..81a27d438 100644
--- a/llama.h
+++ b/llama.h
@@ -465,6 +465,10 @@ extern "C" {
     /// @details Accepts the sampled token into the grammar
     LLAMA_API void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar * grammar, llama_token token);
 
+    //
+    // Beam search
+    //
+
     struct llama_beam_view {
         llama_token const* tokens;
         size_t n_tokens;
@@ -482,6 +486,7 @@ extern "C" {
         size_t common_prefix_length; // Current max length of prefix tokens shared by all beams.
         bool last_call;              // True iff this is the last callback invocation.
     };
+
     // Type of pointer to the beam_search_callback function.
     // void* callback_data is any custom data passed to llama_beam_search, that is subsequently
     // passed back to beam_search_callback. This avoids having to use global variables in the callback.
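
For context, the section this patch labels is a callback-driven API: llama_beam_search()
repeatedly invokes a user-supplied callback with a snapshot of all beams, and the
void * callback_data pointer threads user state through without globals, as the header
comment above describes. The sketch below shows one plausible callback. It is
illustrative only, not part of the patch: it assumes the llama_beams_state struct
(including beam_views and n_beams members, which this diff's context window cuts off)
and a callback that receives llama_beams_state by value.

// Minimal sketch of a beam-search callback, under the assumptions stated above.
#include <cstddef>
#include <cstdio>
#include <vector>
#include "llama.h" // assumed to declare llama_token, llama_beam_view, llama_beams_state

// User state passed through void * callback_data, avoiding globals.
struct beam_search_state {
    std::vector<llama_token> response; // tokens finalized so far
};

// Assumed signature: void (*)(void * callback_data, llama_beams_state).
static void beam_search_callback(void * callback_data, llama_beams_state beams_state) {
    auto & state = *static_cast<beam_search_state *>(callback_data);
    // The first common_prefix_length tokens are shared by every beam, so they
    // are final and can be copied out of any beam view (beam 0 here).
    if (const size_t n = beams_state.common_prefix_length) {
        const llama_token * tokens = beams_state.beam_views[0].tokens;
        state.response.insert(state.response.end(), tokens, tokens + n);
    }
    if (beams_state.last_call) {
        std::printf("beam search finished with %zu tokens\n", state.response.size());
    }
}

A caller would then pass beam_search_callback and a pointer to a beam_search_state
as the callback_data argument of llama_beam_search(); the exact parameter list of
that entry point (beam count, prediction length, and so on) is not visible in this
diff, so consult the full header.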