From 9bedaf4c7191ec0644f832b37e693a2ed5e2c714 Mon Sep 17 00:00:00 2001 From: Matt Pulver Date: Fri, 25 Aug 2023 09:22:14 -0400 Subject: [PATCH] Add space around * pointers and & references. --- examples/beam_search/beam_search.cpp | 12 ++++++------ examples/server/server.cpp | 21 ++++++++++----------- llama.cpp | 28 ++++++++++++++-------------- llama.h | 8 ++++---- 4 files changed, 34 insertions(+), 35 deletions(-) diff --git a/examples/beam_search/beam_search.cpp b/examples/beam_search/beam_search.cpp index 2bc0a378b..95a8e5179 100644 --- a/examples/beam_search/beam_search.cpp +++ b/examples/beam_search/beam_search.cpp @@ -29,10 +29,10 @@ // Used for debugging to print out beam tokens. struct ostream_beam_view { - llama_context* ctx; + llama_context * ctx; llama_beam_view beam_view; }; -std::ostream& operator<<(std::ostream& os, ostream_beam_view const& obv) { +std::ostream& operator<<(std::ostream& os, ostream_beam_view const & obv) { os << "p(" << obv.beam_view.p << ") eos(" << std::boolalpha << obv.beam_view.eos << ") tokens("; for (size_t i=0 ; i response; }; -bool is_at_eos(beam_search_callback_data const& callback_data, llama_token const* tokens, size_t const n_tokens) { +bool is_at_eos(beam_search_callback_data const & callback_data, llama_token const * tokens, size_t const n_tokens) { return n_tokens && tokens[n_tokens-1] == llama_token_eos(callback_data.ctx); } @@ -56,7 +56,7 @@ bool is_at_eos(beam_search_callback_data const& callback_data, llama_token const // * When all beams converge to a common prefix, they are made available in beams_state.beams[0]. // This is also called when the stop condition is met. // Collect tokens into std::vector response which is pointed to by callback_data. -void beam_search_callback(void* callback_data_ptr, llama_beams_state beams_state) { +void beam_search_callback(void * callback_data_ptr, llama_beams_state beams_state) { auto& callback_data = *static_cast(callback_data_ptr); // Mark beams as EOS as needed. for (size_t i=0 ; i response which is pointed to by callback_data. -void beam_search_callback(void* callback_data, llama_beams_state beams_state) { - auto& llama = *static_cast(callback_data); +void beam_search_callback(void * callback_data, llama_beams_state beams_state) { + auto & llama = *static_cast(callback_data); // Mark beams as EOS as needed. for (size_t i=0 ; igenerated_token_probs.end() - n); + llama_token const * tokens = beams_state.beam_views[0].tokens; auto const map = [](llama_token tok) { return completion_token_output{{},tok}; }; std::transform(tokens, tokens + n, llama.generated_token_probs.end() - n, map); printf("%lu", n); @@ -1248,20 +1247,20 @@ void beam_search_callback(void* callback_data, llama_beams_state beams_state) { } struct token_translator { - llama_context* ctx; + llama_context * ctx; std::string operator()(llama_token tok) const { return llama_token_to_str(ctx, tok); } std::string operator()(completion_token_output cto) const { return (*this)(cto.tok); } }; -void append_to_generated_text_from_generated_token_probs(llama_server_context& llama) { - auto& gtps = llama.generated_token_probs; +void append_to_generated_text_from_generated_token_probs(llama_server_context & llama) { + auto & gtps = llama.generated_token_probs; auto translator = token_translator{llama.ctx}; - auto add_strlen = [=](size_t sum, completion_token_output const& cto) { return sum + translator(cto).size(); }; + auto add_strlen = [=](size_t sum, completion_token_output const & cto) { return sum + translator(cto).size(); }; size_t const len = std::accumulate(gtps.begin(), gtps.end(), size_t(0), add_strlen); if (llama.generated_text.capacity() < llama.generated_text.size() + len) { llama.generated_text.reserve(llama.generated_text.size() + len); } - for (completion_token_output const& cto : gtps) { + for (completion_token_output const & cto : gtps) { llama.generated_text += translator(cto); } } diff --git a/llama.cpp b/llama.cpp index 1e4cf4055..f13c0aa6a 100644 --- a/llama.cpp +++ b/llama.cpp @@ -4335,7 +4335,7 @@ struct llama_beam { float p; // Cumulative beam probability (renormalized relative to all beams) bool eos; // Initialize end-of-sentence to false. Callback sets this to true. // Sort beams by probability. In case of ties, prefer beams at eos. - bool operator<(llama_beam const& rhs) const { + bool operator<(llama_beam const & rhs) const { return std::make_tuple(p, eos) < std::make_tuple(rhs.p, rhs.eos); } // Shift off first n tokens and discard them. @@ -4350,7 +4350,7 @@ struct llama_beam { // A struct for calculating logit-related info. struct logit_info { - float const* const logits; + float const * const logits; int const n_vocab; float const max_l; float const normalizer; @@ -4358,7 +4358,7 @@ struct logit_info { float max_l; float operator()(float sum, float l) const { return sum + std::exp(l - max_l); } }; - logit_info(llama_context* ctx) + logit_info(llama_context * ctx) : logits(llama_get_logits(ctx)) , n_vocab(llama_n_vocab(ctx)) , max_l(*std::max_element(logits, logits + n_vocab)) @@ -4376,7 +4376,7 @@ struct logit_info { for (llama_token token_id=0 ; token_id b.logit; }; + auto comp = [](llama_token_data const & a, llama_token_data const & b) { return a.logit > b.logit; }; std::make_heap(min_heap.begin(), min_heap.end(), comp); for (llama_token token_id=k_min ; token_id b.p; }; + auto const comp = [](llama_beam const & a, llama_beam const & b) { return a.p > b.p; }; if (beam.eos) { // beam is at end-of-sentence, so just copy it to next_beams if its probability is high enough. if (next_beams.size() < n_beams) { @@ -4516,9 +4516,9 @@ struct beam_search { // * any of the beams have not yet reached end-of-sentence, AND // * the highest probability beam(s) (plural in case of ties) are not at end-of-sentence // (since all other beam probabilities can only decrease) - void loop(llama_beam_search_callback_fn_t const callback, void* const callback_data) { + void loop(llama_beam_search_callback_fn_t const callback, void * const callback_data) { beams.push_back({{}, 1.0f, false}); // Start with one empty beam w/ probability = 1.0 and !eos. - auto const not_eos = [](llama_beam const& beam) { return !beam.eos; }; + auto const not_eos = [](llama_beam const & beam) { return !beam.eos; }; for (int i=0 ; i& beams) { - auto const sum_p = [](float sum, llama_beam& beam) { return sum + beam.p; }; + static void renormalize_beam_probabilities(std::vector & beams) { + auto const sum_p = [](float sum, llama_beam & beam) { return sum + beam.p; }; float const inv_sum = 1.0f / std::accumulate(beams.begin(), beams.end(), 0.0f, sum_p); - std::for_each(beams.begin(), beams.end(), [=](llama_beam& beam) { beam.p *= inv_sum; }); + std::for_each(beams.begin(), beams.end(), [=](llama_beam & beam) { beam.p *= inv_sum; }); } // Assumes beams is non-empty. Uses llama_beam::operator<() for ordering. @@ -4564,7 +4564,7 @@ struct beam_search { }; void llama_beam_search(llama_context * ctx, - llama_beam_search_callback_fn_t callback, void* callback_data, + llama_beam_search_callback_fn_t callback, void * callback_data, size_t n_beams, int n_past, int n_predict, int n_threads) { assert(ctx); const int64_t t_start_sample_us = ggml_time_us(); diff --git a/llama.h b/llama.h index 81a27d438..c19a60a5d 100644 --- a/llama.h +++ b/llama.h @@ -470,7 +470,7 @@ extern "C" { // struct llama_beam_view { - llama_token const* tokens; + const llama_token * tokens; size_t n_tokens; float p; // Cumulative beam probability (renormalized relative to all beams) bool eos; // Callback should set this to true when a beam is at end-of-sentence. @@ -481,7 +481,7 @@ extern "C" { // (e.g. beams[0]) as they will be removed (shifted) from all beams in all subsequent callbacks. // These pointers are valid only during the synchronous callback, so should not be saved. struct llama_beams_state { - llama_beam_view* beam_views; + llama_beam_view * beam_views; size_t n_beams; // Number of elements in beam_views[]. size_t common_prefix_length; // Current max length of prefix tokens shared by all beams. bool last_call; // True iff this is the last callback invocation. @@ -490,7 +490,7 @@ extern "C" { // Type of pointer to the beam_search_callback function. // void* callback_data is any custom data passed to llama_beam_search, that is subsequently // passed back to beam_search_callback. This avoids having to use global variables in the callback. - typedef void (*llama_beam_search_callback_fn_t)(void* callback_data, llama_beams_state); + typedef void (*llama_beam_search_callback_fn_t)(void * callback_data, llama_beams_state); /// @details Deterministically returns entire sentence constructed by a beam search. /// @param ctx Pointer to the llama_context. @@ -501,7 +501,7 @@ extern "C" { /// @param n_past Number of tokens already evaluated. /// @param n_predict Maximum number of tokens to predict. EOS may occur earlier. /// @param n_threads Number of threads as passed to llama_eval(). - LLAMA_API void llama_beam_search(struct llama_context * ctx, llama_beam_search_callback_fn_t callback, void* callback_data, size_t n_beams, int n_past, int n_predict, int n_threads); + LLAMA_API void llama_beam_search(struct llama_context * ctx, llama_beam_search_callback_fn_t callback, void * callback_data, size_t n_beams, int n_past, int n_predict, int n_threads); // Performance information LLAMA_API struct llama_timings llama_get_timings(struct llama_context * ctx);