diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 82433d8c0..c9ca7719b 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -48,8 +48,9 @@ static bool is_interacting = false;
 
 void write_logfile(
     const llama_context * ctx, const gpt_params & params, const llama_model * model,
-    const std::vector<llama_token> input_tokens, const std::string output, const std::vector<llama_token> output_tokens) {
-
+    const std::vector<llama_token> & input_tokens, const std::string & output,
+    const std::vector<llama_token> & output_tokens
+) {
     if (params.logdir.empty()) {
         return;
     }
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index 06ce18f09..6ce03ba7b 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -71,7 +71,7 @@ void quantize_stats_print_usage(int /*argc*/, char ** argv) {
 }
 
 // Check if a layer is included/excluded by command line
-bool layer_included(const quantize_stats_params params, const std::string & layer) {
+bool layer_included(const quantize_stats_params & params, const std::string & layer) {
     for (const auto& excluded : params.exclude_layers) {
         if (std::regex_search(layer, std::regex(excluded))) {
             return false;
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 6b606447d..810000861 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -139,7 +139,7 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c
 }
 
 // convert a vector of completion_token_output to json
-static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> probs)
+static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> & probs)
 {
     json out = json::array();
     for (const auto &prob : probs)
@@ -271,7 +271,7 @@ struct llama_server_context
         return true;
     }
 
-    std::vector<llama_token> tokenize(json json_prompt, bool add_bos)
+    std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const
     {
         // If `add_bos` is true, we only add BOS, when json_prompt is a string,
         // or the first element of the json_prompt array is a string.
@@ -1255,7 +1255,7 @@ void beam_search_callback(void * callback_data, llama_beams_state beams_state) {
 struct token_translator {
     llama_context * ctx;
     std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); }
-    std::string operator()(completion_token_output cto) const { return (*this)(cto.tok); }
+    std::string operator()(const completion_token_output & cto) const { return (*this)(cto.tok); }
 };
 
 void append_to_generated_text_from_generated_token_probs(llama_server_context & llama) {
diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp
index 0bb9537f6..cbea7d452 100644
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@@ -76,7 +76,7 @@ void * align_with_offset(void * ptr, int offset) {
     return (char *) std::align(MAX_ALIGNMENT, MAX_ALIGNMENT, ptr, dummy_size) + offset;
 }
 
-void benchmark_function(size_t size, size_t q_size, int64_t iterations, std::function<size_t(void)> function) {
+void benchmark_function(size_t size, size_t q_size, int64_t iterations, const std::function<size_t(void)> & function) {
    int64_t min_time_us = INT64_MAX;
    int64_t total_time_us = 0;
    int64_t min_time_cycles = INT64_MAX;
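
Context for the changes above: every hunk replaces a pass-by-value parameter
(std::vector, std::string, json, std::function, quantize_stats_params) with a
pass-by-const-reference one, and the tokenize method is additionally marked
const. Passing such a type by value copies the whole object, including every
element it owns, on each call; a const reference passes only a pointer-sized
handle and still guarantees the callee cannot modify the argument. A minimal
standalone sketch of the difference, using hypothetical names that are not
part of the patch:

    #include <cstdio>
    #include <string>
    #include <vector>

    // Hypothetical example, not from the patch: by value, the vector and
    // every string it owns are copied on each call.
    static size_t count_long_by_value(std::vector<std::string> words) {
        size_t n = 0;
        for (const auto & w : words) { n += w.size() > 8; }
        return n;
    }

    // By const reference: no copy is made, and the compiler still rejects
    // any attempt to modify the caller's vector.
    static size_t count_long_by_ref(const std::vector<std::string> & words) {
        size_t n = 0;
        for (const auto & w : words) { n += w.size() > 8; }
        return n;
    }

    int main() {
        std::vector<std::string> words(100000, "quantization");
        // Identical results, but the by-value call performs ~100k string copies.
        printf("%zu %zu\n", count_long_by_value(words), count_long_by_ref(words));
        return 0;
    }

Small trivially copyable types such as llama_token stay by value (note the
unchanged operator()(llama_token tok) overload in token_translator); the
rewrite only targets types that own heap allocations, where copies of a
std::vector, std::string, json, or std::function are the expensive ones.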