prefer const reference parameters (clang-tidy)
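Passing expensive-to-copy parameters (vectors, strings, json, std::function, parameter structs) by const reference instead of by value is recommended by clang-tidy's 'performance-unnecessary-value-param' check.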
This commit is contained in:
parent
aae2be0f08
commit
0b0fe663f8
4 changed files with 8 additions and 7 deletions
|
@ -48,8 +48,9 @@ static bool is_interacting = false;
|
||||||
|
|
||||||
void write_logfile(
|
void write_logfile(
|
||||||
const llama_context * ctx, const gpt_params & params, const llama_model * model,
|
const llama_context * ctx, const gpt_params & params, const llama_model * model,
|
||||||
const std::vector<llama_token> input_tokens, const std::string output, const std::vector<llama_token> output_tokens) {
|
const std::vector<llama_token> & input_tokens, const std::string & output,
|
||||||
|
const std::vector<llama_token> & output_tokens
|
||||||
|
) {
|
||||||
if (params.logdir.empty()) {
|
if (params.logdir.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,7 +71,7 @@ void quantize_stats_print_usage(int /*argc*/, char ** argv) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if a layer is included/excluded by command line
|
// Check if a layer is included/excluded by command line
|
||||||
bool layer_included(const quantize_stats_params params, const std::string & layer) {
|
bool layer_included(const quantize_stats_params & params, const std::string & layer) {
|
||||||
for (const auto& excluded : params.exclude_layers) {
|
for (const auto& excluded : params.exclude_layers) {
|
||||||
if (std::regex_search(layer, std::regex(excluded))) {
|
if (std::regex_search(layer, std::regex(excluded))) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -139,7 +139,7 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c
|
||||||
}
|
}
|
||||||
|
|
||||||
// convert a vector of completion_token_output to json
|
// convert a vector of completion_token_output to json
|
||||||
static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> probs)
|
static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> & probs)
|
||||||
{
|
{
|
||||||
json out = json::array();
|
json out = json::array();
|
||||||
for (const auto &prob : probs)
|
for (const auto &prob : probs)
|
||||||
|
@ -271,7 +271,7 @@ struct llama_server_context
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<llama_token> tokenize(json json_prompt, bool add_bos)
|
std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const
|
||||||
{
|
{
|
||||||
// If `add_bos` is true, we only add BOS, when json_prompt is a string,
|
// If `add_bos` is true, we only add BOS, when json_prompt is a string,
|
||||||
// or the first element of the json_prompt array is a string.
|
// or the first element of the json_prompt array is a string.
|
||||||
|
@ -1255,7 +1255,7 @@ void beam_search_callback(void * callback_data, llama_beams_state beams_state) {
|
||||||
struct token_translator {
|
struct token_translator {
|
||||||
llama_context * ctx;
|
llama_context * ctx;
|
||||||
std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); }
|
std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); }
|
||||||
std::string operator()(completion_token_output cto) const { return (*this)(cto.tok); }
|
std::string operator()(const completion_token_output & cto) const { return (*this)(cto.tok); }
|
||||||
};
|
};
|
||||||
|
|
||||||
void append_to_generated_text_from_generated_token_probs(llama_server_context & llama) {
|
void append_to_generated_text_from_generated_token_probs(llama_server_context & llama) {
|
||||||
|
|
|
@ -76,7 +76,7 @@ void * align_with_offset(void * ptr, int offset) {
|
||||||
return (char *) std::align(MAX_ALIGNMENT, MAX_ALIGNMENT, ptr, dummy_size) + offset;
|
return (char *) std::align(MAX_ALIGNMENT, MAX_ALIGNMENT, ptr, dummy_size) + offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
void benchmark_function(size_t size, size_t q_size, int64_t iterations, std::function<size_t(void)> function) {
|
void benchmark_function(size_t size, size_t q_size, int64_t iterations, const std::function<size_t(void)> & function) {
|
||||||
int64_t min_time_us = INT64_MAX;
|
int64_t min_time_us = INT64_MAX;
|
||||||
int64_t total_time_us = 0;
|
int64_t total_time_us = 0;
|
||||||
int64_t min_time_cycles = INT64_MAX;
|
int64_t min_time_cycles = INT64_MAX;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue