From 55b008cdec66cd50a96cfab9701193f25af1fca8 Mon Sep 17 00:00:00 2001 From: Michael Klimenko Date: Sat, 27 Jan 2024 22:29:31 +0100 Subject: [PATCH] Add additional fixes Change bind to lambdas Change push_back to emplace_back Replace for with range-based for Use auto to avoid duplication Use bool values instead of 0 Use pass-by-value with std::move --- common/common.cpp | 10 +- common/grammar-parser.cpp | 6 +- common/sampling.cpp | 6 +- common/train.cpp | 4 +- examples/batched-bench/batched-bench.cpp | 10 +- examples/benchmark/benchmark-matmult.cpp | 2 +- examples/embedding/embedding.cpp | 4 +- examples/export-lora/export-lora.cpp | 16 +-- examples/finetune/finetune.cpp | 32 ++--- examples/imatrix/imatrix.cpp | 4 +- examples/infill/infill.cpp | 8 +- examples/llama-bench/llama-bench.cpp | 34 +++--- examples/llava/clip.cpp | 10 +- examples/lookahead/lookahead.cpp | 24 ++-- examples/main/main.cpp | 24 ++-- examples/perplexity/perplexity.cpp | 16 +-- examples/server/httplib.h | 114 +++++++++--------- examples/server/server.cpp | 42 +++---- examples/server/utils.hpp | 15 +-- examples/tokenize/tokenize.cpp | 6 +- .../train-text-from-scratch.cpp | 17 ++- llama.cpp | 38 +++--- tests/test-backend-ops.cpp | 10 +- tests/test-grad0.cpp | 12 +- tests/test-grammar-parser.cpp | 22 ++-- tests/test-llama-grammar.cpp | 6 +- tests/test-quantize-fns.cpp | 2 +- tests/test-quantize-perf.cpp | 2 +- 28 files changed, 237 insertions(+), 259 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index f8fdcfe23..7a9583b65 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -511,7 +511,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { invalid_param = true; break; } - params.lora_adapter.push_back(std::make_tuple(argv[i], 1.0f)); + params.lora_adapter.emplace_back(argv[i], 1.0f); params.use_mmap = false; } else if (arg == "--lora-scaled") { if (++i >= argc) { @@ -523,7 +523,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { 
invalid_param = true; break; } - params.lora_adapter.push_back(std::make_tuple(lora_adapter, std::stof(argv[i]))); + params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i])); params.use_mmap = false; } else if (arg == "--lora-base") { if (++i >= argc) { @@ -875,7 +875,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { } if (!params.kv_overrides.empty()) { - params.kv_overrides.emplace_back(llama_model_kv_override()); + params.kv_overrides.emplace_back(); params.kv_overrides.back().key[0] = 0; } @@ -1335,8 +1335,8 @@ std::string llama_detokenize_bpe(llama_context * ctx, const std::vectorparams = params; result->grammar = nullptr; @@ -197,8 +197,8 @@ static llama_token llama_sampling_sample_impl( } // apply params.logit_bias map - for (auto it = params.logit_bias.begin(); it != params.logit_bias.end(); it++) { - logits[it->first] += it->second; + for (auto logit_bia : params.logit_bias) { + logits[logit_bia.first] += logit_bia.second; } if (ctx_cfg) { diff --git a/common/train.cpp b/common/train.cpp index b309808b5..c51a1839a 100644 --- a/common/train.cpp +++ b/common/train.cpp @@ -18,7 +18,7 @@ struct random_uniform_distribution { }; struct train_state * init_train_state() { - struct train_state * state = new struct train_state; + auto state = new struct train_state; state->train_its = 0; state->train_samples = 0; state->train_tokens = 0; @@ -1379,7 +1379,7 @@ void finish_processing_train_args(struct train_params_common * params) { } void train_opt_callback(void * vdata, int accum_step, float * sched, bool * cancel) { - struct train_opt_callback_data * data = (struct train_opt_callback_data *) vdata; + auto data = (struct train_opt_callback_data *) vdata; struct train_params_common * params = data->params; struct train_state * train = data->train; struct ggml_opt_context * opt = train->opt; diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp index 7924db267..ed5037451 100644 --- 
a/examples/batched-bench/batched-bench.cpp +++ b/examples/batched-bench/batched-bench.cpp @@ -164,13 +164,9 @@ int main(int argc, char ** argv) { LOG_TEE("|%6s | %6s | %4s | %6s | %8s | %8s | %8s | %8s | %8s | %8s |\n", "PP", "TG", "B", "N_KV", "T_PP s", "S_PP t/s", "T_TG s", "S_TG t/s", "T s", "S t/s"); LOG_TEE("|%6s-|-%6s-|-%4s-|-%6s-|-%8s-|-%8s-|-%8s-|-%8s-|-%8s-|-%8s-|\n", "------", "------", "----", "------", "--------", "--------", "--------", "--------", "--------", "--------"); - for ( int i_pp = 0; i_pp < (int) n_pp.size(); ++i_pp) { - for ( int i_tg = 0; i_tg < (int) n_tg.size(); ++i_tg) { - for (int i_pl = 0; i_pl < (int) n_pl.size(); ++i_pl) { - const int pp = n_pp[i_pp]; - const int tg = n_tg[i_tg]; - const int pl = n_pl[i_pl]; - + for (int pp : n_pp) { + for (int tg : n_tg) { + for (int pl : n_pl) { const int n_ctx_req = is_pp_shared ? pp + pl*tg : pl*(pp + tg); if (n_ctx_req > n_kv_max) { diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp index e89f3de2f..07c51313e 100644 --- a/examples/benchmark/benchmark-matmult.cpp +++ b/examples/benchmark/benchmark-matmult.cpp @@ -143,7 +143,7 @@ int main(int argc, char ** argv) { struct ggml_init_params params = { /*.mem_size =*/ ctx_size, /*.mem_buffer =*/ NULL, - /* no_alloc =*/ 0 + /* no_alloc =*/ false }; ctx = ggml_init(params); diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 3295cd240..35a0cb912 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -64,8 +64,8 @@ int main(int argc, char ** argv) { fprintf(stderr, "\n"); fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str()); fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size()); - for (int i = 0; i < (int) embd_inp.size(); i++) { - fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_piece(ctx, embd_inp[i]).c_str()); + for (int i : embd_inp) { + fprintf(stderr, "%6d -> '%s'\n", i, 
llama_token_to_piece(ctx, i).c_str()); } fprintf(stderr, "\n"); } diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp index 14a4e97d8..182976563 100644 --- a/examples/export-lora/export-lora.cpp +++ b/examples/export-lora/export-lora.cpp @@ -225,7 +225,7 @@ static void free_lora(struct lora_data * lora) { } static struct lora_data * load_lora(struct lora_info * info) { - struct lora_data * result = new struct lora_data; + auto result = new struct lora_data; result->info = *info; result->ctx = NULL; result->lora_r = 1; @@ -370,9 +370,9 @@ static bool apply_lora(struct ggml_tensor * tensor, struct lora_data * lora, int static void export_lora(struct export_lora_params * params) { // load all loras std::vector loras; - for (size_t i = 0; i < params->lora.size(); ++i) { - struct lora_data * lora = load_lora(&params->lora[i]); - if (lora != NULL) { + for (auto& i : params->lora) { + auto lora = load_lora(&i); + if (lora) { loras.push_back(lora); } } @@ -431,8 +431,8 @@ static void export_lora(struct export_lora_params * params) { fin.read_raw(data.data(), data.size()); // apply all loras - for (size_t k = 0; k < loras.size(); ++k) { - apply_lora(tensor, loras[k], params->n_threads); + for (auto& lora : loras) { + apply_lora(tensor, lora, params->n_threads); } // write tensor data + padding @@ -455,8 +455,8 @@ static void export_lora(struct export_lora_params * params) { gguf_free(gguf_in); // free loras - for (size_t i = 0; i < loras.size(); ++i) { - free_lora(loras[i]); + for (auto& lora : loras) { + free_lora(lora); } } diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp index 2a326a2c4..4dc588be6 100644 --- a/examples/finetune/finetune.cpp +++ b/examples/finetune/finetune.cpp @@ -379,8 +379,7 @@ static void alloc_lora(struct ggml_allocr * alloc, struct my_llama_lora * lora) ggml_allocr_alloc(alloc, lora->norm_b); ggml_allocr_alloc(alloc, lora->output_a); ggml_allocr_alloc(alloc, lora->output_b); - for 
(uint32_t i = 0; i < lora->layers.size(); ++i) { - auto & layer = lora->layers[i]; + for (auto& layer : lora->layers) { ggml_allocr_alloc(alloc, layer.attention_norm_a); ggml_allocr_alloc(alloc, layer.attention_norm_b); ggml_allocr_alloc(alloc, layer.wq_a); @@ -406,8 +405,7 @@ static void alloc_lora(struct ggml_allocr * alloc, struct my_llama_lora * lora) ggml_allocr_alloc(alloc, lora->norm_b->grad); ggml_allocr_alloc(alloc, lora->output_a->grad); ggml_allocr_alloc(alloc, lora->output_b->grad); - for (uint32_t i = 0; i < lora->layers.size(); ++i) { - auto & layer = lora->layers[i]; + for (auto& layer : lora->layers) { ggml_allocr_alloc(alloc, layer.attention_norm_a->grad); ggml_allocr_alloc(alloc, layer.attention_norm_b->grad); ggml_allocr_alloc(alloc, layer.wq_a->grad); @@ -803,9 +801,9 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs( // allocating checkpoints in one block to reduce memory fragmentation // note: they will be freed in reverse order - for (unsigned int i = 0; i < checkpoints.size(); ++i) { - if (checkpoints[i]->data == NULL && checkpoints[i]->view_src == NULL) { - ggml_allocr_alloc(alloc, checkpoints[i]); + for (auto& checkpoint : checkpoints) { + if (checkpoint->data == NULL && checkpoint->view_src == NULL) { + ggml_allocr_alloc(alloc, checkpoint); } } @@ -872,8 +870,7 @@ static void load_llama_lora_gguf(struct gguf_context * fctx, struct ggml_context copy_tensor_by_name(lora->output_a, f_ggml_ctx, ggml_get_name(lora->output_a)); copy_tensor_by_name(lora->output_b, f_ggml_ctx, ggml_get_name(lora->output_b)); - for (uint32_t i = 0; i < lora->layers.size(); ++i) { - auto & layer = lora->layers[i]; + for (auto& layer : lora->layers) { copy_tensor_by_name(layer.attention_norm_a, f_ggml_ctx, ggml_get_name(layer.attention_norm_a)); copy_tensor_by_name(layer.attention_norm_b, f_ggml_ctx, ggml_get_name(layer.attention_norm_b)); copy_tensor_by_name(layer.wq_a, f_ggml_ctx, ggml_get_name(layer.wq_a)); @@ -940,9 +937,7 @@ static void 
save_llama_lora_gguf(struct gguf_context * fctx, struct my_llama_mod gguf_add_tensor(fctx, lora->output_a); gguf_add_tensor(fctx, lora->output_b); - for (uint32_t i = 0; i < lora->layers.size(); ++i) { - auto & layer = lora->layers[i]; - + for (auto& layer : lora->layers) { gguf_add_tensor(fctx, layer.attention_norm_a); gguf_add_tensor(fctx, layer.attention_norm_b); gguf_add_tensor(fctx, layer.wq_a); @@ -1476,7 +1471,7 @@ struct save_train_files_data { }; static void save_train_files(void * vdata, struct train_state * train) { - struct save_train_files_data * data = (struct save_train_files_data *) vdata; + auto data = (struct save_train_files_data *) vdata; int64_t iter = train->opt->iter; @@ -1499,8 +1494,7 @@ static int64_t get_parameter_count(struct my_llama_lora* lora) { nx += ggml_nelements(lora->output_a); nx += ggml_nelements(lora->output_b); - for (uint32_t i = 0; i < lora->layers.size(); ++i) { - auto & layer = lora->layers[i]; + for (auto& layer : lora->layers) { nx += ggml_nelements(layer.attention_norm_a); nx += ggml_nelements(layer.attention_norm_b); nx += ggml_nelements(layer.wq_a); @@ -1817,12 +1811,12 @@ int main(int argc, char ** argv) { std::vector token_noccurs; token_noccurs.resize(model.hparams.n_vocab, 0); - for (unsigned int i = 0; i < train_tokens.size(); ++i) { - ++token_noccurs[train_tokens[i]]; + for (int train_token : train_tokens) { + ++token_noccurs[train_token]; } int n_unique_tokens = 0; - for (unsigned int i = 0; i < token_noccurs.size(); ++i) { - if (token_noccurs[i] == 0) continue; + for (unsigned long long token_noccur : token_noccurs) { + if (token_noccur == 0) continue; ++n_unique_tokens; } printf("%s: number of unique tokens: %d\n", __func__, n_unique_tokens); diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index ea06fcdbf..4cf05f7c8 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -216,8 +216,8 @@ static std::vector softmax(const std::vector& logits) { sum_exp += 
exp_logit; probs[i] = exp_logit; } - for (size_t i = 0; i < probs.size(); i++) { - probs[i] /= sum_exp; + for (float& prob : probs) { + prob /= static_cast(sum_exp); } return probs; } diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp index 72fb133b4..bebed4433 100644 --- a/examples/infill/infill.cpp +++ b/examples/infill/infill.cpp @@ -313,16 +313,16 @@ int main(int argc, char ** argv) { LOG_TEE("\n"); LOG_TEE("%s: prompt: '%s'\n", __func__, params.prompt.c_str()); LOG_TEE("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size()); - for (int i = 0; i < (int) embd_inp.size(); i++) { - LOG_TEE("%6d -> '%s'\n", embd_inp[i], llama_token_to_piece(ctx, embd_inp[i]).c_str()); + for (int i : embd_inp) { + LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str()); } if (ctx_guidance) { LOG_TEE("\n"); LOG_TEE("%s: negative prompt: '%s'\n", __func__, sparams.cfg_negative_prompt.c_str()); LOG_TEE("%s: number of tokens in negative prompt = %zu\n", __func__, guidance_inp.size()); - for (int i = 0; i < (int) guidance_inp.size(); i++) { - LOG_TEE("%6d -> '%s'\n", guidance_inp[i], llama_token_to_piece(ctx, guidance_inp[i]).c_str()); + for (int i : guidance_inp) { + LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str()); } } diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 5053d3f52..4cf432496 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -745,7 +745,7 @@ const std::string test::cpu_info = get_cpu_info(); const std::string test::gpu_info = get_gpu_info(); struct printer { - virtual ~printer() {} + virtual ~printer() = default; FILE * fout; virtual void print_header(const cmd_params & params) { (void) params; } @@ -891,43 +891,43 @@ struct markdown_printer : public printer { void print_header(const cmd_params & params) override { // select fields to print - fields.push_back("model"); - fields.push_back("size"); - fields.push_back("params"); 
- fields.push_back("backend"); + fields.emplace_back("model"); + fields.emplace_back("size"); + fields.emplace_back("params"); + fields.emplace_back("backend"); bool is_cpu_backend = test::get_backend() == "CPU" || test::get_backend() == "BLAS"; if (!is_cpu_backend) { - fields.push_back("n_gpu_layers"); + fields.emplace_back("n_gpu_layers"); } if (params.n_threads.size() > 1 || params.n_threads != cmd_params_defaults.n_threads || is_cpu_backend) { - fields.push_back("n_threads"); + fields.emplace_back("n_threads"); } if (params.n_batch.size() > 1 || params.n_batch != cmd_params_defaults.n_batch) { - fields.push_back("n_batch"); + fields.emplace_back("n_batch"); } if (params.type_k.size() > 1 || params.type_k != cmd_params_defaults.type_k) { - fields.push_back("type_k"); + fields.emplace_back("type_k"); } if (params.type_v.size() > 1 || params.type_v != cmd_params_defaults.type_v) { - fields.push_back("type_v"); + fields.emplace_back("type_v"); } if (params.main_gpu.size() > 1 || params.main_gpu != cmd_params_defaults.main_gpu) { - fields.push_back("main_gpu"); + fields.emplace_back("main_gpu"); } if (params.split_mode.size() > 1 || params.split_mode != cmd_params_defaults.split_mode) { - fields.push_back("split_mode"); + fields.emplace_back("split_mode"); } if (params.mul_mat_q.size() > 1 || params.mul_mat_q != cmd_params_defaults.mul_mat_q) { - fields.push_back("mul_mat_q"); + fields.emplace_back("mul_mat_q"); } if (params.no_kv_offload.size() > 1 || params.no_kv_offload != cmd_params_defaults.no_kv_offload) { - fields.push_back("no_kv_offload"); + fields.emplace_back("no_kv_offload"); } if (params.tensor_split.size() > 1 || params.tensor_split != cmd_params_defaults.tensor_split) { - fields.push_back("tensor_split"); + fields.emplace_back("tensor_split"); } - fields.push_back("test"); - fields.push_back("t/s"); + fields.emplace_back("test"); + fields.emplace_back("t/s"); fprintf(fout, "|"); for (const auto & field : fields) { diff --git a/examples/llava/clip.cpp 
b/examples/llava/clip.cpp index 3d43c9d99..c4a18748f 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -800,7 +800,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { buffer_size += n_tensors * 128 /* CLIP PADDING */; - clip_ctx * new_clip = new clip_ctx; + auto* new_clip = new clip_ctx; // update projector type { @@ -1416,13 +1416,13 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i printf("%s: quantized size = %8.2f MB\n", __func__, total_size_new / 1024.0 / 1024.0); int64_t sum_all = 0; - for (size_t i = 0; i < hist_all.size(); ++i) { - sum_all += hist_all[i]; + for (auto i : hist_all) { + sum_all += i; } printf("%s: hist: ", __func__); - for (size_t i = 0; i < hist_all.size(); ++i) { - printf("%5.3f ", hist_all[i] / (float)sum_all); + for (auto i : hist_all) { + printf("%5.3f ", i / (float)sum_all); } printf("\n"); } diff --git a/examples/lookahead/lookahead.cpp b/examples/lookahead/lookahead.cpp index ba949ff6e..d0adfec12 100644 --- a/examples/lookahead/lookahead.cpp +++ b/examples/lookahead/lookahead.cpp @@ -132,7 +132,7 @@ int main(int argc, char ** argv) { for (int i = 0; i < W; i++) { // there are different ways to init these tokens - if (0) { + if (false) { // initialize randomly from the prompt tokens tokens_j[j][i] = all[1 + rand() % (all.size() - 1)]; } else { @@ -268,10 +268,10 @@ int main(int argc, char ** argv) { // if no active ngrams are left, it means the sampled token does not pass the verification if (v > 0) { - for (int g = 0; g < (int) ngrams_cur.size(); g++) { - if (ngrams_cur[g].active) { - i_batch = ngrams_cur[g].i_batch[v]; - seq_id_best = ngrams_cur[g].seq_id; + for (auto& g : ngrams_cur) { + if (g.active) { + i_batch = g.i_batch[v]; + seq_id_best = g.seq_id; ++n_accept; break; @@ -316,20 +316,20 @@ int main(int argc, char ** argv) { } // verify across active n-grams - for (int g = 0; g < (int) ngrams_cur.size(); g++) { - if (ngrams_cur[g].active) { + 
for (auto& g : ngrams_cur) { + if (g.active) { if (v == N - 1) { - ngrams_cur[g].active = false; + g.active = false; } else { - if (id != ngrams_cur[g].tokens[v + 1]) { - ngrams_cur[g].active = false; + if (id != g.tokens[v + 1]) { + g.active = false; } } } } // print known n-grams starting with token id (debug) - if (0 && v == 0) { + if (false && v == 0) { if (ngrams_observed.cnt[id] > 0) { printf("\n - %d n-grams starting with '%s'\n", ngrams_observed.cnt[id], llama_token_to_piece(ctx, id).c_str()); } @@ -367,7 +367,7 @@ int main(int argc, char ** argv) { } else { for (int i = 0; i < W; i++) { // there are different ways to init these tokens - if (0) { + if (false) { // random init tokens_j[N - 2][i] = all[1 + rand() % (all.size() - 1)]; } else { diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 58b7f807a..4d52cf284 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -344,12 +344,12 @@ int main(int argc, char ** argv) { // in instruct mode, we inject a prefix and a suffix to each input by the user if (params.instruct) { params.interactive_first = true; - params.antiprompt.push_back("### Instruction:\n\n"); + params.antiprompt.emplace_back("### Instruction:\n\n"); } // similar for chatml mode else if (params.chatml) { params.interactive_first = true; - params.antiprompt.push_back("<|im_start|>user\n"); + params.antiprompt.emplace_back("<|im_start|>user\n"); } // enable interactive mode if interactive start is specified @@ -361,16 +361,16 @@ int main(int argc, char ** argv) { LOG_TEE("\n"); LOG_TEE("%s: prompt: '%s'\n", __func__, params.prompt.c_str()); LOG_TEE("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size()); - for (int i = 0; i < (int) embd_inp.size(); i++) { - LOG_TEE("%6d -> '%s'\n", embd_inp[i], llama_token_to_piece(ctx, embd_inp[i]).c_str()); + for (int i : embd_inp) { + LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str()); } if (ctx_guidance) { LOG_TEE("\n"); LOG_TEE("%s: negative prompt: 
'%s'\n", __func__, sparams.cfg_negative_prompt.c_str()); LOG_TEE("%s: number of tokens in negative prompt = %zu\n", __func__, guidance_inp.size()); - for (int i = 0; i < (int) guidance_inp.size(); i++) { - LOG_TEE("%6d -> '%s'\n", guidance_inp[i], llama_token_to_piece(ctx, guidance_inp[i]).c_str()); + for (int i : guidance_inp) { + LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str()); } } @@ -405,8 +405,8 @@ int main(int argc, char ** argv) { LOG_TEE("Reverse prompt: '%s'\n", antiprompt.c_str()); if (params.verbose_prompt) { auto tmp = ::llama_tokenize(ctx, antiprompt, false, true); - for (int i = 0; i < (int) tmp.size(); i++) { - LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str()); + for (int i : tmp) { + LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str()); } } } @@ -420,8 +420,8 @@ int main(int argc, char ** argv) { LOG_TEE("Input prefix: '%s'\n", params.input_prefix.c_str()); if (params.verbose_prompt) { auto tmp = ::llama_tokenize(ctx, params.input_prefix, true, true); - for (int i = 0; i < (int) tmp.size(); i++) { - LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str()); + for (int i : tmp) { + LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str()); } } } @@ -430,8 +430,8 @@ int main(int argc, char ** argv) { LOG_TEE("Input suffix: '%s'\n", params.input_suffix.c_str()); if (params.verbose_prompt) { auto tmp = ::llama_tokenize(ctx, params.input_suffix, false, true); - for (int i = 0; i < (int) tmp.size(); i++) { - LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str()); + for (int i : tmp) { + LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str()); } } } diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index a14a23313..cf87cf5a9 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -94,8 +94,8 @@ static std::vector softmax(const std::vector& logits) { sum_exp += exp_logit; 
probs[i] = exp_logit; } - for (size_t i = 0; i < probs.size(); i++) { - probs[i] /= sum_exp; + for (float& prob : probs) { + prob /= static_cast(sum_exp); } return probs; } @@ -881,7 +881,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) { size_t li = hs_cur.common_prefix; for (int s = 0; s < 4; ++s) { for (size_t j = hs_cur.common_prefix; j < hs_cur.seq_tokens[s].size() - 1; j++) { - eval_pairs.push_back(std::make_pair(hs_cur.i_batch + li++, hs_cur.seq_tokens[s][j + 1])); + eval_pairs.emplace_back(hs_cur.i_batch + li++, hs_cur.seq_tokens[s][j + 1]); } ++li; } @@ -997,7 +997,7 @@ static std::vector load_winogrande_from_csv(const std::string& printf("%s: no _ in <%s>\n", __func__, sentence.c_str()); continue; } - std::istringstream stream(answer.c_str()); + std::istringstream stream(answer); int i_answer; stream >> i_answer; if (stream.fail() || i_answer < 1 || i_answer > 2) { printf("%s: failed to parse answer <%s>\n", __func__, answer.c_str()); @@ -1158,13 +1158,13 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) { const int last_1st = task.seq_tokens[0].size() - n_base1 > 1 ? 1 : 0; size_t li = n_base1 - 1; for (size_t j = n_base1-1; j < task.seq_tokens[0].size()-1-last_1st; ++j) { - eval_pairs.push_back(std::make_pair(task.i_batch + li++, task.seq_tokens[0][j+1])); + eval_pairs.emplace_back(task.i_batch + li++, task.seq_tokens[0][j+1]); } const auto& n_base2 = skip_choice ? task.n_base2 : task.common_prefix; const int last_2nd = task.seq_tokens[1].size() - n_base2 > 1 ? 
1 : 0; li = task.seq_tokens[0].size() - task.common_prefix + n_base2 - 1; for (size_t j = n_base2-1; j < task.seq_tokens[1].size()-1-last_2nd; ++j) { - eval_pairs.push_back(std::make_pair(task.i_batch + li++, task.seq_tokens[1][j+1])); + eval_pairs.emplace_back(task.i_batch + li++, task.seq_tokens[1][j+1]); } } compute_logprobs(batch_logits.data(), n_vocab, workers, eval_pairs, eval_results); @@ -1221,7 +1221,7 @@ static bool deserialize_string(std::istream & in, std::string & str) { uint32_t size; if (!in.read((char *)&size, sizeof(size)).fail()) { str.resize(size); - if (!in.read((char *)&str[0], size).fail()) return true; + if (!in.read((char *)str.data(), size).fail()) return true; } return false; } @@ -1523,7 +1523,7 @@ static void multiple_choice_score(llama_context * ctx, const gpt_params & params size_t li = cur_task.common_prefix; for (int s = 0; s < int(cur_task.seq_tokens.size()); ++s) { for (size_t j = cur_task.common_prefix; j < cur_task.seq_tokens[s].size() - 1; j++) { - eval_pairs.push_back(std::make_pair(cur_task.i_batch + li++, cur_task.seq_tokens[s][j + 1])); + eval_pairs.emplace_back(cur_task.i_batch + li++, cur_task.seq_tokens[s][j + 1]); } ++li; } diff --git a/examples/server/httplib.h b/examples/server/httplib.h index e495d8299..a8244fd64 100644 --- a/examples/server/httplib.h +++ b/examples/server/httplib.h @@ -223,6 +223,7 @@ using socket_t = int; #include #include #include +#include #ifdef CPPHTTPLIB_OPENSSL_SUPPORT #ifdef _WIN32 @@ -705,7 +706,7 @@ public: Server &set_file_request_handler(Handler handler); Server &set_error_handler(HandlerWithResponse handler); - Server &set_error_handler(Handler handler); + Server &set_error_handler(const Handler& handler); Server &set_exception_handler(ExceptionHandler handler); Server &set_pre_routing_handler(HandlerWithResponse handler); Server &set_post_routing_handler(Handler handler); @@ -781,7 +782,7 @@ private: bool dispatch_request(Request &req, Response &res, const Handlers &handlers); bool 
dispatch_request_for_content_reader(Request &req, Response &res, - ContentReader content_reader, + const ContentReader& content_reader, const HandlersForContentReader &handlers); bool parse_request_line(const char *s, Request &req); @@ -804,7 +805,7 @@ private: MultipartContentHeader multipart_header, ContentReceiver multipart_receiver); bool read_content_core(Stream &strm, Request &req, Response &res, - ContentReceiver receiver, + const ContentReceiver& receiver, MultipartContentHeader multipart_header, ContentReceiver multipart_receiver); @@ -910,8 +911,8 @@ public: explicit ClientImpl(const std::string &host, int port); explicit ClientImpl(const std::string &host, int port, - const std::string &client_cert_path, - const std::string &client_key_path); + std::string client_cert_path, + std::string client_key_path); virtual ~ClientImpl(); @@ -937,7 +938,7 @@ public: Result Get(const std::string &path, ResponseHandler response_handler, ContentReceiver content_receiver, Progress progress); Result Get(const std::string &path, const Headers &headers, - ResponseHandler response_handler, ContentReceiver content_receiver, + ResponseHandler response_handler, const ContentReceiver& content_receiver, Progress progress); Result Get(const std::string &path, const Params &params, @@ -946,8 +947,8 @@ public: const Headers &headers, ContentReceiver content_receiver, Progress progress = nullptr); Result Get(const std::string &path, const Params &params, - const Headers &headers, ResponseHandler response_handler, - ContentReceiver content_receiver, Progress progress = nullptr); + const Headers &headers, const ResponseHandler& response_handler, + const ContentReceiver& content_receiver, const Progress& progress = nullptr); Result Head(const std::string &path); Result Head(const std::string &path, const Headers &headers); @@ -1790,7 +1791,7 @@ void hosted_at(const std::string &hostname, std::vector &addrs); std::string append_query_params(const std::string &path, const Params &params); 
-std::pair make_range_header(Ranges ranges); +std::pair make_range_header(const Ranges& ranges); std::pair make_basic_authentication_header(const std::string &username, @@ -1808,12 +1809,12 @@ void read_file(const std::string &path, std::string &out); std::string trim_copy(const std::string &s); void split(const char *b, const char *e, char d, - std::function fn); + const std::function& fn); bool process_client_socket(socket_t sock, time_t read_timeout_sec, time_t read_timeout_usec, time_t write_timeout_sec, time_t write_timeout_usec, - std::function callback); + const std::function& callback); socket_t create_client_socket( const std::string &host, const std::string &ip, int port, @@ -2231,8 +2232,8 @@ inline void read_file(const std::string &path, std::string &out) { fs.seekg(0, std::ios_base::end); auto size = fs.tellg(); fs.seekg(0); - out.resize(static_cast(size)); - fs.read(&out[0], static_cast(size)); + out.resize(size); + fs.read(&out[0], size); } inline std::string file_extension(const std::string &path) { @@ -2261,7 +2262,7 @@ inline std::string trim_copy(const std::string &s) { } inline void split(const char *b, const char *e, char d, - std::function fn) { + const std::function& fn) { size_t i = 0; size_t beg = 0; @@ -2624,7 +2625,7 @@ inline bool process_client_socket(socket_t sock, time_t read_timeout_sec, time_t read_timeout_usec, time_t write_timeout_sec, time_t write_timeout_usec, - std::function callback) { + const std::function& callback) { SocketStream strm(sock, read_timeout_sec, read_timeout_usec, write_timeout_sec, write_timeout_usec); return callback(strm); @@ -3037,7 +3038,7 @@ find_content_type(const std::string &path, case "svg"_t: return "image/svg+xml"; case "webp"_t: return "image/webp"; case "ico"_t: return "image/x-icon"; - case "tif"_t: return "image/tiff"; + case "tif"_t: case "tiff"_t: return "image/tiff"; case "jpg"_t: case "jpeg"_t: return "image/jpeg"; @@ -3488,13 +3489,13 @@ inline bool read_headers(Stream &strm, Headers 
&headers) { } inline bool read_content_with_length(Stream &strm, uint64_t len, - Progress progress, - ContentReceiverWithProgress out) { + const Progress& progress, + const ContentReceiverWithProgress& out) { char buf[CPPHTTPLIB_RECV_BUFSIZ]; uint64_t r = 0; while (r < len) { - auto read_len = static_cast(len - r); + auto read_len = len - r; auto n = strm.read(buf, (std::min)(read_len, CPPHTTPLIB_RECV_BUFSIZ)); if (n <= 0) { return false; } @@ -3513,7 +3514,7 @@ inline void skip_content_with_length(Stream &strm, uint64_t len) { char buf[CPPHTTPLIB_RECV_BUFSIZ]; uint64_t r = 0; while (r < len) { - auto read_len = static_cast(len - r); + auto read_len = len - r; auto n = strm.read(buf, (std::min)(read_len, CPPHTTPLIB_RECV_BUFSIZ)); if (n <= 0) { return; } r += static_cast(n); @@ -3521,7 +3522,7 @@ inline void skip_content_with_length(Stream &strm, uint64_t len) { } inline bool read_content_without_length(Stream &strm, - ContentReceiverWithProgress out) { + const ContentReceiverWithProgress& out) { char buf[CPPHTTPLIB_RECV_BUFSIZ]; uint64_t r = 0; for (;;) { @@ -3983,12 +3984,12 @@ inline bool parse_range_header(const std::string &s, Ranges &ranges) try { if (std::regex_match(b, e, cm, re_another_range)) { ssize_t first = -1; if (!cm.str(1).empty()) { - first = static_cast(std::stoll(cm.str(1))); + first = std::stoll(cm.str(1)); } ssize_t last = -1; if (!cm.str(2).empty()) { - last = static_cast(std::stoll(cm.str(2))); + last = std::stoll(cm.str(2)); } if (first != -1 && last != -1 && first > last) { @@ -4254,9 +4255,8 @@ inline std::string make_multipart_data_boundary() { inline bool is_multipart_boundary_chars_valid(const std::string &boundary) { auto valid = true; - for (size_t i = 0; i < boundary.size(); i++) { - auto c = boundary[i]; - if (!std::isalnum(c) && c != '-' && c != '_') { + for (char c : boundary) { + if (!std::isalnum(c) && c != '-' && c != '_') { valid = false; break; } @@ -4707,7 +4707,7 @@ inline bool parse_www_authenticate(const Response &res, s = 
s.substr(pos + 1); auto beg = std::sregex_iterator(s.begin(), s.end(), re); for (auto i = beg; i != std::sregex_iterator(); ++i) { - auto m = *i; + const auto& m = *i; auto key = s.substr(static_cast(m.position(1)), static_cast(m.length(1))); auto val = m.length(2) > 0 @@ -4802,7 +4802,7 @@ inline std::string append_query_params(const std::string &path, } // Header utilities -inline std::pair make_range_header(Ranges ranges) { +inline std::pair make_range_header(const Ranges& ranges) { std::string field = "bytes="; auto i = 0; for (auto r : ranges) { @@ -4949,7 +4949,7 @@ inline void Response::set_content_provider( set_header("Content-Type", content_type); content_length_ = in_length; if (in_length > 0) { content_provider_ = std::move(provider); } - content_provider_resource_releaser_ = resource_releaser; + content_provider_resource_releaser_ = std::move(resource_releaser); is_chunked_content_provider_ = false; } @@ -4959,7 +4959,7 @@ inline void Response::set_content_provider( set_header("Content-Type", content_type); content_length_ = 0; content_provider_ = detail::ContentProviderAdapter(std::move(provider)); - content_provider_resource_releaser_ = resource_releaser; + content_provider_resource_releaser_ = std::move(resource_releaser); is_chunked_content_provider_ = false; } @@ -4969,7 +4969,7 @@ inline void Response::set_chunked_content_provider( set_header("Content-Type", content_type); content_length_ = 0; content_provider_ = detail::ContentProviderAdapter(std::move(provider)); - content_provider_resource_releaser_ = resource_releaser; + content_provider_resource_releaser_ = std::move(resource_releaser); is_chunked_content_provider_ = true; } @@ -5010,7 +5010,7 @@ inline SocketStream::SocketStream(socket_t sock, time_t read_timeout_sec, write_timeout_sec_(write_timeout_sec), write_timeout_usec_(write_timeout_usec), read_buff_(read_buff_size_, 0) {} -inline SocketStream::~SocketStream() {} +inline SocketStream::~SocketStream() = default; inline bool 
SocketStream::is_readable() const { return select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0; @@ -5101,7 +5101,7 @@ inline ssize_t BufferStream::read(char *ptr, size_t size) { #else auto len_read = buffer.copy(ptr, size, position); #endif - position += static_cast(len_read); + position += len_read; return static_cast(len_read); } @@ -5131,7 +5131,7 @@ inline Server::Server() #endif } -inline Server::~Server() {} +inline Server::~Server() = default; inline Server &Server::Get(const std::string &pattern, Handler handler) { get_handlers_.push_back( @@ -5241,7 +5241,7 @@ inline Server &Server::set_error_handler(HandlerWithResponse handler) { return *this; } -inline Server &Server::set_error_handler(Handler handler) { +inline Server &Server::set_error_handler(const Handler& handler) { error_handler_ = [handler](const Request &req, Response &res) { handler(req, res); return HandlerResponse::Handled; @@ -5618,7 +5618,7 @@ inline bool Server::read_content_with_content_receiver( } inline bool Server::read_content_core(Stream &strm, Request &req, Response &res, - ContentReceiver receiver, + const ContentReceiver& receiver, MultipartContentHeader multipart_header, ContentReceiver multipart_receiver) { detail::MultipartFormDataParser multipart_form_data_parser; @@ -5688,7 +5688,7 @@ inline bool Server::handle_file_request(const Request &req, Response &res, detail::find_content_type(path, file_extension_and_mimetype_map_); if (type) { res.set_header("Content-Type", type); } for (const auto &kv : entry.headers) { - res.set_header(kv.first.c_str(), kv.second); + res.set_header(kv.first, kv.second); } res.status = req.has_header("Range") ? 
206 : 200; if (!head && file_request_handler_) { @@ -6024,7 +6024,7 @@ inline void Server::apply_ranges(const Request &req, Response &res, } inline bool Server::dispatch_request_for_content_reader( - Request &req, Response &res, ContentReader content_reader, + Request &req, Response &res, const ContentReader& content_reader, const HandlersForContentReader &handlers) { for (const auto &x : handlers) { const auto &pattern = x.first; @@ -6202,11 +6202,11 @@ inline ClientImpl::ClientImpl(const std::string &host, int port) : ClientImpl(host, port, std::string(), std::string()) {} inline ClientImpl::ClientImpl(const std::string &host, int port, - const std::string &client_cert_path, - const std::string &client_key_path) + std::string client_cert_path, + std::string client_key_path) : host_(host), port_(port), host_and_port_(adjust_host_string(host) + ":" + std::to_string(port)), - client_cert_path_(client_cert_path), client_key_path_(client_key_path) {} + client_cert_path_(std::move(client_cert_path)), client_key_path_(std::move(client_key_path)) {} inline ClientImpl::~ClientImpl() { std::lock_guard guard(socket_mutex_); @@ -6579,7 +6579,7 @@ inline bool ClientImpl::redirect(Request &req, Response &res, Error &error) { return false; #endif } else { - ClientImpl cli(next_host.c_str(), next_port); + ClientImpl cli(next_host, next_port); cli.copy_settings(*this); return detail::redirect(cli, req, res, path, location, error); } @@ -7056,7 +7056,7 @@ inline Result ClientImpl::Get(const std::string &path, inline Result ClientImpl::Get(const std::string &path, const Headers &headers, ResponseHandler response_handler, - ContentReceiver content_receiver, + const ContentReceiver& content_receiver, Progress progress) { Request req; req.method = "GET"; @@ -7078,27 +7078,27 @@ inline Result ClientImpl::Get(const std::string &path, const Params ¶ms, if (params.empty()) { return Get(path, headers); } std::string path_with_query = append_query_params(path, params); - return 
Get(path_with_query.c_str(), headers, progress); + return Get(path_with_query, headers, std::move(progress)); } inline Result ClientImpl::Get(const std::string &path, const Params ¶ms, const Headers &headers, ContentReceiver content_receiver, Progress progress) { - return Get(path, params, headers, nullptr, content_receiver, progress); + return Get(path, params, headers, nullptr, std::move(content_receiver), std::move(progress)); } inline Result ClientImpl::Get(const std::string &path, const Params ¶ms, const Headers &headers, - ResponseHandler response_handler, - ContentReceiver content_receiver, - Progress progress) { + const ResponseHandler& response_handler, + const ContentReceiver& content_receiver, + const Progress& progress) { if (params.empty()) { return Get(path, headers, response_handler, content_receiver, progress); } std::string path_with_query = append_query_params(path, params); - return Get(path_with_query.c_str(), headers, response_handler, + return Get(path_with_query, headers, response_handler, content_receiver, progress); } @@ -7201,7 +7201,7 @@ inline Result ClientImpl::Post(const std::string &path, const Headers &headers, const auto &content_type = detail::serialize_multipart_formdata_get_content_type(boundary); const auto &body = detail::serialize_multipart_formdata(items, boundary); - return Post(path, headers, body, content_type.c_str()); + return Post(path, headers, body, content_type); } inline Result ClientImpl::Post(const std::string &path, const Headers &headers, @@ -7214,7 +7214,7 @@ inline Result ClientImpl::Post(const std::string &path, const Headers &headers, const auto &content_type = detail::serialize_multipart_formdata_get_content_type(boundary); const auto &body = detail::serialize_multipart_formdata(items, boundary); - return Post(path, headers, body, content_type.c_str()); + return Post(path, headers, body, content_type); } inline Result @@ -8361,7 +8361,7 @@ inline Client::Client(const std::string &host, int port, : 
cli_(detail::make_unique(host, port, client_cert_path, client_key_path)) {} -inline Client::~Client() {} +inline Client::~Client() = default; inline bool Client::is_valid() const { return cli_ != nullptr && cli_->is_valid(); @@ -8421,19 +8421,19 @@ inline Result Client::Get(const std::string &path, const Headers &headers, } inline Result Client::Get(const std::string &path, const Params ¶ms, const Headers &headers, Progress progress) { - return cli_->Get(path, params, headers, progress); + return cli_->Get(path, params, headers, std::move(progress)); } inline Result Client::Get(const std::string &path, const Params ¶ms, const Headers &headers, ContentReceiver content_receiver, Progress progress) { - return cli_->Get(path, params, headers, content_receiver, progress); + return cli_->Get(path, params, headers, std::move(content_receiver), std::move(progress)); } inline Result Client::Get(const std::string &path, const Params ¶ms, const Headers &headers, ResponseHandler response_handler, ContentReceiver content_receiver, Progress progress) { - return cli_->Get(path, params, headers, response_handler, content_receiver, - progress); + return cli_->Get(path, params, headers, std::move(response_handler), std::move(content_receiver), + std::move(progress)); } inline Result Client::Head(const std::string &path) { return cli_->Head(path); } @@ -8754,7 +8754,7 @@ inline void Client::enable_server_certificate_verification(bool enabled) { } #endif -inline void Client::set_logger(Logger logger) { cli_->set_logger(logger); } +inline void Client::set_logger(Logger logger) { cli_->set_logger(std::move(logger)); } #ifdef CPPHTTPLIB_OPENSSL_SUPPORT inline void Client::set_ca_cert_path(const std::string &ca_cert_file_path, diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 7dcb5950d..ad47e3703 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1202,9 +1202,8 @@ struct llama_server_context (json)(slot.images[image_idx].prefix_prompt); 
std::vector append_tokens = tokenize(json_prompt, false); // has next image - for (int i = 0; i < (int) append_tokens.size(); ++i) - { - llama_batch_add(batch, append_tokens[i], slot.n_past, { slot.id }, true); + for (int append_token : append_tokens) { + llama_batch_add(batch, append_token, slot.n_past, { slot.id }, true); slot.n_past += 1; } } @@ -2034,7 +2033,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, invalid_param = true; break; } - params.lora_adapter.push_back(std::make_tuple(argv[i], 1.0f)); + params.lora_adapter.emplace_back(argv[i], 1.0f); params.use_mmap = false; } else if (arg == "--lora-scaled") @@ -2050,7 +2049,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, invalid_param = true; break; } - params.lora_adapter.push_back(std::make_tuple(lora_adapter, std::stof(argv[i]))); + params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i])); params.use_mmap = false; } else if (arg == "--lora-base") @@ -2192,7 +2191,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, } } if (!params.kv_overrides.empty()) { - params.kv_overrides.emplace_back(llama_model_kv_override()); + params.kv_overrides.emplace_back(); params.kv_overrides.back().key[0] = 0; } @@ -2626,12 +2625,11 @@ int main(int argc, char **argv) if (!llama_result.error) { std::vector result_array = format_partial_response_oaicompat( llama_result); - for (auto it = result_array.begin(); it != result_array.end(); ++it) - { - if (!it->empty()) { + for (auto& it : result_array) { + if (!it.empty()) { const std::string str = "data: " + - it->dump(-1, ' ', false, json::error_handler_t::replace) + + it.dump(-1, ' ', false, json::error_handler_t::replace) + "\n\n"; LOG_VERBOSE("data stream", {{"to_send", str}}); if (!sink.write(str.c_str(), str.size())) { @@ -2824,19 +2822,17 @@ int main(int argc, char **argv) }*/ //); - llama.queue_tasks.on_new_task(std::bind( - 
&llama_server_context::process_single_task, &llama, std::placeholders::_1)); - llama.queue_tasks.on_finish_multitask(std::bind( - &llama_server_context::on_finish_multitask, &llama, std::placeholders::_1)); - llama.queue_tasks.on_all_tasks_finished(std::bind( - &llama_server_context::run_on_all_tasks_finished, &llama)); - llama.queue_results.on_multitask_update(std::bind( - &llama_server_queue::update_multitask, - &llama.queue_tasks, - std::placeholders::_1, - std::placeholders::_2, - std::placeholders::_3 - )); + llama.queue_tasks.on_new_task([ObjectPtr = &llama](auto&& PH1) { + ObjectPtr->process_single_task(std::forward(PH1)); + }); + llama.queue_tasks.on_finish_multitask([ObjectPtr = &llama](auto&& PH1) { + ObjectPtr->on_finish_multitask(std::forward(PH1)); + }); + llama.queue_tasks.on_all_tasks_finished([ObjectPtr = &llama] { ObjectPtr->run_on_all_tasks_finished(); }); + llama.queue_results.on_multitask_update([ObjectPtr = &llama.queue_tasks](auto&& PH1, auto&& PH2, auto&& PH3) { + ObjectPtr->update_multitask(std::forward(PH1), std::forward(PH2), + std::forward(PH3)); + }); llama.queue_tasks.start_loop(); t.join(); diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index b6d6d27c5..b65bc007f 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -171,10 +172,10 @@ inline std::string format_chatml(std::vector messages) { std::ostringstream chatml_msgs; - for (auto it = messages.begin(); it != messages.end(); ++it) { + for (auto& message : messages) { chatml_msgs << "<|im_start|>" - << json_value(*it, "role", std::string("user")) << '\n'; - chatml_msgs << json_value(*it, "content", std::string("")) + << json_value(message, "role", std::string("user")) << '\n'; + chatml_msgs << json_value(message, "content", std::string("")) << "<|im_end|>\n"; } @@ -225,17 +226,17 @@ struct llama_server_queue { // Register function to process a new task void 
on_new_task(std::function callback) { - callback_new_task = callback; + callback_new_task = std::move(callback); } // Register function to process a multitask void on_finish_multitask(std::function callback) { - callback_finish_multitask = callback; + callback_finish_multitask = std::move(callback); } // Register the function to be called when the batch of tasks is finished void on_all_tasks_finished(std::function callback) { - callback_all_task_finished = callback; + callback_all_task_finished = std::move(callback); } // Call when the state of one slot is changed @@ -378,7 +379,7 @@ struct llama_server_response { // Register the function to update multitask void on_multitask_update(callback_multitask_t callback) { - callback_update_multitask = callback; + callback_update_multitask = std::move(callback); } // Send a new result to a waiting task_id diff --git a/examples/tokenize/tokenize.cpp b/examples/tokenize/tokenize.cpp index 4ff8e3fa7..da3aaa48e 100644 --- a/examples/tokenize/tokenize.cpp +++ b/examples/tokenize/tokenize.cpp @@ -32,11 +32,11 @@ int main(int argc, char ** argv) { tokens = ::llama_tokenize(model, prompt, add_bos, true); - for (int i = 0; i < (int) tokens.size(); i++) { + for (int token : tokens) { if (printing_ids) { - printf("%d\n", tokens[i]); + printf("%d\n", token); } else { - printf("%6d -> '%s'\n", tokens[i], llama_token_to_piece(ctx, tokens[i]).c_str()); + printf("%6d -> '%s'\n", token, llama_token_to_piece(ctx, token).c_str()); } } diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp index 49eaf3e61..0c9a3d41c 100644 --- a/examples/train-text-from-scratch/train-text-from-scratch.cpp +++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp @@ -151,8 +151,7 @@ static void alloc_model(struct ggml_allocr * alloc, struct my_llama_model * mode ggml_allocr_alloc(alloc, model->tok_embeddings); ggml_allocr_alloc(alloc, model->norm); 
ggml_allocr_alloc(alloc, model->output); - for (uint32_t i = 0; i < model->layers.size(); ++i) { - auto & layer = model->layers[i]; + for (auto& layer : model->layers) { ggml_allocr_alloc(alloc, layer.attention_norm); ggml_allocr_alloc(alloc, layer.wq); ggml_allocr_alloc(alloc, layer.wk); @@ -166,8 +165,7 @@ static void alloc_model(struct ggml_allocr * alloc, struct my_llama_model * mode ggml_allocr_alloc(alloc, model->tok_embeddings->grad); ggml_allocr_alloc(alloc, model->norm->grad); ggml_allocr_alloc(alloc, model->output->grad); - for (uint32_t i = 0; i < model->layers.size(); ++i) { - auto & layer = model->layers[i]; + for (auto& layer : model->layers) { ggml_allocr_alloc(alloc, layer.attention_norm->grad); ggml_allocr_alloc(alloc, layer.wq->grad); ggml_allocr_alloc(alloc, layer.wk->grad); @@ -453,9 +451,9 @@ static struct ggml_tensor * llama_build_train_graphs( // allocating checkpoints in one block to reduce memory fragmentation // note: they will be freed in reverse order - for (int i = 0; i < (int) checkpoints.size(); ++i) { - if (checkpoints[i]->data == NULL && checkpoints[i]->view_src == NULL) { - ggml_allocr_alloc(alloc, checkpoints[i]); + for (auto& checkpoint : checkpoints) { + if (checkpoint->data == NULL && checkpoint->view_src == NULL) { + ggml_allocr_alloc(alloc, checkpoint); } } @@ -925,7 +923,7 @@ struct save_train_files_data { }; static void save_train_files(void * vdata, struct train_state * train) { - struct save_train_files_data * data = (struct save_train_files_data *) vdata; + auto data = (struct save_train_files_data *) vdata; int64_t iter = train->opt->iter; if (strlen(data->fn_checkpoint_out) > 0) { @@ -945,8 +943,7 @@ static int64_t get_parameter_count(struct my_llama_model* model) { nx += ggml_nelements(model->norm); nx += ggml_nelements(model->output); - for (uint32_t i = 0; i < model->layers.size(); ++i) { - auto & layer = model->layers[i]; + for (auto& layer : model->layers) { nx += ggml_nelements(layer.attention_norm); nx += 
ggml_nelements(layer.wq); nx += ggml_nelements(layer.wk); diff --git a/llama.cpp b/llama.cpp index 096eb4ac0..4ba83d40b 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1092,7 +1092,7 @@ struct llama_mlock { bool failed_already = false; - llama_mlock() {} + llama_mlock() = default; llama_mlock(const llama_mlock &) = delete; ~llama_mlock() { @@ -2958,7 +2958,7 @@ static void llm_load_hparams( } // TODO: This should probably be in llama.h -static std::vector llama_tokenize_internal(const llama_vocab & vocab, std::string raw_text, bool bos, bool special = false); +static std::vector llama_tokenize_internal(const llama_vocab & vocab, const std::string& raw_text, bool bos, bool special = false); static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch); static void llm_load_vocab( @@ -7111,8 +7111,8 @@ struct llm_tokenizer_bpe { const auto token = vocab.token_to_id.find(str); if (token == vocab.token_to_id.end()) { - for (auto j = str.begin(); j != str.end(); ++j) { - std::string byte_str(1, *j); + for (char j : str) { + std::string byte_str(1, j); auto token_multibyte = vocab.token_to_id.find(byte_str); if (token_multibyte == vocab.token_to_id.end()) { throw std::runtime_error("ERROR: byte not found in vocab"); @@ -7172,8 +7172,8 @@ private: bpe_encoded_words.reserve(text.size()); auto cps = codepoints_from_utf8(text); - for (size_t i = 0; i < cps.size(); ++i) - text_utf.emplace_back(codepoint_to_utf8(cps[i])); + for (unsigned int cp : cps) + text_utf.emplace_back(codepoint_to_utf8(cp)); for (int i = 0; i < (int)text_utf.size(); i++) { const std::string & utf_char = text_utf[i]; @@ -7344,7 +7344,7 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list< const auto & special_id = st.second; // for each text fragment - std::forward_list::iterator it = buffer.begin(); + auto it = buffer.begin(); while (it != buffer.end()) { auto & fragment = (*it); @@ -7431,7 +7431,7 @@ static void tokenizer_st_partition(const llama_vocab & vocab, 
std::forward_list< } } -static std::vector llama_tokenize_internal(const llama_vocab & vocab, std::string raw_text, bool bos, bool special) { +static std::vector llama_tokenize_internal(const llama_vocab & vocab, const std::string& raw_text, bool bos, bool special) { std::vector output; // OG tokenizer behavior: @@ -7887,7 +7887,7 @@ void llama_grammar_free(struct llama_grammar * grammar) { } struct llama_grammar * llama_grammar_copy(const struct llama_grammar * grammar) { - llama_grammar * result = new llama_grammar{ grammar->rules, grammar->stacks, grammar->partial_utf8 }; + auto result = new llama_grammar{ grammar->rules, grammar->stacks, grammar->partial_utf8 }; // redirect elements in stacks to point to new rules for (size_t is = 0; is < result->stacks.size(); is++) { @@ -8095,8 +8095,8 @@ void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array * } // Calculate absolute value of second derivatives - for (size_t i = 0; i < second_derivatives.size(); ++i) { - second_derivatives[i] = std::abs(second_derivatives[i]); + for (float& second_derivative : second_derivatives) { + second_derivative = std::abs(second_derivative); } // Normalize the second derivatives @@ -9412,8 +9412,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s if (tot_count > 0) { LLAMA_LOG_INFO(" | hist: "); - for (size_t i = 0; i < hist_cur.size(); i++) { - LLAMA_LOG_INFO("%5.3f ", hist_cur[i] / float(nelements)); + for (long long i : hist_cur) { + LLAMA_LOG_INFO("%5.3f ", i / float(nelements)); } } LLAMA_LOG_INFO("\n"); @@ -9448,14 +9448,14 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s // print histogram for all tensors { int64_t sum_all = 0; - for (size_t i = 0; i < hist_all.size(); i++) { - sum_all += hist_all[i]; + for (auto i : hist_all) { + sum_all += i; } if (sum_all > 0) { LLAMA_LOG_INFO("%s: hist: ", __func__); - for (size_t i = 0; i < hist_all.size(); i++) { - LLAMA_LOG_INFO("%5.3f ", 
hist_all[i] / float(sum_all)); + for (auto i : hist_all) { + LLAMA_LOG_INFO("%5.3f ", i / float(sum_all)); } LLAMA_LOG_INFO("\n"); } @@ -9859,7 +9859,7 @@ struct llama_model * llama_load_model_from_file( struct llama_model_params params) { ggml_time_init(); - llama_model * model = new llama_model; + auto model = new llama_model; unsigned cur_percentage = 0; if (params.progress_callback == NULL) { @@ -9905,7 +9905,7 @@ struct llama_context * llama_new_context_with_model( return nullptr; } - llama_context * ctx = new llama_context(*model); + auto ctx = new llama_context(*model); const auto & hparams = model->hparams; auto & cparams = ctx->cparams; diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 5ec0ed335..a2f5c62aa 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -256,7 +256,7 @@ enum test_mode { }; struct test_case { - virtual ~test_case() {} + virtual ~test_case() = default; virtual std::string op_desc(ggml_tensor * t) { return ggml_op_desc(t); @@ -281,9 +281,9 @@ struct test_case { virtual size_t op_size(ggml_tensor * t) { size_t size = ggml_nbytes(t); // add source tensors - for (int i = 0; i < GGML_MAX_SRC; i++) { - if (t->src[i] != NULL) { - size += ggml_nbytes(t->src[i]); + for (auto& el : t->src) { + if (el) { + size += ggml_nbytes(el); } } return size; @@ -416,7 +416,7 @@ struct test_case { }; auto callback = [](int index, ggml_tensor * t1, ggml_tensor * t2, void * user_data) -> bool { - callback_userdata * ud = (callback_userdata *) user_data; + auto ud = (callback_userdata *) user_data; const char * bn1 = ggml_backend_name(ud->backend1); const char * bn2 = ggml_backend_name(ud->backend2); diff --git a/tests/test-grad0.cpp b/tests/test-grad0.cpp index 8ff76c891..0f555df09 100644 --- a/tests/test-grad0.cpp +++ b/tests/test-grad0.cpp @@ -583,7 +583,7 @@ int main(int argc, const char ** argv) { } // mean, not yet fully implemented - if(0) + if(false) { srand(seed); const int nargs = 1; @@ -601,7 +601,7 @@ 
int main(int argc, const char ** argv) { } // argmax - if (0) + if (false) { srand(seed); const int nargs = 1; @@ -732,7 +732,7 @@ int main(int argc, const char ** argv) { } // tanh, not yet fully implemented - if(0) + if(false) { srand(seed); const int nargs = 1; @@ -787,7 +787,7 @@ int main(int argc, const char ** argv) { } // elu, not yet fully implemented - if(0) + if(false) { srand(seed); const int nargs = 1; @@ -822,7 +822,7 @@ int main(int argc, const char ** argv) { } // gelu, not yet fully implemented - if(0) + if(false) { srand(seed); const int nargs = 1; @@ -1559,7 +1559,7 @@ int main(int argc, const char ** argv) { } // flash_attn f16, not yet fully implemented - if(0) + if(false) { srand(seed); const int nargs = 3; diff --git a/tests/test-grammar-parser.cpp b/tests/test-grammar-parser.cpp index a0b5b043d..ca1664eeb 100644 --- a/tests/test-grammar-parser.cpp +++ b/tests/test-grammar-parser.cpp @@ -29,10 +29,9 @@ term ::= [0-9]+)"""; }; uint32_t index = 0; - for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it) - { - std::string key = it->first; - uint32_t value = it->second; + for (auto& symbol_id : parsed_grammar.symbol_ids) { + std::string key = symbol_id.first; + uint32_t value = symbol_id.second; std::pair expected_pair = expected[index]; // pretty print error message before asserting @@ -88,9 +87,7 @@ term ::= [0-9]+)"""; for (auto rule : parsed_grammar.rules) { // compare rule to expected rule - for (uint32_t i = 0; i < rule.size(); i++) - { - llama_grammar_element element = rule[i]; + for (auto element : rule) { llama_grammar_element expected_element = expected_rules[index]; // pretty print error message before asserting @@ -135,10 +132,9 @@ term ::= [0-9]+)"""; }; index = 0; - for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it) - { - std::string key = it->first; - uint32_t value = it->second; + for (auto& symbol_id : parsed_grammar.symbol_ids) { + std::string key 
= symbol_id.first; + uint32_t value = symbol_id.second; std::pair expected_pair = expected[index]; // pretty print error message before asserting @@ -227,9 +223,7 @@ term ::= [0-9]+)"""; for (auto rule : parsed_grammar.rules) { // compare rule to expected rule - for (uint32_t i = 0; i < rule.size(); i++) - { - llama_grammar_element element = rule[i]; + for (auto element : rule) { llama_grammar_element expected_element = expected_rules[index]; // pretty print error message before asserting diff --git a/tests/test-llama-grammar.cpp b/tests/test-llama-grammar.cpp index 78fc41117..e90aafe15 100644 --- a/tests/test-llama-grammar.cpp +++ b/tests/test-llama-grammar.cpp @@ -98,14 +98,14 @@ int main() }, }; - for (auto pair : expected) + for (const auto& pair : expected) { parsed_grammar.symbol_ids[pair.first] = pair.second; } - for (auto rule : expected_rules) + for (const auto& rule : expected_rules) { - parsed_grammar.rules.push_back({}); + parsed_grammar.rules.emplace_back(); for (auto element : rule) { parsed_grammar.rules.back().push_back(element); diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index 31a78c632..4ccc66271 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -126,7 +126,7 @@ int main(int argc, char * argv[]) { bool failed = false; for (int i = 0; i < GGML_TYPE_COUNT; i++) { - ggml_type type = (ggml_type) i; + auto type = (ggml_type) i; ggml_type_traits_t qfns = ggml_internal_get_type_traits(type); // deprecated - skip diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp index 09d410b7f..24e288720 100644 --- a/tests/test-quantize-perf.cpp +++ b/tests/test-quantize-perf.cpp @@ -269,7 +269,7 @@ int main(int argc, char * argv[]) { struct ggml_context * ctx = ggml_init(ggml_params); for (int i = 0; i < GGML_TYPE_COUNT; i++) { - ggml_type type = (ggml_type) i; + auto type = (ggml_type) i; ggml_type_traits_t qfns = ggml_internal_get_type_traits(type); if (!params.include_types.empty() && 
ggml_type_name(type) && std::find(params.include_types.begin(), params.include_types.end(), ggml_type_name(type)) == params.include_types.end()) { continue;