Add additional fixes

Change bind to lambdas
Change push_back to emplace_back
Replace index-based for loops with range-based for
Use auto to avoid repeating the type name
Use bool literals instead of 0
Use pass-by-value with std::move
This commit is contained in:
Michael Klimenko 2024-01-27 22:29:31 +01:00
parent e41d94972c
commit 55b008cdec
28 changed files with 237 additions and 259 deletions

View file

@ -511,7 +511,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
invalid_param = true;
break;
}
params.lora_adapter.push_back(std::make_tuple(argv[i], 1.0f));
params.lora_adapter.emplace_back(argv[i], 1.0f);
params.use_mmap = false;
} else if (arg == "--lora-scaled") {
if (++i >= argc) {
@ -523,7 +523,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
invalid_param = true;
break;
}
params.lora_adapter.push_back(std::make_tuple(lora_adapter, std::stof(argv[i])));
params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
params.use_mmap = false;
} else if (arg == "--lora-base") {
if (++i >= argc) {
@ -875,7 +875,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
}
if (!params.kv_overrides.empty()) {
params.kv_overrides.emplace_back(llama_model_kv_override());
params.kv_overrides.emplace_back();
params.kv_overrides.back().key[0] = 0;
}
@ -1335,8 +1335,8 @@ std::string llama_detokenize_bpe(llama_context * ctx, const std::vector<llama_to
std::string piece;
std::string result;
for (size_t i = 0; i < tokens.size(); ++i) {
piece = llama_token_to_piece(ctx, tokens[i]);
for (int token : tokens) {
piece = llama_token_to_piece(ctx, token);
result += piece;
}

View file

@ -296,9 +296,9 @@ namespace grammar_parser {
static bool is_char_element(llama_grammar_element elem) {
switch (elem.type) {
case LLAMA_GRETYPE_CHAR: return true;
case LLAMA_GRETYPE_CHAR_NOT: return true;
case LLAMA_GRETYPE_CHAR_ALT: return true;
case LLAMA_GRETYPE_CHAR:
case LLAMA_GRETYPE_CHAR_NOT:
case LLAMA_GRETYPE_CHAR_ALT:
case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true;
default: return false;
}

View file

@ -1,7 +1,7 @@
#include "sampling.h"
struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params) {
struct llama_sampling_context * result = new llama_sampling_context();
auto result = new llama_sampling_context();
result->params = params;
result->grammar = nullptr;
@ -197,8 +197,8 @@ static llama_token llama_sampling_sample_impl(
}
// apply params.logit_bias map
for (auto it = params.logit_bias.begin(); it != params.logit_bias.end(); it++) {
logits[it->first] += it->second;
for (auto logit_bia : params.logit_bias) {
logits[logit_bia.first] += logit_bia.second;
}
if (ctx_cfg) {

View file

@ -18,7 +18,7 @@ struct random_uniform_distribution {
};
struct train_state * init_train_state() {
struct train_state * state = new struct train_state;
auto state = new struct train_state;
state->train_its = 0;
state->train_samples = 0;
state->train_tokens = 0;
@ -1379,7 +1379,7 @@ void finish_processing_train_args(struct train_params_common * params) {
}
void train_opt_callback(void * vdata, int accum_step, float * sched, bool * cancel) {
struct train_opt_callback_data * data = (struct train_opt_callback_data *) vdata;
auto data = (struct train_opt_callback_data *) vdata;
struct train_params_common * params = data->params;
struct train_state * train = data->train;
struct ggml_opt_context * opt = train->opt;

View file

@ -164,13 +164,9 @@ int main(int argc, char ** argv) {
LOG_TEE("|%6s | %6s | %4s | %6s | %8s | %8s | %8s | %8s | %8s | %8s |\n", "PP", "TG", "B", "N_KV", "T_PP s", "S_PP t/s", "T_TG s", "S_TG t/s", "T s", "S t/s");
LOG_TEE("|%6s-|-%6s-|-%4s-|-%6s-|-%8s-|-%8s-|-%8s-|-%8s-|-%8s-|-%8s-|\n", "------", "------", "----", "------", "--------", "--------", "--------", "--------", "--------", "--------");
for ( int i_pp = 0; i_pp < (int) n_pp.size(); ++i_pp) {
for ( int i_tg = 0; i_tg < (int) n_tg.size(); ++i_tg) {
for (int i_pl = 0; i_pl < (int) n_pl.size(); ++i_pl) {
const int pp = n_pp[i_pp];
const int tg = n_tg[i_tg];
const int pl = n_pl[i_pl];
for (int pp : n_pp) {
for (int tg : n_tg) {
for (int pl : n_pl) {
const int n_ctx_req = is_pp_shared ? pp + pl*tg : pl*(pp + tg);
if (n_ctx_req > n_kv_max) {

View file

@ -143,7 +143,7 @@ int main(int argc, char ** argv) {
struct ggml_init_params params = {
/*.mem_size =*/ ctx_size,
/*.mem_buffer =*/ NULL,
/* no_alloc =*/ 0
/* no_alloc =*/ false
};
ctx = ggml_init(params);

View file

@ -64,8 +64,8 @@ int main(int argc, char ** argv) {
fprintf(stderr, "\n");
fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
for (int i = 0; i < (int) embd_inp.size(); i++) {
fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_piece(ctx, embd_inp[i]).c_str());
for (int i : embd_inp) {
fprintf(stderr, "%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
}
fprintf(stderr, "\n");
}

View file

@ -225,7 +225,7 @@ static void free_lora(struct lora_data * lora) {
}
static struct lora_data * load_lora(struct lora_info * info) {
struct lora_data * result = new struct lora_data;
auto result = new struct lora_data;
result->info = *info;
result->ctx = NULL;
result->lora_r = 1;
@ -370,9 +370,9 @@ static bool apply_lora(struct ggml_tensor * tensor, struct lora_data * lora, int
static void export_lora(struct export_lora_params * params) {
// load all loras
std::vector<struct lora_data *> loras;
for (size_t i = 0; i < params->lora.size(); ++i) {
struct lora_data * lora = load_lora(&params->lora[i]);
if (lora != NULL) {
for (auto& i : params->lora) {
auto lora = load_lora(&i);
if (lora) {
loras.push_back(lora);
}
}
@ -431,8 +431,8 @@ static void export_lora(struct export_lora_params * params) {
fin.read_raw(data.data(), data.size());
// apply all loras
for (size_t k = 0; k < loras.size(); ++k) {
apply_lora(tensor, loras[k], params->n_threads);
for (auto& lora : loras) {
apply_lora(tensor, lora, params->n_threads);
}
// write tensor data + padding
@ -455,8 +455,8 @@ static void export_lora(struct export_lora_params * params) {
gguf_free(gguf_in);
// free loras
for (size_t i = 0; i < loras.size(); ++i) {
free_lora(loras[i]);
for (auto& lora : loras) {
free_lora(lora);
}
}

View file

@ -379,8 +379,7 @@ static void alloc_lora(struct ggml_allocr * alloc, struct my_llama_lora * lora)
ggml_allocr_alloc(alloc, lora->norm_b);
ggml_allocr_alloc(alloc, lora->output_a);
ggml_allocr_alloc(alloc, lora->output_b);
for (uint32_t i = 0; i < lora->layers.size(); ++i) {
auto & layer = lora->layers[i];
for (auto& layer : lora->layers) {
ggml_allocr_alloc(alloc, layer.attention_norm_a);
ggml_allocr_alloc(alloc, layer.attention_norm_b);
ggml_allocr_alloc(alloc, layer.wq_a);
@ -406,8 +405,7 @@ static void alloc_lora(struct ggml_allocr * alloc, struct my_llama_lora * lora)
ggml_allocr_alloc(alloc, lora->norm_b->grad);
ggml_allocr_alloc(alloc, lora->output_a->grad);
ggml_allocr_alloc(alloc, lora->output_b->grad);
for (uint32_t i = 0; i < lora->layers.size(); ++i) {
auto & layer = lora->layers[i];
for (auto& layer : lora->layers) {
ggml_allocr_alloc(alloc, layer.attention_norm_a->grad);
ggml_allocr_alloc(alloc, layer.attention_norm_b->grad);
ggml_allocr_alloc(alloc, layer.wq_a->grad);
@ -803,9 +801,9 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs(
// allocating checkpoints in one block to reduce memory fragmentation
// note: they will be freed in reverse order
for (unsigned int i = 0; i < checkpoints.size(); ++i) {
if (checkpoints[i]->data == NULL && checkpoints[i]->view_src == NULL) {
ggml_allocr_alloc(alloc, checkpoints[i]);
for (auto& checkpoint : checkpoints) {
if (checkpoint->data == NULL && checkpoint->view_src == NULL) {
ggml_allocr_alloc(alloc, checkpoint);
}
}
@ -872,8 +870,7 @@ static void load_llama_lora_gguf(struct gguf_context * fctx, struct ggml_context
copy_tensor_by_name(lora->output_a, f_ggml_ctx, ggml_get_name(lora->output_a));
copy_tensor_by_name(lora->output_b, f_ggml_ctx, ggml_get_name(lora->output_b));
for (uint32_t i = 0; i < lora->layers.size(); ++i) {
auto & layer = lora->layers[i];
for (auto& layer : lora->layers) {
copy_tensor_by_name(layer.attention_norm_a, f_ggml_ctx, ggml_get_name(layer.attention_norm_a));
copy_tensor_by_name(layer.attention_norm_b, f_ggml_ctx, ggml_get_name(layer.attention_norm_b));
copy_tensor_by_name(layer.wq_a, f_ggml_ctx, ggml_get_name(layer.wq_a));
@ -940,9 +937,7 @@ static void save_llama_lora_gguf(struct gguf_context * fctx, struct my_llama_mod
gguf_add_tensor(fctx, lora->output_a);
gguf_add_tensor(fctx, lora->output_b);
for (uint32_t i = 0; i < lora->layers.size(); ++i) {
auto & layer = lora->layers[i];
for (auto& layer : lora->layers) {
gguf_add_tensor(fctx, layer.attention_norm_a);
gguf_add_tensor(fctx, layer.attention_norm_b);
gguf_add_tensor(fctx, layer.wq_a);
@ -1476,7 +1471,7 @@ struct save_train_files_data {
};
static void save_train_files(void * vdata, struct train_state * train) {
struct save_train_files_data * data = (struct save_train_files_data *) vdata;
auto data = (struct save_train_files_data *) vdata;
int64_t iter = train->opt->iter;
@ -1499,8 +1494,7 @@ static int64_t get_parameter_count(struct my_llama_lora* lora) {
nx += ggml_nelements(lora->output_a);
nx += ggml_nelements(lora->output_b);
for (uint32_t i = 0; i < lora->layers.size(); ++i) {
auto & layer = lora->layers[i];
for (auto& layer : lora->layers) {
nx += ggml_nelements(layer.attention_norm_a);
nx += ggml_nelements(layer.attention_norm_b);
nx += ggml_nelements(layer.wq_a);
@ -1817,12 +1811,12 @@ int main(int argc, char ** argv) {
std::vector<size_t> token_noccurs;
token_noccurs.resize(model.hparams.n_vocab, 0);
for (unsigned int i = 0; i < train_tokens.size(); ++i) {
++token_noccurs[train_tokens[i]];
for (int train_token : train_tokens) {
++token_noccurs[train_token];
}
int n_unique_tokens = 0;
for (unsigned int i = 0; i < token_noccurs.size(); ++i) {
if (token_noccurs[i] == 0) continue;
for (unsigned long long token_noccur : token_noccurs) {
if (token_noccur == 0) continue;
++n_unique_tokens;
}
printf("%s: number of unique tokens: %d\n", __func__, n_unique_tokens);

View file

@ -216,8 +216,8 @@ static std::vector<float> softmax(const std::vector<float>& logits) {
sum_exp += exp_logit;
probs[i] = exp_logit;
}
for (size_t i = 0; i < probs.size(); i++) {
probs[i] /= sum_exp;
for (float& prob : probs) {
prob /= static_cast<float>(sum_exp);
}
return probs;
}

View file

@ -313,16 +313,16 @@ int main(int argc, char ** argv) {
LOG_TEE("\n");
LOG_TEE("%s: prompt: '%s'\n", __func__, params.prompt.c_str());
LOG_TEE("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
for (int i = 0; i < (int) embd_inp.size(); i++) {
LOG_TEE("%6d -> '%s'\n", embd_inp[i], llama_token_to_piece(ctx, embd_inp[i]).c_str());
for (int i : embd_inp) {
LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
}
if (ctx_guidance) {
LOG_TEE("\n");
LOG_TEE("%s: negative prompt: '%s'\n", __func__, sparams.cfg_negative_prompt.c_str());
LOG_TEE("%s: number of tokens in negative prompt = %zu\n", __func__, guidance_inp.size());
for (int i = 0; i < (int) guidance_inp.size(); i++) {
LOG_TEE("%6d -> '%s'\n", guidance_inp[i], llama_token_to_piece(ctx, guidance_inp[i]).c_str());
for (int i : guidance_inp) {
LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
}
}

View file

@ -745,7 +745,7 @@ const std::string test::cpu_info = get_cpu_info();
const std::string test::gpu_info = get_gpu_info();
struct printer {
virtual ~printer() {}
virtual ~printer() = default;
FILE * fout;
virtual void print_header(const cmd_params & params) { (void) params; }
@ -891,43 +891,43 @@ struct markdown_printer : public printer {
void print_header(const cmd_params & params) override {
// select fields to print
fields.push_back("model");
fields.push_back("size");
fields.push_back("params");
fields.push_back("backend");
fields.emplace_back("model");
fields.emplace_back("size");
fields.emplace_back("params");
fields.emplace_back("backend");
bool is_cpu_backend = test::get_backend() == "CPU" || test::get_backend() == "BLAS";
if (!is_cpu_backend) {
fields.push_back("n_gpu_layers");
fields.emplace_back("n_gpu_layers");
}
if (params.n_threads.size() > 1 || params.n_threads != cmd_params_defaults.n_threads || is_cpu_backend) {
fields.push_back("n_threads");
fields.emplace_back("n_threads");
}
if (params.n_batch.size() > 1 || params.n_batch != cmd_params_defaults.n_batch) {
fields.push_back("n_batch");
fields.emplace_back("n_batch");
}
if (params.type_k.size() > 1 || params.type_k != cmd_params_defaults.type_k) {
fields.push_back("type_k");
fields.emplace_back("type_k");
}
if (params.type_v.size() > 1 || params.type_v != cmd_params_defaults.type_v) {
fields.push_back("type_v");
fields.emplace_back("type_v");
}
if (params.main_gpu.size() > 1 || params.main_gpu != cmd_params_defaults.main_gpu) {
fields.push_back("main_gpu");
fields.emplace_back("main_gpu");
}
if (params.split_mode.size() > 1 || params.split_mode != cmd_params_defaults.split_mode) {
fields.push_back("split_mode");
fields.emplace_back("split_mode");
}
if (params.mul_mat_q.size() > 1 || params.mul_mat_q != cmd_params_defaults.mul_mat_q) {
fields.push_back("mul_mat_q");
fields.emplace_back("mul_mat_q");
}
if (params.no_kv_offload.size() > 1 || params.no_kv_offload != cmd_params_defaults.no_kv_offload) {
fields.push_back("no_kv_offload");
fields.emplace_back("no_kv_offload");
}
if (params.tensor_split.size() > 1 || params.tensor_split != cmd_params_defaults.tensor_split) {
fields.push_back("tensor_split");
fields.emplace_back("tensor_split");
}
fields.push_back("test");
fields.push_back("t/s");
fields.emplace_back("test");
fields.emplace_back("t/s");
fprintf(fout, "|");
for (const auto & field : fields) {

View file

@ -800,7 +800,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
buffer_size += n_tensors * 128 /* CLIP PADDING */;
clip_ctx * new_clip = new clip_ctx;
auto* new_clip = new clip_ctx;
// update projector type
{
@ -1416,13 +1416,13 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
printf("%s: quantized size = %8.2f MB\n", __func__, total_size_new / 1024.0 / 1024.0);
int64_t sum_all = 0;
for (size_t i = 0; i < hist_all.size(); ++i) {
sum_all += hist_all[i];
for (auto i : hist_all) {
sum_all += i;
}
printf("%s: hist: ", __func__);
for (size_t i = 0; i < hist_all.size(); ++i) {
printf("%5.3f ", hist_all[i] / (float)sum_all);
for (auto i : hist_all) {
printf("%5.3f ", i / (float)sum_all);
}
printf("\n");
}

View file

@ -132,7 +132,7 @@ int main(int argc, char ** argv) {
for (int i = 0; i < W; i++) {
// there are different ways to init these tokens
if (0) {
if (false) {
// initialize randomly from the prompt tokens
tokens_j[j][i] = all[1 + rand() % (all.size() - 1)];
} else {
@ -268,10 +268,10 @@ int main(int argc, char ** argv) {
// if no active ngrams are left, it means the sampled token does not pass the verification
if (v > 0) {
for (int g = 0; g < (int) ngrams_cur.size(); g++) {
if (ngrams_cur[g].active) {
i_batch = ngrams_cur[g].i_batch[v];
seq_id_best = ngrams_cur[g].seq_id;
for (auto& g : ngrams_cur) {
if (g.active) {
i_batch = g.i_batch[v];
seq_id_best = g.seq_id;
++n_accept;
break;
@ -316,20 +316,20 @@ int main(int argc, char ** argv) {
}
// verify across active n-grams
for (int g = 0; g < (int) ngrams_cur.size(); g++) {
if (ngrams_cur[g].active) {
for (auto& g : ngrams_cur) {
if (g.active) {
if (v == N - 1) {
ngrams_cur[g].active = false;
g.active = false;
} else {
if (id != ngrams_cur[g].tokens[v + 1]) {
ngrams_cur[g].active = false;
if (id != g.tokens[v + 1]) {
g.active = false;
}
}
}
}
// print known n-grams starting with token id (debug)
if (0 && v == 0) {
if (false && v == 0) {
if (ngrams_observed.cnt[id] > 0) {
printf("\n - %d n-grams starting with '%s'\n", ngrams_observed.cnt[id], llama_token_to_piece(ctx, id).c_str());
}
@ -367,7 +367,7 @@ int main(int argc, char ** argv) {
} else {
for (int i = 0; i < W; i++) {
// there are different ways to init these tokens
if (0) {
if (false) {
// random init
tokens_j[N - 2][i] = all[1 + rand() % (all.size() - 1)];
} else {

View file

@ -344,12 +344,12 @@ int main(int argc, char ** argv) {
// in instruct mode, we inject a prefix and a suffix to each input by the user
if (params.instruct) {
params.interactive_first = true;
params.antiprompt.push_back("### Instruction:\n\n");
params.antiprompt.emplace_back("### Instruction:\n\n");
}
// similar for chatml mode
else if (params.chatml) {
params.interactive_first = true;
params.antiprompt.push_back("<|im_start|>user\n");
params.antiprompt.emplace_back("<|im_start|>user\n");
}
// enable interactive mode if interactive start is specified
@ -361,16 +361,16 @@ int main(int argc, char ** argv) {
LOG_TEE("\n");
LOG_TEE("%s: prompt: '%s'\n", __func__, params.prompt.c_str());
LOG_TEE("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
for (int i = 0; i < (int) embd_inp.size(); i++) {
LOG_TEE("%6d -> '%s'\n", embd_inp[i], llama_token_to_piece(ctx, embd_inp[i]).c_str());
for (int i : embd_inp) {
LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
}
if (ctx_guidance) {
LOG_TEE("\n");
LOG_TEE("%s: negative prompt: '%s'\n", __func__, sparams.cfg_negative_prompt.c_str());
LOG_TEE("%s: number of tokens in negative prompt = %zu\n", __func__, guidance_inp.size());
for (int i = 0; i < (int) guidance_inp.size(); i++) {
LOG_TEE("%6d -> '%s'\n", guidance_inp[i], llama_token_to_piece(ctx, guidance_inp[i]).c_str());
for (int i : guidance_inp) {
LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
}
}
@ -405,8 +405,8 @@ int main(int argc, char ** argv) {
LOG_TEE("Reverse prompt: '%s'\n", antiprompt.c_str());
if (params.verbose_prompt) {
auto tmp = ::llama_tokenize(ctx, antiprompt, false, true);
for (int i = 0; i < (int) tmp.size(); i++) {
LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str());
for (int i : tmp) {
LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
}
}
}
@ -420,8 +420,8 @@ int main(int argc, char ** argv) {
LOG_TEE("Input prefix: '%s'\n", params.input_prefix.c_str());
if (params.verbose_prompt) {
auto tmp = ::llama_tokenize(ctx, params.input_prefix, true, true);
for (int i = 0; i < (int) tmp.size(); i++) {
LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str());
for (int i : tmp) {
LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
}
}
}
@ -430,8 +430,8 @@ int main(int argc, char ** argv) {
LOG_TEE("Input suffix: '%s'\n", params.input_suffix.c_str());
if (params.verbose_prompt) {
auto tmp = ::llama_tokenize(ctx, params.input_suffix, false, true);
for (int i = 0; i < (int) tmp.size(); i++) {
LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str());
for (int i : tmp) {
LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
}
}
}

View file

@ -94,8 +94,8 @@ static std::vector<float> softmax(const std::vector<float>& logits) {
sum_exp += exp_logit;
probs[i] = exp_logit;
}
for (size_t i = 0; i < probs.size(); i++) {
probs[i] /= sum_exp;
for (float& prob : probs) {
prob /= static_cast<float>(sum_exp);
}
return probs;
}
@ -881,7 +881,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
size_t li = hs_cur.common_prefix;
for (int s = 0; s < 4; ++s) {
for (size_t j = hs_cur.common_prefix; j < hs_cur.seq_tokens[s].size() - 1; j++) {
eval_pairs.push_back(std::make_pair(hs_cur.i_batch + li++, hs_cur.seq_tokens[s][j + 1]));
eval_pairs.emplace_back(hs_cur.i_batch + li++, hs_cur.seq_tokens[s][j + 1]);
}
++li;
}
@ -997,7 +997,7 @@ static std::vector<winogrande_entry> load_winogrande_from_csv(const std::string&
printf("%s: no _ in <%s>\n", __func__, sentence.c_str());
continue;
}
std::istringstream stream(answer.c_str());
std::istringstream stream(answer);
int i_answer; stream >> i_answer;
if (stream.fail() || i_answer < 1 || i_answer > 2) {
printf("%s: failed to parse answer <%s>\n", __func__, answer.c_str());
@ -1158,13 +1158,13 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
const int last_1st = task.seq_tokens[0].size() - n_base1 > 1 ? 1 : 0;
size_t li = n_base1 - 1;
for (size_t j = n_base1-1; j < task.seq_tokens[0].size()-1-last_1st; ++j) {
eval_pairs.push_back(std::make_pair(task.i_batch + li++, task.seq_tokens[0][j+1]));
eval_pairs.emplace_back(task.i_batch + li++, task.seq_tokens[0][j+1]);
}
const auto& n_base2 = skip_choice ? task.n_base2 : task.common_prefix;
const int last_2nd = task.seq_tokens[1].size() - n_base2 > 1 ? 1 : 0;
li = task.seq_tokens[0].size() - task.common_prefix + n_base2 - 1;
for (size_t j = n_base2-1; j < task.seq_tokens[1].size()-1-last_2nd; ++j) {
eval_pairs.push_back(std::make_pair(task.i_batch + li++, task.seq_tokens[1][j+1]));
eval_pairs.emplace_back(task.i_batch + li++, task.seq_tokens[1][j+1]);
}
}
compute_logprobs(batch_logits.data(), n_vocab, workers, eval_pairs, eval_results);
@ -1221,7 +1221,7 @@ static bool deserialize_string(std::istream & in, std::string & str) {
uint32_t size;
if (!in.read((char *)&size, sizeof(size)).fail()) {
str.resize(size);
if (!in.read((char *)&str[0], size).fail()) return true;
if (!in.read((char *)str.data(), size).fail()) return true;
}
return false;
}
@ -1523,7 +1523,7 @@ static void multiple_choice_score(llama_context * ctx, const gpt_params & params
size_t li = cur_task.common_prefix;
for (int s = 0; s < int(cur_task.seq_tokens.size()); ++s) {
for (size_t j = cur_task.common_prefix; j < cur_task.seq_tokens[s].size() - 1; j++) {
eval_pairs.push_back(std::make_pair(cur_task.i_batch + li++, cur_task.seq_tokens[s][j + 1]));
eval_pairs.emplace_back(cur_task.i_batch + li++, cur_task.seq_tokens[s][j + 1]);
}
++li;
}

View file

@ -223,6 +223,7 @@ using socket_t = int;
#include <string>
#include <sys/stat.h>
#include <thread>
#include <utility>
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
#ifdef _WIN32
@ -705,7 +706,7 @@ public:
Server &set_file_request_handler(Handler handler);
Server &set_error_handler(HandlerWithResponse handler);
Server &set_error_handler(Handler handler);
Server &set_error_handler(const Handler& handler);
Server &set_exception_handler(ExceptionHandler handler);
Server &set_pre_routing_handler(HandlerWithResponse handler);
Server &set_post_routing_handler(Handler handler);
@ -781,7 +782,7 @@ private:
bool dispatch_request(Request &req, Response &res, const Handlers &handlers);
bool
dispatch_request_for_content_reader(Request &req, Response &res,
ContentReader content_reader,
const ContentReader& content_reader,
const HandlersForContentReader &handlers);
bool parse_request_line(const char *s, Request &req);
@ -804,7 +805,7 @@ private:
MultipartContentHeader multipart_header,
ContentReceiver multipart_receiver);
bool read_content_core(Stream &strm, Request &req, Response &res,
ContentReceiver receiver,
const ContentReceiver& receiver,
MultipartContentHeader multipart_header,
ContentReceiver multipart_receiver);
@ -910,8 +911,8 @@ public:
explicit ClientImpl(const std::string &host, int port);
explicit ClientImpl(const std::string &host, int port,
const std::string &client_cert_path,
const std::string &client_key_path);
std::string client_cert_path,
std::string client_key_path);
virtual ~ClientImpl();
@ -937,7 +938,7 @@ public:
Result Get(const std::string &path, ResponseHandler response_handler,
ContentReceiver content_receiver, Progress progress);
Result Get(const std::string &path, const Headers &headers,
ResponseHandler response_handler, ContentReceiver content_receiver,
ResponseHandler response_handler, const ContentReceiver& content_receiver,
Progress progress);
Result Get(const std::string &path, const Params &params,
@ -946,8 +947,8 @@ public:
const Headers &headers, ContentReceiver content_receiver,
Progress progress = nullptr);
Result Get(const std::string &path, const Params &params,
const Headers &headers, ResponseHandler response_handler,
ContentReceiver content_receiver, Progress progress = nullptr);
const Headers &headers, const ResponseHandler& response_handler,
const ContentReceiver& content_receiver, const Progress& progress = nullptr);
Result Head(const std::string &path);
Result Head(const std::string &path, const Headers &headers);
@ -1790,7 +1791,7 @@ void hosted_at(const std::string &hostname, std::vector<std::string> &addrs);
std::string append_query_params(const std::string &path, const Params &params);
std::pair<std::string, std::string> make_range_header(Ranges ranges);
std::pair<std::string, std::string> make_range_header(const Ranges& ranges);
std::pair<std::string, std::string>
make_basic_authentication_header(const std::string &username,
@ -1808,12 +1809,12 @@ void read_file(const std::string &path, std::string &out);
std::string trim_copy(const std::string &s);
void split(const char *b, const char *e, char d,
std::function<void(const char *, const char *)> fn);
const std::function<void(const char *, const char *)>& fn);
bool process_client_socket(socket_t sock, time_t read_timeout_sec,
time_t read_timeout_usec, time_t write_timeout_sec,
time_t write_timeout_usec,
std::function<bool(Stream &)> callback);
const std::function<bool(Stream &)>& callback);
socket_t create_client_socket(
const std::string &host, const std::string &ip, int port,
@ -2231,8 +2232,8 @@ inline void read_file(const std::string &path, std::string &out) {
fs.seekg(0, std::ios_base::end);
auto size = fs.tellg();
fs.seekg(0);
out.resize(static_cast<size_t>(size));
fs.read(&out[0], static_cast<std::streamsize>(size));
out.resize(size);
fs.read(&out[0], size);
}
inline std::string file_extension(const std::string &path) {
@ -2261,7 +2262,7 @@ inline std::string trim_copy(const std::string &s) {
}
inline void split(const char *b, const char *e, char d,
std::function<void(const char *, const char *)> fn) {
const std::function<void(const char *, const char *)>& fn) {
size_t i = 0;
size_t beg = 0;
@ -2624,7 +2625,7 @@ inline bool process_client_socket(socket_t sock, time_t read_timeout_sec,
time_t read_timeout_usec,
time_t write_timeout_sec,
time_t write_timeout_usec,
std::function<bool(Stream &)> callback) {
const std::function<bool(Stream &)>& callback) {
SocketStream strm(sock, read_timeout_sec, read_timeout_usec,
write_timeout_sec, write_timeout_usec);
return callback(strm);
@ -3037,7 +3038,7 @@ find_content_type(const std::string &path,
case "svg"_t: return "image/svg+xml";
case "webp"_t: return "image/webp";
case "ico"_t: return "image/x-icon";
case "tif"_t: return "image/tiff";
case "tif"_t:
case "tiff"_t: return "image/tiff";
case "jpg"_t:
case "jpeg"_t: return "image/jpeg";
@ -3488,13 +3489,13 @@ inline bool read_headers(Stream &strm, Headers &headers) {
}
inline bool read_content_with_length(Stream &strm, uint64_t len,
Progress progress,
ContentReceiverWithProgress out) {
const Progress& progress,
const ContentReceiverWithProgress& out) {
char buf[CPPHTTPLIB_RECV_BUFSIZ];
uint64_t r = 0;
while (r < len) {
auto read_len = static_cast<size_t>(len - r);
auto read_len = len - r;
auto n = strm.read(buf, (std::min)(read_len, CPPHTTPLIB_RECV_BUFSIZ));
if (n <= 0) { return false; }
@ -3513,7 +3514,7 @@ inline void skip_content_with_length(Stream &strm, uint64_t len) {
char buf[CPPHTTPLIB_RECV_BUFSIZ];
uint64_t r = 0;
while (r < len) {
auto read_len = static_cast<size_t>(len - r);
auto read_len = len - r;
auto n = strm.read(buf, (std::min)(read_len, CPPHTTPLIB_RECV_BUFSIZ));
if (n <= 0) { return; }
r += static_cast<uint64_t>(n);
@ -3521,7 +3522,7 @@ inline void skip_content_with_length(Stream &strm, uint64_t len) {
}
inline bool read_content_without_length(Stream &strm,
ContentReceiverWithProgress out) {
const ContentReceiverWithProgress& out) {
char buf[CPPHTTPLIB_RECV_BUFSIZ];
uint64_t r = 0;
for (;;) {
@ -3983,12 +3984,12 @@ inline bool parse_range_header(const std::string &s, Ranges &ranges) try {
if (std::regex_match(b, e, cm, re_another_range)) {
ssize_t first = -1;
if (!cm.str(1).empty()) {
first = static_cast<ssize_t>(std::stoll(cm.str(1)));
first = std::stoll(cm.str(1));
}
ssize_t last = -1;
if (!cm.str(2).empty()) {
last = static_cast<ssize_t>(std::stoll(cm.str(2)));
last = std::stoll(cm.str(2));
}
if (first != -1 && last != -1 && first > last) {
@ -4254,9 +4255,8 @@ inline std::string make_multipart_data_boundary() {
inline bool is_multipart_boundary_chars_valid(const std::string &boundary) {
auto valid = true;
for (size_t i = 0; i < boundary.size(); i++) {
auto c = boundary[i];
if (!std::isalnum(c) && c != '-' && c != '_') {
for (char c : boundary) {
if (!std::isalnum(c) && c != '-' && c != '_') {
valid = false;
break;
}
@ -4707,7 +4707,7 @@ inline bool parse_www_authenticate(const Response &res,
s = s.substr(pos + 1);
auto beg = std::sregex_iterator(s.begin(), s.end(), re);
for (auto i = beg; i != std::sregex_iterator(); ++i) {
auto m = *i;
const auto& m = *i;
auto key = s.substr(static_cast<size_t>(m.position(1)),
static_cast<size_t>(m.length(1)));
auto val = m.length(2) > 0
@ -4802,7 +4802,7 @@ inline std::string append_query_params(const std::string &path,
}
// Header utilities
inline std::pair<std::string, std::string> make_range_header(Ranges ranges) {
inline std::pair<std::string, std::string> make_range_header(const Ranges& ranges) {
std::string field = "bytes=";
auto i = 0;
for (auto r : ranges) {
@ -4949,7 +4949,7 @@ inline void Response::set_content_provider(
set_header("Content-Type", content_type);
content_length_ = in_length;
if (in_length > 0) { content_provider_ = std::move(provider); }
content_provider_resource_releaser_ = resource_releaser;
content_provider_resource_releaser_ = std::move(resource_releaser);
is_chunked_content_provider_ = false;
}
@ -4959,7 +4959,7 @@ inline void Response::set_content_provider(
set_header("Content-Type", content_type);
content_length_ = 0;
content_provider_ = detail::ContentProviderAdapter(std::move(provider));
content_provider_resource_releaser_ = resource_releaser;
content_provider_resource_releaser_ = std::move(resource_releaser);
is_chunked_content_provider_ = false;
}
@ -4969,7 +4969,7 @@ inline void Response::set_chunked_content_provider(
set_header("Content-Type", content_type);
content_length_ = 0;
content_provider_ = detail::ContentProviderAdapter(std::move(provider));
content_provider_resource_releaser_ = resource_releaser;
content_provider_resource_releaser_ = std::move(resource_releaser);
is_chunked_content_provider_ = true;
}
@ -5010,7 +5010,7 @@ inline SocketStream::SocketStream(socket_t sock, time_t read_timeout_sec,
write_timeout_sec_(write_timeout_sec),
write_timeout_usec_(write_timeout_usec), read_buff_(read_buff_size_, 0) {}
inline SocketStream::~SocketStream() {}
inline SocketStream::~SocketStream() = default;
inline bool SocketStream::is_readable() const {
return select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0;
@ -5101,7 +5101,7 @@ inline ssize_t BufferStream::read(char *ptr, size_t size) {
#else
auto len_read = buffer.copy(ptr, size, position);
#endif
position += static_cast<size_t>(len_read);
position += len_read;
return static_cast<ssize_t>(len_read);
}
@ -5131,7 +5131,7 @@ inline Server::Server()
#endif
}
inline Server::~Server() {}
inline Server::~Server() = default;
inline Server &Server::Get(const std::string &pattern, Handler handler) {
get_handlers_.push_back(
@ -5241,7 +5241,7 @@ inline Server &Server::set_error_handler(HandlerWithResponse handler) {
return *this;
}
inline Server &Server::set_error_handler(Handler handler) {
inline Server &Server::set_error_handler(const Handler& handler) {
error_handler_ = [handler](const Request &req, Response &res) {
handler(req, res);
return HandlerResponse::Handled;
@ -5618,7 +5618,7 @@ inline bool Server::read_content_with_content_receiver(
}
inline bool Server::read_content_core(Stream &strm, Request &req, Response &res,
ContentReceiver receiver,
const ContentReceiver& receiver,
MultipartContentHeader multipart_header,
ContentReceiver multipart_receiver) {
detail::MultipartFormDataParser multipart_form_data_parser;
@ -5688,7 +5688,7 @@ inline bool Server::handle_file_request(const Request &req, Response &res,
detail::find_content_type(path, file_extension_and_mimetype_map_);
if (type) { res.set_header("Content-Type", type); }
for (const auto &kv : entry.headers) {
res.set_header(kv.first.c_str(), kv.second);
res.set_header(kv.first, kv.second);
}
res.status = req.has_header("Range") ? 206 : 200;
if (!head && file_request_handler_) {
@ -6024,7 +6024,7 @@ inline void Server::apply_ranges(const Request &req, Response &res,
}
inline bool Server::dispatch_request_for_content_reader(
Request &req, Response &res, ContentReader content_reader,
Request &req, Response &res, const ContentReader& content_reader,
const HandlersForContentReader &handlers) {
for (const auto &x : handlers) {
const auto &pattern = x.first;
@ -6202,11 +6202,11 @@ inline ClientImpl::ClientImpl(const std::string &host, int port)
: ClientImpl(host, port, std::string(), std::string()) {}
// Builds a client for `host`:`port` and stores the supplied client
// certificate and key paths.
//
// The two path strings are sink parameters: they are taken by value and moved
// into the members, so a caller passing temporaries (as the two-argument
// constructor does with std::string()) pays no extra copy.
// NOTE(review): the in-class declaration must use the same by-value parameter
// types - confirm it matches.
inline ClientImpl::ClientImpl(const std::string &host, int port,
                              std::string client_cert_path,
                              std::string client_key_path)
    : host_(host), port_(port),
      host_and_port_(adjust_host_string(host) + ":" + std::to_string(port)),
      client_cert_path_(std::move(client_cert_path)),
      client_key_path_(std::move(client_key_path)) {}
inline ClientImpl::~ClientImpl() {
std::lock_guard<std::mutex> guard(socket_mutex_);
@ -6579,7 +6579,7 @@ inline bool ClientImpl::redirect(Request &req, Response &res, Error &error) {
return false;
#endif
} else {
ClientImpl cli(next_host.c_str(), next_port);
ClientImpl cli(next_host, next_port);
cli.copy_settings(*this);
return detail::redirect(cli, req, res, path, location, error);
}
@ -7056,7 +7056,7 @@ inline Result ClientImpl::Get(const std::string &path,
inline Result ClientImpl::Get(const std::string &path, const Headers &headers,
ResponseHandler response_handler,
ContentReceiver content_receiver,
const ContentReceiver& content_receiver,
Progress progress) {
Request req;
req.method = "GET";
@ -7078,27 +7078,27 @@ inline Result ClientImpl::Get(const std::string &path, const Params &params,
if (params.empty()) { return Get(path, headers); }
std::string path_with_query = append_query_params(path, params);
return Get(path_with_query.c_str(), headers, progress);
return Get(path_with_query, headers, std::move(progress));
}
// Overload without a response handler: delegates to the
// (path, params, headers, response_handler, content_receiver, progress)
// overload, passing nullptr as the response handler.
//
// `content_receiver` and `progress` are by-value parameters that are not used
// again here, so they are moved into the delegated call. There is a single
// return; a second one after it would be unreachable.
inline Result ClientImpl::Get(const std::string &path, const Params &params,
                              const Headers &headers,
                              ContentReceiver content_receiver,
                              Progress progress) {
  return Get(path, params, headers, nullptr, std::move(content_receiver),
             std::move(progress));
}
// GET with query parameters, a response handler, a content receiver and a
// progress callback.
//
// When `params` is empty the request goes to `path` unchanged. Otherwise the
// request path is the result of append_query_params(path, params). The three
// callbacks are only forwarded, so they are accepted by const reference.
// NOTE(review): the in-class declaration must carry the same const-reference
// parameter types - confirm it matches.
inline Result ClientImpl::Get(const std::string &path, const Params &params,
                              const Headers &headers,
                              const ResponseHandler &response_handler,
                              const ContentReceiver &content_receiver,
                              const Progress &progress) {
  if (params.empty()) {
    return Get(path, headers, response_handler, content_receiver, progress);
  }

  // Pass the std::string directly; going through c_str() would only force a
  // temporary std::string to be rebuilt for the callee's `path` parameter.
  std::string path_with_query = append_query_params(path, params);
  return Get(path_with_query, headers, response_handler, content_receiver,
             progress);
}
@ -7201,7 +7201,7 @@ inline Result ClientImpl::Post(const std::string &path, const Headers &headers,
const auto &content_type =
detail::serialize_multipart_formdata_get_content_type(boundary);
const auto &body = detail::serialize_multipart_formdata(items, boundary);
return Post(path, headers, body, content_type.c_str());
return Post(path, headers, body, content_type);
}
inline Result ClientImpl::Post(const std::string &path, const Headers &headers,
@ -7214,7 +7214,7 @@ inline Result ClientImpl::Post(const std::string &path, const Headers &headers,
const auto &content_type =
detail::serialize_multipart_formdata_get_content_type(boundary);
const auto &body = detail::serialize_multipart_formdata(items, boundary);
return Post(path, headers, body, content_type.c_str());
return Post(path, headers, body, content_type);
}
inline Result
@ -8361,7 +8361,7 @@ inline Client::Client(const std::string &host, int port,
: cli_(detail::make_unique<ClientImpl>(host, port, client_cert_path,
client_key_path)) {}
// Client has no hand-written cleanup, so its destructor is defaulted.
// Exactly one definition is kept to avoid a redefinition error.
inline Client::~Client() = default;
inline bool Client::is_valid() const {
return cli_ != nullptr && cli_->is_valid();
@ -8421,19 +8421,19 @@ inline Result Client::Get(const std::string &path, const Headers &headers,
}
// Forwards to the implementation object held in `cli_`.
// `progress` is a by-value parameter that is not used afterwards, so it is
// moved into the call. Only one return is kept; a second would be unreachable.
inline Result Client::Get(const std::string &path, const Params &params,
                          const Headers &headers, Progress progress) {
  return cli_->Get(path, params, headers, std::move(progress));
}
// Forwards to the implementation object held in `cli_`.
// Both callbacks are by-value parameters that are not used afterwards, so
// they are moved into the call. Only one return is kept; a second would be
// unreachable.
inline Result Client::Get(const std::string &path, const Params &params,
                          const Headers &headers,
                          ContentReceiver content_receiver, Progress progress) {
  return cli_->Get(path, params, headers, std::move(content_receiver),
                   std::move(progress));
}
// Forwards to the implementation object held in `cli_`.
// The three callbacks are by-value parameters that are not used afterwards,
// so each is handed over with std::move. The function has exactly one return
// statement.
inline Result Client::Get(const std::string &path, const Params &params,
                          const Headers &headers,
                          ResponseHandler response_handler,
                          ContentReceiver content_receiver, Progress progress) {
  return cli_->Get(path, params, headers, std::move(response_handler),
                   std::move(content_receiver), std::move(progress));
}
// Forwards Head(path) to the implementation object held in `cli_` and returns
// its result unchanged.
inline Result Client::Head(const std::string &path) {
  return cli_->Head(path);
}
@ -8754,7 +8754,7 @@ inline void Client::enable_server_certificate_verification(bool enabled) {
}
#endif
// Installs `logger` on the implementation object held in `cli_`.
// `logger` is a by-value sink parameter, so it is moved rather than copied.
// The function is defined once; a second definition would be a redefinition
// error.
inline void Client::set_logger(Logger logger) {
  cli_->set_logger(std::move(logger));
}
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
inline void Client::set_ca_cert_path(const std::string &ca_cert_file_path,

View file

@ -1202,9 +1202,8 @@ struct llama_server_context
(json)(slot.images[image_idx].prefix_prompt);
std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
for (int i = 0; i < (int) append_tokens.size(); ++i)
{
llama_batch_add(batch, append_tokens[i], slot.n_past, { slot.id }, true);
for (int append_token : append_tokens) {
llama_batch_add(batch, append_token, slot.n_past, { slot.id }, true);
slot.n_past += 1;
}
}
@ -2034,7 +2033,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
invalid_param = true;
break;
}
params.lora_adapter.push_back(std::make_tuple(argv[i], 1.0f));
params.lora_adapter.emplace_back(argv[i], 1.0f);
params.use_mmap = false;
}
else if (arg == "--lora-scaled")
@ -2050,7 +2049,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
invalid_param = true;
break;
}
params.lora_adapter.push_back(std::make_tuple(lora_adapter, std::stof(argv[i])));
params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
params.use_mmap = false;
}
else if (arg == "--lora-base")
@ -2192,7 +2191,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
}
}
if (!params.kv_overrides.empty()) {
params.kv_overrides.emplace_back(llama_model_kv_override());
params.kv_overrides.emplace_back();
params.kv_overrides.back().key[0] = 0;
}
@ -2626,12 +2625,11 @@ int main(int argc, char **argv)
if (!llama_result.error) {
std::vector<json> result_array = format_partial_response_oaicompat( llama_result);
for (auto it = result_array.begin(); it != result_array.end(); ++it)
{
if (!it->empty()) {
for (auto& it : result_array) {
if (!it.empty()) {
const std::string str =
"data: " +
it->dump(-1, ' ', false, json::error_handler_t::replace) +
it.dump(-1, ' ', false, json::error_handler_t::replace) +
"\n\n";
LOG_VERBOSE("data stream", {{"to_send", str}});
if (!sink.write(str.c_str(), str.size())) {
@ -2824,19 +2822,17 @@ int main(int argc, char **argv)
}*/
//);
llama.queue_tasks.on_new_task(std::bind(
&llama_server_context::process_single_task, &llama, std::placeholders::_1));
llama.queue_tasks.on_finish_multitask(std::bind(
&llama_server_context::on_finish_multitask, &llama, std::placeholders::_1));
llama.queue_tasks.on_all_tasks_finished(std::bind(
&llama_server_context::run_on_all_tasks_finished, &llama));
llama.queue_results.on_multitask_update(std::bind(
&llama_server_queue::update_multitask,
&llama.queue_tasks,
std::placeholders::_1,
std::placeholders::_2,
std::placeholders::_3
));
llama.queue_tasks.on_new_task([ObjectPtr = &llama](auto&& PH1) {
ObjectPtr->process_single_task(std::forward<decltype(PH1)>(PH1));
});
llama.queue_tasks.on_finish_multitask([ObjectPtr = &llama](auto&& PH1) {
ObjectPtr->on_finish_multitask(std::forward<decltype(PH1)>(PH1));
});
llama.queue_tasks.on_all_tasks_finished([ObjectPtr = &llama] { ObjectPtr->run_on_all_tasks_finished(); });
llama.queue_results.on_multitask_update([ObjectPtr = &llama.queue_tasks](auto&& PH1, auto&& PH2, auto&& PH3) {
ObjectPtr->update_multitask(std::forward<decltype(PH1)>(PH1), std::forward<decltype(PH2)>(PH2),
std::forward<decltype(PH3)>(PH3));
});
llama.queue_tasks.start_loop();
t.join();

View file

@ -1,6 +1,7 @@
#pragma once
#include <string>
#include <utility>
#include <vector>
#include <set>
#include <mutex>
@ -171,10 +172,10 @@ inline std::string format_chatml(std::vector<json> messages)
{
std::ostringstream chatml_msgs;
for (auto it = messages.begin(); it != messages.end(); ++it) {
for (auto& message : messages) {
chatml_msgs << "<|im_start|>"
<< json_value(*it, "role", std::string("user")) << '\n';
chatml_msgs << json_value(*it, "content", std::string(""))
<< json_value(message, "role", std::string("user")) << '\n';
chatml_msgs << json_value(message, "content", std::string(""))
<< "<|im_end|>\n";
}
@ -225,17 +226,17 @@ struct llama_server_queue {
// Register function to process a new task
void on_new_task(std::function<void(task_server&)> callback) {
    // `callback` is a by-value sink parameter: move it into the member once.
    // Copy-assigning first and then move-assigning would pay for a copy that
    // is immediately overwritten.
    callback_new_task = std::move(callback);
}
// Register function to process a multitask
void on_finish_multitask(std::function<void(task_multi&)> callback) {
    // `callback` is a by-value sink parameter: a single move-assignment stores
    // it without the extra copy a preceding copy-assignment would cost.
    callback_finish_multitask = std::move(callback);
}
// Register the function to be called when the batch of tasks is finished
void on_all_tasks_finished(std::function<void(void)> callback) {
    // `callback` is a by-value sink parameter: store it with one
    // move-assignment instead of copying and then overwriting the copy.
    callback_all_task_finished = std::move(callback);
}
// Call when the state of one slot is changed
@ -378,7 +379,7 @@ struct llama_server_response {
// Register the function to update multitask
void on_multitask_update(callback_multitask_t callback) {
    // `callback` is a by-value sink parameter: move it into the member once
    // rather than copy-assigning and then move-assigning over the copy.
    callback_update_multitask = std::move(callback);
}
// Send a new result to a waiting task_id

View file

@ -32,11 +32,11 @@ int main(int argc, char ** argv) {
tokens = ::llama_tokenize(model, prompt, add_bos, true);
for (int i = 0; i < (int) tokens.size(); i++) {
for (int token : tokens) {
if (printing_ids) {
printf("%d\n", tokens[i]);
printf("%d\n", token);
} else {
printf("%6d -> '%s'\n", tokens[i], llama_token_to_piece(ctx, tokens[i]).c_str());
printf("%6d -> '%s'\n", token, llama_token_to_piece(ctx, token).c_str());
}
}

View file

@ -151,8 +151,7 @@ static void alloc_model(struct ggml_allocr * alloc, struct my_llama_model * mode
ggml_allocr_alloc(alloc, model->tok_embeddings);
ggml_allocr_alloc(alloc, model->norm);
ggml_allocr_alloc(alloc, model->output);
for (uint32_t i = 0; i < model->layers.size(); ++i) {
auto & layer = model->layers[i];
for (auto& layer : model->layers) {
ggml_allocr_alloc(alloc, layer.attention_norm);
ggml_allocr_alloc(alloc, layer.wq);
ggml_allocr_alloc(alloc, layer.wk);
@ -166,8 +165,7 @@ static void alloc_model(struct ggml_allocr * alloc, struct my_llama_model * mode
ggml_allocr_alloc(alloc, model->tok_embeddings->grad);
ggml_allocr_alloc(alloc, model->norm->grad);
ggml_allocr_alloc(alloc, model->output->grad);
for (uint32_t i = 0; i < model->layers.size(); ++i) {
auto & layer = model->layers[i];
for (auto& layer : model->layers) {
ggml_allocr_alloc(alloc, layer.attention_norm->grad);
ggml_allocr_alloc(alloc, layer.wq->grad);
ggml_allocr_alloc(alloc, layer.wk->grad);
@ -453,9 +451,9 @@ static struct ggml_tensor * llama_build_train_graphs(
// allocating checkpoints in one block to reduce memory fragmentation
// note: they will be freed in reverse order
for (int i = 0; i < (int) checkpoints.size(); ++i) {
if (checkpoints[i]->data == NULL && checkpoints[i]->view_src == NULL) {
ggml_allocr_alloc(alloc, checkpoints[i]);
for (auto& checkpoint : checkpoints) {
if (checkpoint->data == NULL && checkpoint->view_src == NULL) {
ggml_allocr_alloc(alloc, checkpoint);
}
}
@ -925,7 +923,7 @@ struct save_train_files_data {
};
static void save_train_files(void * vdata, struct train_state * train) {
struct save_train_files_data * data = (struct save_train_files_data *) vdata;
auto data = (struct save_train_files_data *) vdata;
int64_t iter = train->opt->iter;
if (strlen(data->fn_checkpoint_out) > 0) {
@ -945,8 +943,7 @@ static int64_t get_parameter_count(struct my_llama_model* model) {
nx += ggml_nelements(model->norm);
nx += ggml_nelements(model->output);
for (uint32_t i = 0; i < model->layers.size(); ++i) {
auto & layer = model->layers[i];
for (auto& layer : model->layers) {
nx += ggml_nelements(layer.attention_norm);
nx += ggml_nelements(layer.wq);
nx += ggml_nelements(layer.wk);

View file

@ -1092,7 +1092,7 @@ struct llama_mlock {
bool failed_already = false;
// The default constructor does no work of its own; members rely on their
// in-class initializers. It is declared once; a second declaration of the
// same constructor would not compile.
llama_mlock() = default;
llama_mlock(const llama_mlock &) = delete;
~llama_mlock() {
@ -2958,7 +2958,7 @@ static void llm_load_hparams(
}
// TODO: This should probably be in llama.h
static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab & vocab, std::string raw_text, bool bos, bool special = false);
static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab & vocab, const std::string& raw_text, bool bos, bool special = false);
static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch);
static void llm_load_vocab(
@ -7111,8 +7111,8 @@ struct llm_tokenizer_bpe {
const auto token = vocab.token_to_id.find(str);
if (token == vocab.token_to_id.end()) {
for (auto j = str.begin(); j != str.end(); ++j) {
std::string byte_str(1, *j);
for (char j : str) {
std::string byte_str(1, j);
auto token_multibyte = vocab.token_to_id.find(byte_str);
if (token_multibyte == vocab.token_to_id.end()) {
throw std::runtime_error("ERROR: byte not found in vocab");
@ -7172,8 +7172,8 @@ private:
bpe_encoded_words.reserve(text.size());
auto cps = codepoints_from_utf8(text);
for (size_t i = 0; i < cps.size(); ++i)
text_utf.emplace_back(codepoint_to_utf8(cps[i]));
for (unsigned int cp : cps)
text_utf.emplace_back(codepoint_to_utf8(cp));
for (int i = 0; i < (int)text_utf.size(); i++) {
const std::string & utf_char = text_utf[i];
@ -7344,7 +7344,7 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list<
const auto & special_id = st.second;
// for each text fragment
std::forward_list<fragment_buffer_variant>::iterator it = buffer.begin();
auto it = buffer.begin();
while (it != buffer.end()) {
auto & fragment = (*it);
@ -7431,7 +7431,7 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list<
}
}
static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab & vocab, std::string raw_text, bool bos, bool special) {
static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab & vocab, const std::string& raw_text, bool bos, bool special) {
std::vector<llama_vocab::id> output;
// OG tokenizer behavior:
@ -7887,7 +7887,7 @@ void llama_grammar_free(struct llama_grammar * grammar) {
}
struct llama_grammar * llama_grammar_copy(const struct llama_grammar * grammar) {
llama_grammar * result = new llama_grammar{ grammar->rules, grammar->stacks, grammar->partial_utf8 };
auto result = new llama_grammar{ grammar->rules, grammar->stacks, grammar->partial_utf8 };
// redirect elements in stacks to point to new rules
for (size_t is = 0; is < result->stacks.size(); is++) {
@ -8095,8 +8095,8 @@ void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array *
}
// Calculate absolute value of second derivatives
for (size_t i = 0; i < second_derivatives.size(); ++i) {
second_derivatives[i] = std::abs(second_derivatives[i]);
for (float& second_derivative : second_derivatives) {
second_derivative = std::abs(second_derivative);
}
// Normalize the second derivatives
@ -9412,8 +9412,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
if (tot_count > 0) {
LLAMA_LOG_INFO(" | hist: ");
for (size_t i = 0; i < hist_cur.size(); i++) {
LLAMA_LOG_INFO("%5.3f ", hist_cur[i] / float(nelements));
for (long long i : hist_cur) {
LLAMA_LOG_INFO("%5.3f ", i / float(nelements));
}
}
LLAMA_LOG_INFO("\n");
@ -9448,14 +9448,14 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
// print histogram for all tensors
{
int64_t sum_all = 0;
for (size_t i = 0; i < hist_all.size(); i++) {
sum_all += hist_all[i];
for (auto i : hist_all) {
sum_all += i;
}
if (sum_all > 0) {
LLAMA_LOG_INFO("%s: hist: ", __func__);
for (size_t i = 0; i < hist_all.size(); i++) {
LLAMA_LOG_INFO("%5.3f ", hist_all[i] / float(sum_all));
for (auto i : hist_all) {
LLAMA_LOG_INFO("%5.3f ", i / float(sum_all));
}
LLAMA_LOG_INFO("\n");
}
@ -9859,7 +9859,7 @@ struct llama_model * llama_load_model_from_file(
struct llama_model_params params) {
ggml_time_init();
llama_model * model = new llama_model;
auto model = new llama_model;
unsigned cur_percentage = 0;
if (params.progress_callback == NULL) {
@ -9905,7 +9905,7 @@ struct llama_context * llama_new_context_with_model(
return nullptr;
}
llama_context * ctx = new llama_context(*model);
auto ctx = new llama_context(*model);
const auto & hparams = model->hparams;
auto & cparams = ctx->cparams;

View file

@ -256,7 +256,7 @@ enum test_mode {
};
struct test_case {
// Virtual destructor for this type, which declares virtual member functions.
// It has no custom cleanup, so it is defaulted and declared once.
virtual ~test_case() = default;
virtual std::string op_desc(ggml_tensor * t) {
return ggml_op_desc(t);
@ -281,9 +281,9 @@ struct test_case {
virtual size_t op_size(ggml_tensor * t) {
size_t size = ggml_nbytes(t);
// add source tensors
for (int i = 0; i < GGML_MAX_SRC; i++) {
if (t->src[i] != NULL) {
size += ggml_nbytes(t->src[i]);
for (auto& el : t->src) {
if (el) {
size += ggml_nbytes(el);
}
}
return size;
@ -416,7 +416,7 @@ struct test_case {
};
auto callback = [](int index, ggml_tensor * t1, ggml_tensor * t2, void * user_data) -> bool {
callback_userdata * ud = (callback_userdata *) user_data;
auto ud = (callback_userdata *) user_data;
const char * bn1 = ggml_backend_name(ud->backend1);
const char * bn2 = ggml_backend_name(ud->backend2);

View file

@ -583,7 +583,7 @@ int main(int argc, const char ** argv) {
}
// mean, not yet fully implemented
if(0)
if(false)
{
srand(seed);
const int nargs = 1;
@ -601,7 +601,7 @@ int main(int argc, const char ** argv) {
}
// argmax
if (0)
if (false)
{
srand(seed);
const int nargs = 1;
@ -732,7 +732,7 @@ int main(int argc, const char ** argv) {
}
// tanh, not yet fully implemented
if(0)
if(false)
{
srand(seed);
const int nargs = 1;
@ -787,7 +787,7 @@ int main(int argc, const char ** argv) {
}
// elu, not yet fully implemented
if(0)
if(false)
{
srand(seed);
const int nargs = 1;
@ -822,7 +822,7 @@ int main(int argc, const char ** argv) {
}
// gelu, not yet fully implemented
if(0)
if(false)
{
srand(seed);
const int nargs = 1;
@ -1559,7 +1559,7 @@ int main(int argc, const char ** argv) {
}
// flash_attn f16, not yet fully implemented
if(0)
if(false)
{
srand(seed);
const int nargs = 3;

View file

@ -29,10 +29,9 @@ term ::= [0-9]+)""";
};
uint32_t index = 0;
for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it)
{
std::string key = it->first;
uint32_t value = it->second;
for (auto& symbol_id : parsed_grammar.symbol_ids) {
std::string key = symbol_id.first;
uint32_t value = symbol_id.second;
std::pair<std::string, uint32_t> expected_pair = expected[index];
// pretty print error message before asserting
@ -88,9 +87,7 @@ term ::= [0-9]+)""";
for (auto rule : parsed_grammar.rules)
{
// compare rule to expected rule
for (uint32_t i = 0; i < rule.size(); i++)
{
llama_grammar_element element = rule[i];
for (auto element : rule) {
llama_grammar_element expected_element = expected_rules[index];
// pretty print error message before asserting
@ -135,10 +132,9 @@ term ::= [0-9]+)""";
};
index = 0;
for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it)
{
std::string key = it->first;
uint32_t value = it->second;
for (auto& symbol_id : parsed_grammar.symbol_ids) {
std::string key = symbol_id.first;
uint32_t value = symbol_id.second;
std::pair<std::string, uint32_t> expected_pair = expected[index];
// pretty print error message before asserting
@ -227,9 +223,7 @@ term ::= [0-9]+)""";
for (auto rule : parsed_grammar.rules)
{
// compare rule to expected rule
for (uint32_t i = 0; i < rule.size(); i++)
{
llama_grammar_element element = rule[i];
for (auto element : rule) {
llama_grammar_element expected_element = expected_rules[index];
// pretty print error message before asserting

View file

@ -98,14 +98,14 @@ int main()
},
};
for (auto pair : expected)
for (const auto& pair : expected)
{
parsed_grammar.symbol_ids[pair.first] = pair.second;
}
for (auto rule : expected_rules)
for (const auto& rule : expected_rules)
{
parsed_grammar.rules.push_back({});
parsed_grammar.rules.emplace_back();
for (auto element : rule)
{
parsed_grammar.rules.back().push_back(element);

View file

@ -126,7 +126,7 @@ int main(int argc, char * argv[]) {
bool failed = false;
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
ggml_type type = (ggml_type) i;
auto type = (ggml_type) i;
ggml_type_traits_t qfns = ggml_internal_get_type_traits(type);
// deprecated - skip

View file

@ -269,7 +269,7 @@ int main(int argc, char * argv[]) {
struct ggml_context * ctx = ggml_init(ggml_params);
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
ggml_type type = (ggml_type) i;
auto type = (ggml_type) i;
ggml_type_traits_t qfns = ggml_internal_get_type_traits(type);
if (!params.include_types.empty() && ggml_type_name(type) && std::find(params.include_types.begin(), params.include_types.end(), ggml_type_name(type)) == params.include_types.end()) {
continue;