Add additional fixes
- Change bind to lambdas
- Change push_back to emplace_back
- Replace for with range-based for
- Use auto to avoid duplication
- Use bool values instead of 0
- Use pass-by-value with std::move
This commit is contained in:
parent
e41d94972c
commit
55b008cdec
28 changed files with 237 additions and 259 deletions
|
@ -511,7 +511,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
|
|||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.lora_adapter.push_back(std::make_tuple(argv[i], 1.0f));
|
||||
params.lora_adapter.emplace_back(argv[i], 1.0f);
|
||||
params.use_mmap = false;
|
||||
} else if (arg == "--lora-scaled") {
|
||||
if (++i >= argc) {
|
||||
|
@ -523,7 +523,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
|
|||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.lora_adapter.push_back(std::make_tuple(lora_adapter, std::stof(argv[i])));
|
||||
params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
|
||||
params.use_mmap = false;
|
||||
} else if (arg == "--lora-base") {
|
||||
if (++i >= argc) {
|
||||
|
@ -875,7 +875,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
|
|||
}
|
||||
|
||||
if (!params.kv_overrides.empty()) {
|
||||
params.kv_overrides.emplace_back(llama_model_kv_override());
|
||||
params.kv_overrides.emplace_back();
|
||||
params.kv_overrides.back().key[0] = 0;
|
||||
}
|
||||
|
||||
|
@ -1335,8 +1335,8 @@ std::string llama_detokenize_bpe(llama_context * ctx, const std::vector<llama_to
|
|||
std::string piece;
|
||||
std::string result;
|
||||
|
||||
for (size_t i = 0; i < tokens.size(); ++i) {
|
||||
piece = llama_token_to_piece(ctx, tokens[i]);
|
||||
for (int token : tokens) {
|
||||
piece = llama_token_to_piece(ctx, token);
|
||||
|
||||
result += piece;
|
||||
}
|
||||
|
|
|
@ -296,9 +296,9 @@ namespace grammar_parser {
|
|||
|
||||
static bool is_char_element(llama_grammar_element elem) {
|
||||
switch (elem.type) {
|
||||
case LLAMA_GRETYPE_CHAR: return true;
|
||||
case LLAMA_GRETYPE_CHAR_NOT: return true;
|
||||
case LLAMA_GRETYPE_CHAR_ALT: return true;
|
||||
case LLAMA_GRETYPE_CHAR:
|
||||
case LLAMA_GRETYPE_CHAR_NOT:
|
||||
case LLAMA_GRETYPE_CHAR_ALT:
|
||||
case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true;
|
||||
default: return false;
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#include "sampling.h"
|
||||
|
||||
struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params) {
|
||||
struct llama_sampling_context * result = new llama_sampling_context();
|
||||
auto result = new llama_sampling_context();
|
||||
|
||||
result->params = params;
|
||||
result->grammar = nullptr;
|
||||
|
@ -197,8 +197,8 @@ static llama_token llama_sampling_sample_impl(
|
|||
}
|
||||
|
||||
// apply params.logit_bias map
|
||||
for (auto it = params.logit_bias.begin(); it != params.logit_bias.end(); it++) {
|
||||
logits[it->first] += it->second;
|
||||
for (auto logit_bia : params.logit_bias) {
|
||||
logits[logit_bia.first] += logit_bia.second;
|
||||
}
|
||||
|
||||
if (ctx_cfg) {
|
||||
|
|
|
@ -18,7 +18,7 @@ struct random_uniform_distribution {
|
|||
};
|
||||
|
||||
struct train_state * init_train_state() {
|
||||
struct train_state * state = new struct train_state;
|
||||
auto state = new struct train_state;
|
||||
state->train_its = 0;
|
||||
state->train_samples = 0;
|
||||
state->train_tokens = 0;
|
||||
|
@ -1379,7 +1379,7 @@ void finish_processing_train_args(struct train_params_common * params) {
|
|||
}
|
||||
|
||||
void train_opt_callback(void * vdata, int accum_step, float * sched, bool * cancel) {
|
||||
struct train_opt_callback_data * data = (struct train_opt_callback_data *) vdata;
|
||||
auto data = (struct train_opt_callback_data *) vdata;
|
||||
struct train_params_common * params = data->params;
|
||||
struct train_state * train = data->train;
|
||||
struct ggml_opt_context * opt = train->opt;
|
||||
|
|
|
@ -164,13 +164,9 @@ int main(int argc, char ** argv) {
|
|||
LOG_TEE("|%6s | %6s | %4s | %6s | %8s | %8s | %8s | %8s | %8s | %8s |\n", "PP", "TG", "B", "N_KV", "T_PP s", "S_PP t/s", "T_TG s", "S_TG t/s", "T s", "S t/s");
|
||||
LOG_TEE("|%6s-|-%6s-|-%4s-|-%6s-|-%8s-|-%8s-|-%8s-|-%8s-|-%8s-|-%8s-|\n", "------", "------", "----", "------", "--------", "--------", "--------", "--------", "--------", "--------");
|
||||
|
||||
for ( int i_pp = 0; i_pp < (int) n_pp.size(); ++i_pp) {
|
||||
for ( int i_tg = 0; i_tg < (int) n_tg.size(); ++i_tg) {
|
||||
for (int i_pl = 0; i_pl < (int) n_pl.size(); ++i_pl) {
|
||||
const int pp = n_pp[i_pp];
|
||||
const int tg = n_tg[i_tg];
|
||||
const int pl = n_pl[i_pl];
|
||||
|
||||
for (int pp : n_pp) {
|
||||
for (int tg : n_tg) {
|
||||
for (int pl : n_pl) {
|
||||
const int n_ctx_req = is_pp_shared ? pp + pl*tg : pl*(pp + tg);
|
||||
|
||||
if (n_ctx_req > n_kv_max) {
|
||||
|
|
|
@ -143,7 +143,7 @@ int main(int argc, char ** argv) {
|
|||
struct ggml_init_params params = {
|
||||
/*.mem_size =*/ ctx_size,
|
||||
/*.mem_buffer =*/ NULL,
|
||||
/* no_alloc =*/ 0
|
||||
/* no_alloc =*/ false
|
||||
};
|
||||
|
||||
ctx = ggml_init(params);
|
||||
|
|
|
@ -64,8 +64,8 @@ int main(int argc, char ** argv) {
|
|||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
|
||||
fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
|
||||
for (int i = 0; i < (int) embd_inp.size(); i++) {
|
||||
fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_piece(ctx, embd_inp[i]).c_str());
|
||||
for (int i : embd_inp) {
|
||||
fprintf(stderr, "%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
|
|
@ -225,7 +225,7 @@ static void free_lora(struct lora_data * lora) {
|
|||
}
|
||||
|
||||
static struct lora_data * load_lora(struct lora_info * info) {
|
||||
struct lora_data * result = new struct lora_data;
|
||||
auto result = new struct lora_data;
|
||||
result->info = *info;
|
||||
result->ctx = NULL;
|
||||
result->lora_r = 1;
|
||||
|
@ -370,9 +370,9 @@ static bool apply_lora(struct ggml_tensor * tensor, struct lora_data * lora, int
|
|||
static void export_lora(struct export_lora_params * params) {
|
||||
// load all loras
|
||||
std::vector<struct lora_data *> loras;
|
||||
for (size_t i = 0; i < params->lora.size(); ++i) {
|
||||
struct lora_data * lora = load_lora(&params->lora[i]);
|
||||
if (lora != NULL) {
|
||||
for (auto& i : params->lora) {
|
||||
auto lora = load_lora(&i);
|
||||
if (lora) {
|
||||
loras.push_back(lora);
|
||||
}
|
||||
}
|
||||
|
@ -431,8 +431,8 @@ static void export_lora(struct export_lora_params * params) {
|
|||
fin.read_raw(data.data(), data.size());
|
||||
|
||||
// apply all loras
|
||||
for (size_t k = 0; k < loras.size(); ++k) {
|
||||
apply_lora(tensor, loras[k], params->n_threads);
|
||||
for (auto& lora : loras) {
|
||||
apply_lora(tensor, lora, params->n_threads);
|
||||
}
|
||||
|
||||
// write tensor data + padding
|
||||
|
@ -455,8 +455,8 @@ static void export_lora(struct export_lora_params * params) {
|
|||
gguf_free(gguf_in);
|
||||
|
||||
// free loras
|
||||
for (size_t i = 0; i < loras.size(); ++i) {
|
||||
free_lora(loras[i]);
|
||||
for (auto& lora : loras) {
|
||||
free_lora(lora);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -379,8 +379,7 @@ static void alloc_lora(struct ggml_allocr * alloc, struct my_llama_lora * lora)
|
|||
ggml_allocr_alloc(alloc, lora->norm_b);
|
||||
ggml_allocr_alloc(alloc, lora->output_a);
|
||||
ggml_allocr_alloc(alloc, lora->output_b);
|
||||
for (uint32_t i = 0; i < lora->layers.size(); ++i) {
|
||||
auto & layer = lora->layers[i];
|
||||
for (auto& layer : lora->layers) {
|
||||
ggml_allocr_alloc(alloc, layer.attention_norm_a);
|
||||
ggml_allocr_alloc(alloc, layer.attention_norm_b);
|
||||
ggml_allocr_alloc(alloc, layer.wq_a);
|
||||
|
@ -406,8 +405,7 @@ static void alloc_lora(struct ggml_allocr * alloc, struct my_llama_lora * lora)
|
|||
ggml_allocr_alloc(alloc, lora->norm_b->grad);
|
||||
ggml_allocr_alloc(alloc, lora->output_a->grad);
|
||||
ggml_allocr_alloc(alloc, lora->output_b->grad);
|
||||
for (uint32_t i = 0; i < lora->layers.size(); ++i) {
|
||||
auto & layer = lora->layers[i];
|
||||
for (auto& layer : lora->layers) {
|
||||
ggml_allocr_alloc(alloc, layer.attention_norm_a->grad);
|
||||
ggml_allocr_alloc(alloc, layer.attention_norm_b->grad);
|
||||
ggml_allocr_alloc(alloc, layer.wq_a->grad);
|
||||
|
@ -803,9 +801,9 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs(
|
|||
|
||||
// allocating checkpoints in one block to reduce memory fragmentation
|
||||
// note: they will be freed in reverse order
|
||||
for (unsigned int i = 0; i < checkpoints.size(); ++i) {
|
||||
if (checkpoints[i]->data == NULL && checkpoints[i]->view_src == NULL) {
|
||||
ggml_allocr_alloc(alloc, checkpoints[i]);
|
||||
for (auto& checkpoint : checkpoints) {
|
||||
if (checkpoint->data == NULL && checkpoint->view_src == NULL) {
|
||||
ggml_allocr_alloc(alloc, checkpoint);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -872,8 +870,7 @@ static void load_llama_lora_gguf(struct gguf_context * fctx, struct ggml_context
|
|||
copy_tensor_by_name(lora->output_a, f_ggml_ctx, ggml_get_name(lora->output_a));
|
||||
copy_tensor_by_name(lora->output_b, f_ggml_ctx, ggml_get_name(lora->output_b));
|
||||
|
||||
for (uint32_t i = 0; i < lora->layers.size(); ++i) {
|
||||
auto & layer = lora->layers[i];
|
||||
for (auto& layer : lora->layers) {
|
||||
copy_tensor_by_name(layer.attention_norm_a, f_ggml_ctx, ggml_get_name(layer.attention_norm_a));
|
||||
copy_tensor_by_name(layer.attention_norm_b, f_ggml_ctx, ggml_get_name(layer.attention_norm_b));
|
||||
copy_tensor_by_name(layer.wq_a, f_ggml_ctx, ggml_get_name(layer.wq_a));
|
||||
|
@ -940,9 +937,7 @@ static void save_llama_lora_gguf(struct gguf_context * fctx, struct my_llama_mod
|
|||
gguf_add_tensor(fctx, lora->output_a);
|
||||
gguf_add_tensor(fctx, lora->output_b);
|
||||
|
||||
for (uint32_t i = 0; i < lora->layers.size(); ++i) {
|
||||
auto & layer = lora->layers[i];
|
||||
|
||||
for (auto& layer : lora->layers) {
|
||||
gguf_add_tensor(fctx, layer.attention_norm_a);
|
||||
gguf_add_tensor(fctx, layer.attention_norm_b);
|
||||
gguf_add_tensor(fctx, layer.wq_a);
|
||||
|
@ -1476,7 +1471,7 @@ struct save_train_files_data {
|
|||
};
|
||||
|
||||
static void save_train_files(void * vdata, struct train_state * train) {
|
||||
struct save_train_files_data * data = (struct save_train_files_data *) vdata;
|
||||
auto data = (struct save_train_files_data *) vdata;
|
||||
|
||||
int64_t iter = train->opt->iter;
|
||||
|
||||
|
@ -1499,8 +1494,7 @@ static int64_t get_parameter_count(struct my_llama_lora* lora) {
|
|||
nx += ggml_nelements(lora->output_a);
|
||||
nx += ggml_nelements(lora->output_b);
|
||||
|
||||
for (uint32_t i = 0; i < lora->layers.size(); ++i) {
|
||||
auto & layer = lora->layers[i];
|
||||
for (auto& layer : lora->layers) {
|
||||
nx += ggml_nelements(layer.attention_norm_a);
|
||||
nx += ggml_nelements(layer.attention_norm_b);
|
||||
nx += ggml_nelements(layer.wq_a);
|
||||
|
@ -1817,12 +1811,12 @@ int main(int argc, char ** argv) {
|
|||
|
||||
std::vector<size_t> token_noccurs;
|
||||
token_noccurs.resize(model.hparams.n_vocab, 0);
|
||||
for (unsigned int i = 0; i < train_tokens.size(); ++i) {
|
||||
++token_noccurs[train_tokens[i]];
|
||||
for (int train_token : train_tokens) {
|
||||
++token_noccurs[train_token];
|
||||
}
|
||||
int n_unique_tokens = 0;
|
||||
for (unsigned int i = 0; i < token_noccurs.size(); ++i) {
|
||||
if (token_noccurs[i] == 0) continue;
|
||||
for (unsigned long long token_noccur : token_noccurs) {
|
||||
if (token_noccur == 0) continue;
|
||||
++n_unique_tokens;
|
||||
}
|
||||
printf("%s: number of unique tokens: %d\n", __func__, n_unique_tokens);
|
||||
|
|
|
@ -216,8 +216,8 @@ static std::vector<float> softmax(const std::vector<float>& logits) {
|
|||
sum_exp += exp_logit;
|
||||
probs[i] = exp_logit;
|
||||
}
|
||||
for (size_t i = 0; i < probs.size(); i++) {
|
||||
probs[i] /= sum_exp;
|
||||
for (float& prob : probs) {
|
||||
prob /= static_cast<float>(sum_exp);
|
||||
}
|
||||
return probs;
|
||||
}
|
||||
|
|
|
@ -313,16 +313,16 @@ int main(int argc, char ** argv) {
|
|||
LOG_TEE("\n");
|
||||
LOG_TEE("%s: prompt: '%s'\n", __func__, params.prompt.c_str());
|
||||
LOG_TEE("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
|
||||
for (int i = 0; i < (int) embd_inp.size(); i++) {
|
||||
LOG_TEE("%6d -> '%s'\n", embd_inp[i], llama_token_to_piece(ctx, embd_inp[i]).c_str());
|
||||
for (int i : embd_inp) {
|
||||
LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
|
||||
}
|
||||
|
||||
if (ctx_guidance) {
|
||||
LOG_TEE("\n");
|
||||
LOG_TEE("%s: negative prompt: '%s'\n", __func__, sparams.cfg_negative_prompt.c_str());
|
||||
LOG_TEE("%s: number of tokens in negative prompt = %zu\n", __func__, guidance_inp.size());
|
||||
for (int i = 0; i < (int) guidance_inp.size(); i++) {
|
||||
LOG_TEE("%6d -> '%s'\n", guidance_inp[i], llama_token_to_piece(ctx, guidance_inp[i]).c_str());
|
||||
for (int i : guidance_inp) {
|
||||
LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -745,7 +745,7 @@ const std::string test::cpu_info = get_cpu_info();
|
|||
const std::string test::gpu_info = get_gpu_info();
|
||||
|
||||
struct printer {
|
||||
virtual ~printer() {}
|
||||
virtual ~printer() = default;
|
||||
|
||||
FILE * fout;
|
||||
virtual void print_header(const cmd_params & params) { (void) params; }
|
||||
|
@ -891,43 +891,43 @@ struct markdown_printer : public printer {
|
|||
|
||||
void print_header(const cmd_params & params) override {
|
||||
// select fields to print
|
||||
fields.push_back("model");
|
||||
fields.push_back("size");
|
||||
fields.push_back("params");
|
||||
fields.push_back("backend");
|
||||
fields.emplace_back("model");
|
||||
fields.emplace_back("size");
|
||||
fields.emplace_back("params");
|
||||
fields.emplace_back("backend");
|
||||
bool is_cpu_backend = test::get_backend() == "CPU" || test::get_backend() == "BLAS";
|
||||
if (!is_cpu_backend) {
|
||||
fields.push_back("n_gpu_layers");
|
||||
fields.emplace_back("n_gpu_layers");
|
||||
}
|
||||
if (params.n_threads.size() > 1 || params.n_threads != cmd_params_defaults.n_threads || is_cpu_backend) {
|
||||
fields.push_back("n_threads");
|
||||
fields.emplace_back("n_threads");
|
||||
}
|
||||
if (params.n_batch.size() > 1 || params.n_batch != cmd_params_defaults.n_batch) {
|
||||
fields.push_back("n_batch");
|
||||
fields.emplace_back("n_batch");
|
||||
}
|
||||
if (params.type_k.size() > 1 || params.type_k != cmd_params_defaults.type_k) {
|
||||
fields.push_back("type_k");
|
||||
fields.emplace_back("type_k");
|
||||
}
|
||||
if (params.type_v.size() > 1 || params.type_v != cmd_params_defaults.type_v) {
|
||||
fields.push_back("type_v");
|
||||
fields.emplace_back("type_v");
|
||||
}
|
||||
if (params.main_gpu.size() > 1 || params.main_gpu != cmd_params_defaults.main_gpu) {
|
||||
fields.push_back("main_gpu");
|
||||
fields.emplace_back("main_gpu");
|
||||
}
|
||||
if (params.split_mode.size() > 1 || params.split_mode != cmd_params_defaults.split_mode) {
|
||||
fields.push_back("split_mode");
|
||||
fields.emplace_back("split_mode");
|
||||
}
|
||||
if (params.mul_mat_q.size() > 1 || params.mul_mat_q != cmd_params_defaults.mul_mat_q) {
|
||||
fields.push_back("mul_mat_q");
|
||||
fields.emplace_back("mul_mat_q");
|
||||
}
|
||||
if (params.no_kv_offload.size() > 1 || params.no_kv_offload != cmd_params_defaults.no_kv_offload) {
|
||||
fields.push_back("no_kv_offload");
|
||||
fields.emplace_back("no_kv_offload");
|
||||
}
|
||||
if (params.tensor_split.size() > 1 || params.tensor_split != cmd_params_defaults.tensor_split) {
|
||||
fields.push_back("tensor_split");
|
||||
fields.emplace_back("tensor_split");
|
||||
}
|
||||
fields.push_back("test");
|
||||
fields.push_back("t/s");
|
||||
fields.emplace_back("test");
|
||||
fields.emplace_back("t/s");
|
||||
|
||||
fprintf(fout, "|");
|
||||
for (const auto & field : fields) {
|
||||
|
|
|
@ -800,7 +800,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
|||
|
||||
buffer_size += n_tensors * 128 /* CLIP PADDING */;
|
||||
|
||||
clip_ctx * new_clip = new clip_ctx;
|
||||
auto* new_clip = new clip_ctx;
|
||||
|
||||
// update projector type
|
||||
{
|
||||
|
@ -1416,13 +1416,13 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
|
|||
printf("%s: quantized size = %8.2f MB\n", __func__, total_size_new / 1024.0 / 1024.0);
|
||||
|
||||
int64_t sum_all = 0;
|
||||
for (size_t i = 0; i < hist_all.size(); ++i) {
|
||||
sum_all += hist_all[i];
|
||||
for (auto i : hist_all) {
|
||||
sum_all += i;
|
||||
}
|
||||
|
||||
printf("%s: hist: ", __func__);
|
||||
for (size_t i = 0; i < hist_all.size(); ++i) {
|
||||
printf("%5.3f ", hist_all[i] / (float)sum_all);
|
||||
for (auto i : hist_all) {
|
||||
printf("%5.3f ", i / (float)sum_all);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
|
|
@ -132,7 +132,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
for (int i = 0; i < W; i++) {
|
||||
// there are different ways to init these tokens
|
||||
if (0) {
|
||||
if (false) {
|
||||
// initialize randomly from the prompt tokens
|
||||
tokens_j[j][i] = all[1 + rand() % (all.size() - 1)];
|
||||
} else {
|
||||
|
@ -268,10 +268,10 @@ int main(int argc, char ** argv) {
|
|||
|
||||
// if no active ngrams are left, it means the sampled token does not pass the verification
|
||||
if (v > 0) {
|
||||
for (int g = 0; g < (int) ngrams_cur.size(); g++) {
|
||||
if (ngrams_cur[g].active) {
|
||||
i_batch = ngrams_cur[g].i_batch[v];
|
||||
seq_id_best = ngrams_cur[g].seq_id;
|
||||
for (auto& g : ngrams_cur) {
|
||||
if (g.active) {
|
||||
i_batch = g.i_batch[v];
|
||||
seq_id_best = g.seq_id;
|
||||
|
||||
++n_accept;
|
||||
break;
|
||||
|
@ -316,20 +316,20 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
|
||||
// verify across active n-grams
|
||||
for (int g = 0; g < (int) ngrams_cur.size(); g++) {
|
||||
if (ngrams_cur[g].active) {
|
||||
for (auto& g : ngrams_cur) {
|
||||
if (g.active) {
|
||||
if (v == N - 1) {
|
||||
ngrams_cur[g].active = false;
|
||||
g.active = false;
|
||||
} else {
|
||||
if (id != ngrams_cur[g].tokens[v + 1]) {
|
||||
ngrams_cur[g].active = false;
|
||||
if (id != g.tokens[v + 1]) {
|
||||
g.active = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// print known n-grams starting with token id (debug)
|
||||
if (0 && v == 0) {
|
||||
if (false && v == 0) {
|
||||
if (ngrams_observed.cnt[id] > 0) {
|
||||
printf("\n - %d n-grams starting with '%s'\n", ngrams_observed.cnt[id], llama_token_to_piece(ctx, id).c_str());
|
||||
}
|
||||
|
@ -367,7 +367,7 @@ int main(int argc, char ** argv) {
|
|||
} else {
|
||||
for (int i = 0; i < W; i++) {
|
||||
// there are different ways to init these tokens
|
||||
if (0) {
|
||||
if (false) {
|
||||
// random init
|
||||
tokens_j[N - 2][i] = all[1 + rand() % (all.size() - 1)];
|
||||
} else {
|
||||
|
|
|
@ -344,12 +344,12 @@ int main(int argc, char ** argv) {
|
|||
// in instruct mode, we inject a prefix and a suffix to each input by the user
|
||||
if (params.instruct) {
|
||||
params.interactive_first = true;
|
||||
params.antiprompt.push_back("### Instruction:\n\n");
|
||||
params.antiprompt.emplace_back("### Instruction:\n\n");
|
||||
}
|
||||
// similar for chatml mode
|
||||
else if (params.chatml) {
|
||||
params.interactive_first = true;
|
||||
params.antiprompt.push_back("<|im_start|>user\n");
|
||||
params.antiprompt.emplace_back("<|im_start|>user\n");
|
||||
}
|
||||
|
||||
// enable interactive mode if interactive start is specified
|
||||
|
@ -361,16 +361,16 @@ int main(int argc, char ** argv) {
|
|||
LOG_TEE("\n");
|
||||
LOG_TEE("%s: prompt: '%s'\n", __func__, params.prompt.c_str());
|
||||
LOG_TEE("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
|
||||
for (int i = 0; i < (int) embd_inp.size(); i++) {
|
||||
LOG_TEE("%6d -> '%s'\n", embd_inp[i], llama_token_to_piece(ctx, embd_inp[i]).c_str());
|
||||
for (int i : embd_inp) {
|
||||
LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
|
||||
}
|
||||
|
||||
if (ctx_guidance) {
|
||||
LOG_TEE("\n");
|
||||
LOG_TEE("%s: negative prompt: '%s'\n", __func__, sparams.cfg_negative_prompt.c_str());
|
||||
LOG_TEE("%s: number of tokens in negative prompt = %zu\n", __func__, guidance_inp.size());
|
||||
for (int i = 0; i < (int) guidance_inp.size(); i++) {
|
||||
LOG_TEE("%6d -> '%s'\n", guidance_inp[i], llama_token_to_piece(ctx, guidance_inp[i]).c_str());
|
||||
for (int i : guidance_inp) {
|
||||
LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -405,8 +405,8 @@ int main(int argc, char ** argv) {
|
|||
LOG_TEE("Reverse prompt: '%s'\n", antiprompt.c_str());
|
||||
if (params.verbose_prompt) {
|
||||
auto tmp = ::llama_tokenize(ctx, antiprompt, false, true);
|
||||
for (int i = 0; i < (int) tmp.size(); i++) {
|
||||
LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str());
|
||||
for (int i : tmp) {
|
||||
LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -420,8 +420,8 @@ int main(int argc, char ** argv) {
|
|||
LOG_TEE("Input prefix: '%s'\n", params.input_prefix.c_str());
|
||||
if (params.verbose_prompt) {
|
||||
auto tmp = ::llama_tokenize(ctx, params.input_prefix, true, true);
|
||||
for (int i = 0; i < (int) tmp.size(); i++) {
|
||||
LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str());
|
||||
for (int i : tmp) {
|
||||
LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -430,8 +430,8 @@ int main(int argc, char ** argv) {
|
|||
LOG_TEE("Input suffix: '%s'\n", params.input_suffix.c_str());
|
||||
if (params.verbose_prompt) {
|
||||
auto tmp = ::llama_tokenize(ctx, params.input_suffix, false, true);
|
||||
for (int i = 0; i < (int) tmp.size(); i++) {
|
||||
LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str());
|
||||
for (int i : tmp) {
|
||||
LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -94,8 +94,8 @@ static std::vector<float> softmax(const std::vector<float>& logits) {
|
|||
sum_exp += exp_logit;
|
||||
probs[i] = exp_logit;
|
||||
}
|
||||
for (size_t i = 0; i < probs.size(); i++) {
|
||||
probs[i] /= sum_exp;
|
||||
for (float& prob : probs) {
|
||||
prob /= static_cast<float>(sum_exp);
|
||||
}
|
||||
return probs;
|
||||
}
|
||||
|
@ -881,7 +881,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
|
|||
size_t li = hs_cur.common_prefix;
|
||||
for (int s = 0; s < 4; ++s) {
|
||||
for (size_t j = hs_cur.common_prefix; j < hs_cur.seq_tokens[s].size() - 1; j++) {
|
||||
eval_pairs.push_back(std::make_pair(hs_cur.i_batch + li++, hs_cur.seq_tokens[s][j + 1]));
|
||||
eval_pairs.emplace_back(hs_cur.i_batch + li++, hs_cur.seq_tokens[s][j + 1]);
|
||||
}
|
||||
++li;
|
||||
}
|
||||
|
@ -997,7 +997,7 @@ static std::vector<winogrande_entry> load_winogrande_from_csv(const std::string&
|
|||
printf("%s: no _ in <%s>\n", __func__, sentence.c_str());
|
||||
continue;
|
||||
}
|
||||
std::istringstream stream(answer.c_str());
|
||||
std::istringstream stream(answer);
|
||||
int i_answer; stream >> i_answer;
|
||||
if (stream.fail() || i_answer < 1 || i_answer > 2) {
|
||||
printf("%s: failed to parse answer <%s>\n", __func__, answer.c_str());
|
||||
|
@ -1158,13 +1158,13 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
|
|||
const int last_1st = task.seq_tokens[0].size() - n_base1 > 1 ? 1 : 0;
|
||||
size_t li = n_base1 - 1;
|
||||
for (size_t j = n_base1-1; j < task.seq_tokens[0].size()-1-last_1st; ++j) {
|
||||
eval_pairs.push_back(std::make_pair(task.i_batch + li++, task.seq_tokens[0][j+1]));
|
||||
eval_pairs.emplace_back(task.i_batch + li++, task.seq_tokens[0][j+1]);
|
||||
}
|
||||
const auto& n_base2 = skip_choice ? task.n_base2 : task.common_prefix;
|
||||
const int last_2nd = task.seq_tokens[1].size() - n_base2 > 1 ? 1 : 0;
|
||||
li = task.seq_tokens[0].size() - task.common_prefix + n_base2 - 1;
|
||||
for (size_t j = n_base2-1; j < task.seq_tokens[1].size()-1-last_2nd; ++j) {
|
||||
eval_pairs.push_back(std::make_pair(task.i_batch + li++, task.seq_tokens[1][j+1]));
|
||||
eval_pairs.emplace_back(task.i_batch + li++, task.seq_tokens[1][j+1]);
|
||||
}
|
||||
}
|
||||
compute_logprobs(batch_logits.data(), n_vocab, workers, eval_pairs, eval_results);
|
||||
|
@ -1221,7 +1221,7 @@ static bool deserialize_string(std::istream & in, std::string & str) {
|
|||
uint32_t size;
|
||||
if (!in.read((char *)&size, sizeof(size)).fail()) {
|
||||
str.resize(size);
|
||||
if (!in.read((char *)&str[0], size).fail()) return true;
|
||||
if (!in.read((char *)str.data(), size).fail()) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -1523,7 +1523,7 @@ static void multiple_choice_score(llama_context * ctx, const gpt_params & params
|
|||
size_t li = cur_task.common_prefix;
|
||||
for (int s = 0; s < int(cur_task.seq_tokens.size()); ++s) {
|
||||
for (size_t j = cur_task.common_prefix; j < cur_task.seq_tokens[s].size() - 1; j++) {
|
||||
eval_pairs.push_back(std::make_pair(cur_task.i_batch + li++, cur_task.seq_tokens[s][j + 1]));
|
||||
eval_pairs.emplace_back(cur_task.i_batch + li++, cur_task.seq_tokens[s][j + 1]);
|
||||
}
|
||||
++li;
|
||||
}
|
||||
|
|
|
@ -223,6 +223,7 @@ using socket_t = int;
|
|||
#include <string>
|
||||
#include <sys/stat.h>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
|
||||
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
|
||||
#ifdef _WIN32
|
||||
|
@ -705,7 +706,7 @@ public:
|
|||
Server &set_file_request_handler(Handler handler);
|
||||
|
||||
Server &set_error_handler(HandlerWithResponse handler);
|
||||
Server &set_error_handler(Handler handler);
|
||||
Server &set_error_handler(const Handler& handler);
|
||||
Server &set_exception_handler(ExceptionHandler handler);
|
||||
Server &set_pre_routing_handler(HandlerWithResponse handler);
|
||||
Server &set_post_routing_handler(Handler handler);
|
||||
|
@ -781,7 +782,7 @@ private:
|
|||
bool dispatch_request(Request &req, Response &res, const Handlers &handlers);
|
||||
bool
|
||||
dispatch_request_for_content_reader(Request &req, Response &res,
|
||||
ContentReader content_reader,
|
||||
const ContentReader& content_reader,
|
||||
const HandlersForContentReader &handlers);
|
||||
|
||||
bool parse_request_line(const char *s, Request &req);
|
||||
|
@ -804,7 +805,7 @@ private:
|
|||
MultipartContentHeader multipart_header,
|
||||
ContentReceiver multipart_receiver);
|
||||
bool read_content_core(Stream &strm, Request &req, Response &res,
|
||||
ContentReceiver receiver,
|
||||
const ContentReceiver& receiver,
|
||||
MultipartContentHeader multipart_header,
|
||||
ContentReceiver multipart_receiver);
|
||||
|
||||
|
@ -910,8 +911,8 @@ public:
|
|||
explicit ClientImpl(const std::string &host, int port);
|
||||
|
||||
explicit ClientImpl(const std::string &host, int port,
|
||||
const std::string &client_cert_path,
|
||||
const std::string &client_key_path);
|
||||
std::string client_cert_path,
|
||||
std::string client_key_path);
|
||||
|
||||
virtual ~ClientImpl();
|
||||
|
||||
|
@ -937,7 +938,7 @@ public:
|
|||
Result Get(const std::string &path, ResponseHandler response_handler,
|
||||
ContentReceiver content_receiver, Progress progress);
|
||||
Result Get(const std::string &path, const Headers &headers,
|
||||
ResponseHandler response_handler, ContentReceiver content_receiver,
|
||||
ResponseHandler response_handler, const ContentReceiver& content_receiver,
|
||||
Progress progress);
|
||||
|
||||
Result Get(const std::string &path, const Params &params,
|
||||
|
@ -946,8 +947,8 @@ public:
|
|||
const Headers &headers, ContentReceiver content_receiver,
|
||||
Progress progress = nullptr);
|
||||
Result Get(const std::string &path, const Params &params,
|
||||
const Headers &headers, ResponseHandler response_handler,
|
||||
ContentReceiver content_receiver, Progress progress = nullptr);
|
||||
const Headers &headers, const ResponseHandler& response_handler,
|
||||
const ContentReceiver& content_receiver, const Progress& progress = nullptr);
|
||||
|
||||
Result Head(const std::string &path);
|
||||
Result Head(const std::string &path, const Headers &headers);
|
||||
|
@ -1790,7 +1791,7 @@ void hosted_at(const std::string &hostname, std::vector<std::string> &addrs);
|
|||
|
||||
std::string append_query_params(const std::string &path, const Params &params);
|
||||
|
||||
std::pair<std::string, std::string> make_range_header(Ranges ranges);
|
||||
std::pair<std::string, std::string> make_range_header(const Ranges& ranges);
|
||||
|
||||
std::pair<std::string, std::string>
|
||||
make_basic_authentication_header(const std::string &username,
|
||||
|
@ -1808,12 +1809,12 @@ void read_file(const std::string &path, std::string &out);
|
|||
std::string trim_copy(const std::string &s);
|
||||
|
||||
void split(const char *b, const char *e, char d,
|
||||
std::function<void(const char *, const char *)> fn);
|
||||
const std::function<void(const char *, const char *)>& fn);
|
||||
|
||||
bool process_client_socket(socket_t sock, time_t read_timeout_sec,
|
||||
time_t read_timeout_usec, time_t write_timeout_sec,
|
||||
time_t write_timeout_usec,
|
||||
std::function<bool(Stream &)> callback);
|
||||
const std::function<bool(Stream &)>& callback);
|
||||
|
||||
socket_t create_client_socket(
|
||||
const std::string &host, const std::string &ip, int port,
|
||||
|
@ -2231,8 +2232,8 @@ inline void read_file(const std::string &path, std::string &out) {
|
|||
fs.seekg(0, std::ios_base::end);
|
||||
auto size = fs.tellg();
|
||||
fs.seekg(0);
|
||||
out.resize(static_cast<size_t>(size));
|
||||
fs.read(&out[0], static_cast<std::streamsize>(size));
|
||||
out.resize(size);
|
||||
fs.read(&out[0], size);
|
||||
}
|
||||
|
||||
inline std::string file_extension(const std::string &path) {
|
||||
|
@ -2261,7 +2262,7 @@ inline std::string trim_copy(const std::string &s) {
|
|||
}
|
||||
|
||||
inline void split(const char *b, const char *e, char d,
|
||||
std::function<void(const char *, const char *)> fn) {
|
||||
const std::function<void(const char *, const char *)>& fn) {
|
||||
size_t i = 0;
|
||||
size_t beg = 0;
|
||||
|
||||
|
@ -2624,7 +2625,7 @@ inline bool process_client_socket(socket_t sock, time_t read_timeout_sec,
|
|||
time_t read_timeout_usec,
|
||||
time_t write_timeout_sec,
|
||||
time_t write_timeout_usec,
|
||||
std::function<bool(Stream &)> callback) {
|
||||
const std::function<bool(Stream &)>& callback) {
|
||||
SocketStream strm(sock, read_timeout_sec, read_timeout_usec,
|
||||
write_timeout_sec, write_timeout_usec);
|
||||
return callback(strm);
|
||||
|
@ -3037,7 +3038,7 @@ find_content_type(const std::string &path,
|
|||
case "svg"_t: return "image/svg+xml";
|
||||
case "webp"_t: return "image/webp";
|
||||
case "ico"_t: return "image/x-icon";
|
||||
case "tif"_t: return "image/tiff";
|
||||
case "tif"_t:
|
||||
case "tiff"_t: return "image/tiff";
|
||||
case "jpg"_t:
|
||||
case "jpeg"_t: return "image/jpeg";
|
||||
|
@ -3488,13 +3489,13 @@ inline bool read_headers(Stream &strm, Headers &headers) {
|
|||
}
|
||||
|
||||
inline bool read_content_with_length(Stream &strm, uint64_t len,
|
||||
Progress progress,
|
||||
ContentReceiverWithProgress out) {
|
||||
const Progress& progress,
|
||||
const ContentReceiverWithProgress& out) {
|
||||
char buf[CPPHTTPLIB_RECV_BUFSIZ];
|
||||
|
||||
uint64_t r = 0;
|
||||
while (r < len) {
|
||||
auto read_len = static_cast<size_t>(len - r);
|
||||
auto read_len = len - r;
|
||||
auto n = strm.read(buf, (std::min)(read_len, CPPHTTPLIB_RECV_BUFSIZ));
|
||||
if (n <= 0) { return false; }
|
||||
|
||||
|
@ -3513,7 +3514,7 @@ inline void skip_content_with_length(Stream &strm, uint64_t len) {
|
|||
char buf[CPPHTTPLIB_RECV_BUFSIZ];
|
||||
uint64_t r = 0;
|
||||
while (r < len) {
|
||||
auto read_len = static_cast<size_t>(len - r);
|
||||
auto read_len = len - r;
|
||||
auto n = strm.read(buf, (std::min)(read_len, CPPHTTPLIB_RECV_BUFSIZ));
|
||||
if (n <= 0) { return; }
|
||||
r += static_cast<uint64_t>(n);
|
||||
|
@ -3521,7 +3522,7 @@ inline void skip_content_with_length(Stream &strm, uint64_t len) {
|
|||
}
|
||||
|
||||
inline bool read_content_without_length(Stream &strm,
|
||||
ContentReceiverWithProgress out) {
|
||||
const ContentReceiverWithProgress& out) {
|
||||
char buf[CPPHTTPLIB_RECV_BUFSIZ];
|
||||
uint64_t r = 0;
|
||||
for (;;) {
|
||||
|
@ -3983,12 +3984,12 @@ inline bool parse_range_header(const std::string &s, Ranges &ranges) try {
|
|||
if (std::regex_match(b, e, cm, re_another_range)) {
|
||||
ssize_t first = -1;
|
||||
if (!cm.str(1).empty()) {
|
||||
first = static_cast<ssize_t>(std::stoll(cm.str(1)));
|
||||
first = std::stoll(cm.str(1));
|
||||
}
|
||||
|
||||
ssize_t last = -1;
|
||||
if (!cm.str(2).empty()) {
|
||||
last = static_cast<ssize_t>(std::stoll(cm.str(2)));
|
||||
last = std::stoll(cm.str(2));
|
||||
}
|
||||
|
||||
if (first != -1 && last != -1 && first > last) {
|
||||
|
@ -4254,9 +4255,8 @@ inline std::string make_multipart_data_boundary() {
|
|||
|
||||
inline bool is_multipart_boundary_chars_valid(const std::string &boundary) {
|
||||
auto valid = true;
|
||||
for (size_t i = 0; i < boundary.size(); i++) {
|
||||
auto c = boundary[i];
|
||||
if (!std::isalnum(c) && c != '-' && c != '_') {
|
||||
for (char c : boundary) {
|
||||
if (!std::isalnum(c) && c != '-' && c != '_') {
|
||||
valid = false;
|
||||
break;
|
||||
}
|
||||
|
@ -4707,7 +4707,7 @@ inline bool parse_www_authenticate(const Response &res,
|
|||
s = s.substr(pos + 1);
|
||||
auto beg = std::sregex_iterator(s.begin(), s.end(), re);
|
||||
for (auto i = beg; i != std::sregex_iterator(); ++i) {
|
||||
auto m = *i;
|
||||
const auto& m = *i;
|
||||
auto key = s.substr(static_cast<size_t>(m.position(1)),
|
||||
static_cast<size_t>(m.length(1)));
|
||||
auto val = m.length(2) > 0
|
||||
|
@ -4802,7 +4802,7 @@ inline std::string append_query_params(const std::string &path,
|
|||
}
|
||||
|
||||
// Header utilities
|
||||
inline std::pair<std::string, std::string> make_range_header(Ranges ranges) {
|
||||
inline std::pair<std::string, std::string> make_range_header(const Ranges& ranges) {
|
||||
std::string field = "bytes=";
|
||||
auto i = 0;
|
||||
for (auto r : ranges) {
|
||||
|
@ -4949,7 +4949,7 @@ inline void Response::set_content_provider(
|
|||
set_header("Content-Type", content_type);
|
||||
content_length_ = in_length;
|
||||
if (in_length > 0) { content_provider_ = std::move(provider); }
|
||||
content_provider_resource_releaser_ = resource_releaser;
|
||||
content_provider_resource_releaser_ = std::move(resource_releaser);
|
||||
is_chunked_content_provider_ = false;
|
||||
}
|
||||
|
||||
|
@ -4959,7 +4959,7 @@ inline void Response::set_content_provider(
|
|||
set_header("Content-Type", content_type);
|
||||
content_length_ = 0;
|
||||
content_provider_ = detail::ContentProviderAdapter(std::move(provider));
|
||||
content_provider_resource_releaser_ = resource_releaser;
|
||||
content_provider_resource_releaser_ = std::move(resource_releaser);
|
||||
is_chunked_content_provider_ = false;
|
||||
}
|
||||
|
||||
|
@ -4969,7 +4969,7 @@ inline void Response::set_chunked_content_provider(
|
|||
set_header("Content-Type", content_type);
|
||||
content_length_ = 0;
|
||||
content_provider_ = detail::ContentProviderAdapter(std::move(provider));
|
||||
content_provider_resource_releaser_ = resource_releaser;
|
||||
content_provider_resource_releaser_ = std::move(resource_releaser);
|
||||
is_chunked_content_provider_ = true;
|
||||
}
|
||||
|
||||
|
@ -5010,7 +5010,7 @@ inline SocketStream::SocketStream(socket_t sock, time_t read_timeout_sec,
|
|||
write_timeout_sec_(write_timeout_sec),
|
||||
write_timeout_usec_(write_timeout_usec), read_buff_(read_buff_size_, 0) {}
|
||||
|
||||
inline SocketStream::~SocketStream() {}
|
||||
inline SocketStream::~SocketStream() = default;
|
||||
|
||||
inline bool SocketStream::is_readable() const {
|
||||
return select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0;
|
||||
|
@ -5101,7 +5101,7 @@ inline ssize_t BufferStream::read(char *ptr, size_t size) {
|
|||
#else
|
||||
auto len_read = buffer.copy(ptr, size, position);
|
||||
#endif
|
||||
position += static_cast<size_t>(len_read);
|
||||
position += len_read;
|
||||
return static_cast<ssize_t>(len_read);
|
||||
}
|
||||
|
||||
|
@ -5131,7 +5131,7 @@ inline Server::Server()
|
|||
#endif
|
||||
}
|
||||
|
||||
inline Server::~Server() {}
|
||||
inline Server::~Server() = default;
|
||||
|
||||
inline Server &Server::Get(const std::string &pattern, Handler handler) {
|
||||
get_handlers_.push_back(
|
||||
|
@ -5241,7 +5241,7 @@ inline Server &Server::set_error_handler(HandlerWithResponse handler) {
|
|||
return *this;
|
||||
}
|
||||
|
||||
inline Server &Server::set_error_handler(Handler handler) {
|
||||
inline Server &Server::set_error_handler(const Handler& handler) {
|
||||
error_handler_ = [handler](const Request &req, Response &res) {
|
||||
handler(req, res);
|
||||
return HandlerResponse::Handled;
|
||||
|
@ -5618,7 +5618,7 @@ inline bool Server::read_content_with_content_receiver(
|
|||
}
|
||||
|
||||
inline bool Server::read_content_core(Stream &strm, Request &req, Response &res,
|
||||
ContentReceiver receiver,
|
||||
const ContentReceiver& receiver,
|
||||
MultipartContentHeader multipart_header,
|
||||
ContentReceiver multipart_receiver) {
|
||||
detail::MultipartFormDataParser multipart_form_data_parser;
|
||||
|
@ -5688,7 +5688,7 @@ inline bool Server::handle_file_request(const Request &req, Response &res,
|
|||
detail::find_content_type(path, file_extension_and_mimetype_map_);
|
||||
if (type) { res.set_header("Content-Type", type); }
|
||||
for (const auto &kv : entry.headers) {
|
||||
res.set_header(kv.first.c_str(), kv.second);
|
||||
res.set_header(kv.first, kv.second);
|
||||
}
|
||||
res.status = req.has_header("Range") ? 206 : 200;
|
||||
if (!head && file_request_handler_) {
|
||||
|
@ -6024,7 +6024,7 @@ inline void Server::apply_ranges(const Request &req, Response &res,
|
|||
}
|
||||
|
||||
inline bool Server::dispatch_request_for_content_reader(
|
||||
Request &req, Response &res, ContentReader content_reader,
|
||||
Request &req, Response &res, const ContentReader& content_reader,
|
||||
const HandlersForContentReader &handlers) {
|
||||
for (const auto &x : handlers) {
|
||||
const auto &pattern = x.first;
|
||||
|
@ -6202,11 +6202,11 @@ inline ClientImpl::ClientImpl(const std::string &host, int port)
|
|||
: ClientImpl(host, port, std::string(), std::string()) {}
|
||||
|
||||
inline ClientImpl::ClientImpl(const std::string &host, int port,
|
||||
const std::string &client_cert_path,
|
||||
const std::string &client_key_path)
|
||||
std::string client_cert_path,
|
||||
std::string client_key_path)
|
||||
: host_(host), port_(port),
|
||||
host_and_port_(adjust_host_string(host) + ":" + std::to_string(port)),
|
||||
client_cert_path_(client_cert_path), client_key_path_(client_key_path) {}
|
||||
client_cert_path_(std::move(client_cert_path)), client_key_path_(std::move(client_key_path)) {}
|
||||
|
||||
inline ClientImpl::~ClientImpl() {
|
||||
std::lock_guard<std::mutex> guard(socket_mutex_);
|
||||
|
@ -6579,7 +6579,7 @@ inline bool ClientImpl::redirect(Request &req, Response &res, Error &error) {
|
|||
return false;
|
||||
#endif
|
||||
} else {
|
||||
ClientImpl cli(next_host.c_str(), next_port);
|
||||
ClientImpl cli(next_host, next_port);
|
||||
cli.copy_settings(*this);
|
||||
return detail::redirect(cli, req, res, path, location, error);
|
||||
}
|
||||
|
@ -7056,7 +7056,7 @@ inline Result ClientImpl::Get(const std::string &path,
|
|||
|
||||
inline Result ClientImpl::Get(const std::string &path, const Headers &headers,
|
||||
ResponseHandler response_handler,
|
||||
ContentReceiver content_receiver,
|
||||
const ContentReceiver& content_receiver,
|
||||
Progress progress) {
|
||||
Request req;
|
||||
req.method = "GET";
|
||||
|
@ -7078,27 +7078,27 @@ inline Result ClientImpl::Get(const std::string &path, const Params ¶ms,
|
|||
if (params.empty()) { return Get(path, headers); }
|
||||
|
||||
std::string path_with_query = append_query_params(path, params);
|
||||
return Get(path_with_query.c_str(), headers, progress);
|
||||
return Get(path_with_query, headers, std::move(progress));
|
||||
}
|
||||
|
||||
inline Result ClientImpl::Get(const std::string &path, const Params ¶ms,
|
||||
const Headers &headers,
|
||||
ContentReceiver content_receiver,
|
||||
Progress progress) {
|
||||
return Get(path, params, headers, nullptr, content_receiver, progress);
|
||||
return Get(path, params, headers, nullptr, std::move(content_receiver), std::move(progress));
|
||||
}
|
||||
|
||||
inline Result ClientImpl::Get(const std::string &path, const Params ¶ms,
|
||||
const Headers &headers,
|
||||
ResponseHandler response_handler,
|
||||
ContentReceiver content_receiver,
|
||||
Progress progress) {
|
||||
const ResponseHandler& response_handler,
|
||||
const ContentReceiver& content_receiver,
|
||||
const Progress& progress) {
|
||||
if (params.empty()) {
|
||||
return Get(path, headers, response_handler, content_receiver, progress);
|
||||
}
|
||||
|
||||
std::string path_with_query = append_query_params(path, params);
|
||||
return Get(path_with_query.c_str(), headers, response_handler,
|
||||
return Get(path_with_query, headers, response_handler,
|
||||
content_receiver, progress);
|
||||
}
|
||||
|
||||
|
@ -7201,7 +7201,7 @@ inline Result ClientImpl::Post(const std::string &path, const Headers &headers,
|
|||
const auto &content_type =
|
||||
detail::serialize_multipart_formdata_get_content_type(boundary);
|
||||
const auto &body = detail::serialize_multipart_formdata(items, boundary);
|
||||
return Post(path, headers, body, content_type.c_str());
|
||||
return Post(path, headers, body, content_type);
|
||||
}
|
||||
|
||||
inline Result ClientImpl::Post(const std::string &path, const Headers &headers,
|
||||
|
@ -7214,7 +7214,7 @@ inline Result ClientImpl::Post(const std::string &path, const Headers &headers,
|
|||
const auto &content_type =
|
||||
detail::serialize_multipart_formdata_get_content_type(boundary);
|
||||
const auto &body = detail::serialize_multipart_formdata(items, boundary);
|
||||
return Post(path, headers, body, content_type.c_str());
|
||||
return Post(path, headers, body, content_type);
|
||||
}
|
||||
|
||||
inline Result
|
||||
|
@ -8361,7 +8361,7 @@ inline Client::Client(const std::string &host, int port,
|
|||
: cli_(detail::make_unique<ClientImpl>(host, port, client_cert_path,
|
||||
client_key_path)) {}
|
||||
|
||||
inline Client::~Client() {}
|
||||
inline Client::~Client() = default;
|
||||
|
||||
inline bool Client::is_valid() const {
|
||||
return cli_ != nullptr && cli_->is_valid();
|
||||
|
@ -8421,19 +8421,19 @@ inline Result Client::Get(const std::string &path, const Headers &headers,
|
|||
}
|
||||
inline Result Client::Get(const std::string &path, const Params ¶ms,
|
||||
const Headers &headers, Progress progress) {
|
||||
return cli_->Get(path, params, headers, progress);
|
||||
return cli_->Get(path, params, headers, std::move(progress));
|
||||
}
|
||||
inline Result Client::Get(const std::string &path, const Params ¶ms,
|
||||
const Headers &headers,
|
||||
ContentReceiver content_receiver, Progress progress) {
|
||||
return cli_->Get(path, params, headers, content_receiver, progress);
|
||||
return cli_->Get(path, params, headers, std::move(content_receiver), std::move(progress));
|
||||
}
|
||||
inline Result Client::Get(const std::string &path, const Params ¶ms,
|
||||
const Headers &headers,
|
||||
ResponseHandler response_handler,
|
||||
ContentReceiver content_receiver, Progress progress) {
|
||||
return cli_->Get(path, params, headers, response_handler, content_receiver,
|
||||
progress);
|
||||
return cli_->Get(path, params, headers, std::move(response_handler), std::move(content_receiver),
|
||||
std::move(progress));
|
||||
}
|
||||
|
||||
inline Result Client::Head(const std::string &path) { return cli_->Head(path); }
|
||||
|
@ -8754,7 +8754,7 @@ inline void Client::enable_server_certificate_verification(bool enabled) {
|
|||
}
|
||||
#endif
|
||||
|
||||
inline void Client::set_logger(Logger logger) { cli_->set_logger(logger); }
|
||||
inline void Client::set_logger(Logger logger) { cli_->set_logger(std::move(logger)); }
|
||||
|
||||
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
|
||||
inline void Client::set_ca_cert_path(const std::string &ca_cert_file_path,
|
||||
|
|
|
@ -1202,9 +1202,8 @@ struct llama_server_context
|
|||
(json)(slot.images[image_idx].prefix_prompt);
|
||||
|
||||
std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
|
||||
for (int i = 0; i < (int) append_tokens.size(); ++i)
|
||||
{
|
||||
llama_batch_add(batch, append_tokens[i], slot.n_past, { slot.id }, true);
|
||||
for (int append_token : append_tokens) {
|
||||
llama_batch_add(batch, append_token, slot.n_past, { slot.id }, true);
|
||||
slot.n_past += 1;
|
||||
}
|
||||
}
|
||||
|
@ -2034,7 +2033,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
|||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.lora_adapter.push_back(std::make_tuple(argv[i], 1.0f));
|
||||
params.lora_adapter.emplace_back(argv[i], 1.0f);
|
||||
params.use_mmap = false;
|
||||
}
|
||||
else if (arg == "--lora-scaled")
|
||||
|
@ -2050,7 +2049,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
|||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.lora_adapter.push_back(std::make_tuple(lora_adapter, std::stof(argv[i])));
|
||||
params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
|
||||
params.use_mmap = false;
|
||||
}
|
||||
else if (arg == "--lora-base")
|
||||
|
@ -2192,7 +2191,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
|||
}
|
||||
}
|
||||
if (!params.kv_overrides.empty()) {
|
||||
params.kv_overrides.emplace_back(llama_model_kv_override());
|
||||
params.kv_overrides.emplace_back();
|
||||
params.kv_overrides.back().key[0] = 0;
|
||||
}
|
||||
|
||||
|
@ -2626,12 +2625,11 @@ int main(int argc, char **argv)
|
|||
if (!llama_result.error) {
|
||||
std::vector<json> result_array = format_partial_response_oaicompat( llama_result);
|
||||
|
||||
for (auto it = result_array.begin(); it != result_array.end(); ++it)
|
||||
{
|
||||
if (!it->empty()) {
|
||||
for (auto& it : result_array) {
|
||||
if (!it.empty()) {
|
||||
const std::string str =
|
||||
"data: " +
|
||||
it->dump(-1, ' ', false, json::error_handler_t::replace) +
|
||||
it.dump(-1, ' ', false, json::error_handler_t::replace) +
|
||||
"\n\n";
|
||||
LOG_VERBOSE("data stream", {{"to_send", str}});
|
||||
if (!sink.write(str.c_str(), str.size())) {
|
||||
|
@ -2824,19 +2822,17 @@ int main(int argc, char **argv)
|
|||
}*/
|
||||
//);
|
||||
|
||||
llama.queue_tasks.on_new_task(std::bind(
|
||||
&llama_server_context::process_single_task, &llama, std::placeholders::_1));
|
||||
llama.queue_tasks.on_finish_multitask(std::bind(
|
||||
&llama_server_context::on_finish_multitask, &llama, std::placeholders::_1));
|
||||
llama.queue_tasks.on_all_tasks_finished(std::bind(
|
||||
&llama_server_context::run_on_all_tasks_finished, &llama));
|
||||
llama.queue_results.on_multitask_update(std::bind(
|
||||
&llama_server_queue::update_multitask,
|
||||
&llama.queue_tasks,
|
||||
std::placeholders::_1,
|
||||
std::placeholders::_2,
|
||||
std::placeholders::_3
|
||||
));
|
||||
llama.queue_tasks.on_new_task([ObjectPtr = &llama](auto&& PH1) {
|
||||
ObjectPtr->process_single_task(std::forward<decltype(PH1)>(PH1));
|
||||
});
|
||||
llama.queue_tasks.on_finish_multitask([ObjectPtr = &llama](auto&& PH1) {
|
||||
ObjectPtr->on_finish_multitask(std::forward<decltype(PH1)>(PH1));
|
||||
});
|
||||
llama.queue_tasks.on_all_tasks_finished([ObjectPtr = &llama] { ObjectPtr->run_on_all_tasks_finished(); });
|
||||
llama.queue_results.on_multitask_update([ObjectPtr = &llama.queue_tasks](auto&& PH1, auto&& PH2, auto&& PH3) {
|
||||
ObjectPtr->update_multitask(std::forward<decltype(PH1)>(PH1), std::forward<decltype(PH2)>(PH2),
|
||||
std::forward<decltype(PH3)>(PH3));
|
||||
});
|
||||
llama.queue_tasks.start_loop();
|
||||
|
||||
t.join();
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <mutex>
|
||||
|
@ -171,10 +172,10 @@ inline std::string format_chatml(std::vector<json> messages)
|
|||
{
|
||||
std::ostringstream chatml_msgs;
|
||||
|
||||
for (auto it = messages.begin(); it != messages.end(); ++it) {
|
||||
for (auto& message : messages) {
|
||||
chatml_msgs << "<|im_start|>"
|
||||
<< json_value(*it, "role", std::string("user")) << '\n';
|
||||
chatml_msgs << json_value(*it, "content", std::string(""))
|
||||
<< json_value(message, "role", std::string("user")) << '\n';
|
||||
chatml_msgs << json_value(message, "content", std::string(""))
|
||||
<< "<|im_end|>\n";
|
||||
}
|
||||
|
||||
|
@ -225,17 +226,17 @@ struct llama_server_queue {
|
|||
|
||||
// Register function to process a new task
|
||||
void on_new_task(std::function<void(task_server&)> callback) {
|
||||
callback_new_task = callback;
|
||||
callback_new_task = std::move(callback);
|
||||
}
|
||||
|
||||
// Register function to process a multitask
|
||||
void on_finish_multitask(std::function<void(task_multi&)> callback) {
|
||||
callback_finish_multitask = callback;
|
||||
callback_finish_multitask = std::move(callback);
|
||||
}
|
||||
|
||||
// Register the function to be called when the batch of tasks is finished
|
||||
void on_all_tasks_finished(std::function<void(void)> callback) {
|
||||
callback_all_task_finished = callback;
|
||||
callback_all_task_finished = std::move(callback);
|
||||
}
|
||||
|
||||
// Call when the state of one slot is changed
|
||||
|
@ -378,7 +379,7 @@ struct llama_server_response {
|
|||
|
||||
// Register the function to update multitask
|
||||
void on_multitask_update(callback_multitask_t callback) {
|
||||
callback_update_multitask = callback;
|
||||
callback_update_multitask = std::move(callback);
|
||||
}
|
||||
|
||||
// Send a new result to a waiting task_id
|
||||
|
|
|
@ -32,11 +32,11 @@ int main(int argc, char ** argv) {
|
|||
|
||||
tokens = ::llama_tokenize(model, prompt, add_bos, true);
|
||||
|
||||
for (int i = 0; i < (int) tokens.size(); i++) {
|
||||
for (int token : tokens) {
|
||||
if (printing_ids) {
|
||||
printf("%d\n", tokens[i]);
|
||||
printf("%d\n", token);
|
||||
} else {
|
||||
printf("%6d -> '%s'\n", tokens[i], llama_token_to_piece(ctx, tokens[i]).c_str());
|
||||
printf("%6d -> '%s'\n", token, llama_token_to_piece(ctx, token).c_str());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -151,8 +151,7 @@ static void alloc_model(struct ggml_allocr * alloc, struct my_llama_model * mode
|
|||
ggml_allocr_alloc(alloc, model->tok_embeddings);
|
||||
ggml_allocr_alloc(alloc, model->norm);
|
||||
ggml_allocr_alloc(alloc, model->output);
|
||||
for (uint32_t i = 0; i < model->layers.size(); ++i) {
|
||||
auto & layer = model->layers[i];
|
||||
for (auto& layer : model->layers) {
|
||||
ggml_allocr_alloc(alloc, layer.attention_norm);
|
||||
ggml_allocr_alloc(alloc, layer.wq);
|
||||
ggml_allocr_alloc(alloc, layer.wk);
|
||||
|
@ -166,8 +165,7 @@ static void alloc_model(struct ggml_allocr * alloc, struct my_llama_model * mode
|
|||
ggml_allocr_alloc(alloc, model->tok_embeddings->grad);
|
||||
ggml_allocr_alloc(alloc, model->norm->grad);
|
||||
ggml_allocr_alloc(alloc, model->output->grad);
|
||||
for (uint32_t i = 0; i < model->layers.size(); ++i) {
|
||||
auto & layer = model->layers[i];
|
||||
for (auto& layer : model->layers) {
|
||||
ggml_allocr_alloc(alloc, layer.attention_norm->grad);
|
||||
ggml_allocr_alloc(alloc, layer.wq->grad);
|
||||
ggml_allocr_alloc(alloc, layer.wk->grad);
|
||||
|
@ -453,9 +451,9 @@ static struct ggml_tensor * llama_build_train_graphs(
|
|||
|
||||
// allocating checkpoints in one block to reduce memory fragmentation
|
||||
// note: they will be freed in reverse order
|
||||
for (int i = 0; i < (int) checkpoints.size(); ++i) {
|
||||
if (checkpoints[i]->data == NULL && checkpoints[i]->view_src == NULL) {
|
||||
ggml_allocr_alloc(alloc, checkpoints[i]);
|
||||
for (auto& checkpoint : checkpoints) {
|
||||
if (checkpoint->data == NULL && checkpoint->view_src == NULL) {
|
||||
ggml_allocr_alloc(alloc, checkpoint);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -925,7 +923,7 @@ struct save_train_files_data {
|
|||
};
|
||||
|
||||
static void save_train_files(void * vdata, struct train_state * train) {
|
||||
struct save_train_files_data * data = (struct save_train_files_data *) vdata;
|
||||
auto data = (struct save_train_files_data *) vdata;
|
||||
int64_t iter = train->opt->iter;
|
||||
|
||||
if (strlen(data->fn_checkpoint_out) > 0) {
|
||||
|
@ -945,8 +943,7 @@ static int64_t get_parameter_count(struct my_llama_model* model) {
|
|||
nx += ggml_nelements(model->norm);
|
||||
nx += ggml_nelements(model->output);
|
||||
|
||||
for (uint32_t i = 0; i < model->layers.size(); ++i) {
|
||||
auto & layer = model->layers[i];
|
||||
for (auto& layer : model->layers) {
|
||||
nx += ggml_nelements(layer.attention_norm);
|
||||
nx += ggml_nelements(layer.wq);
|
||||
nx += ggml_nelements(layer.wk);
|
||||
|
|
38
llama.cpp
38
llama.cpp
|
@ -1092,7 +1092,7 @@ struct llama_mlock {
|
|||
|
||||
bool failed_already = false;
|
||||
|
||||
llama_mlock() {}
|
||||
llama_mlock() = default;
|
||||
llama_mlock(const llama_mlock &) = delete;
|
||||
|
||||
~llama_mlock() {
|
||||
|
@ -2958,7 +2958,7 @@ static void llm_load_hparams(
|
|||
}
|
||||
|
||||
// TODO: This should probably be in llama.h
|
||||
static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab & vocab, std::string raw_text, bool bos, bool special = false);
|
||||
static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab & vocab, const std::string& raw_text, bool bos, bool special = false);
|
||||
static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch);
|
||||
|
||||
static void llm_load_vocab(
|
||||
|
@ -7111,8 +7111,8 @@ struct llm_tokenizer_bpe {
|
|||
const auto token = vocab.token_to_id.find(str);
|
||||
|
||||
if (token == vocab.token_to_id.end()) {
|
||||
for (auto j = str.begin(); j != str.end(); ++j) {
|
||||
std::string byte_str(1, *j);
|
||||
for (char j : str) {
|
||||
std::string byte_str(1, j);
|
||||
auto token_multibyte = vocab.token_to_id.find(byte_str);
|
||||
if (token_multibyte == vocab.token_to_id.end()) {
|
||||
throw std::runtime_error("ERROR: byte not found in vocab");
|
||||
|
@ -7172,8 +7172,8 @@ private:
|
|||
bpe_encoded_words.reserve(text.size());
|
||||
|
||||
auto cps = codepoints_from_utf8(text);
|
||||
for (size_t i = 0; i < cps.size(); ++i)
|
||||
text_utf.emplace_back(codepoint_to_utf8(cps[i]));
|
||||
for (unsigned int cp : cps)
|
||||
text_utf.emplace_back(codepoint_to_utf8(cp));
|
||||
|
||||
for (int i = 0; i < (int)text_utf.size(); i++) {
|
||||
const std::string & utf_char = text_utf[i];
|
||||
|
@ -7344,7 +7344,7 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list<
|
|||
const auto & special_id = st.second;
|
||||
|
||||
// for each text fragment
|
||||
std::forward_list<fragment_buffer_variant>::iterator it = buffer.begin();
|
||||
auto it = buffer.begin();
|
||||
while (it != buffer.end()) {
|
||||
auto & fragment = (*it);
|
||||
|
||||
|
@ -7431,7 +7431,7 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list<
|
|||
}
|
||||
}
|
||||
|
||||
static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab & vocab, std::string raw_text, bool bos, bool special) {
|
||||
static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab & vocab, const std::string& raw_text, bool bos, bool special) {
|
||||
std::vector<llama_vocab::id> output;
|
||||
|
||||
// OG tokenizer behavior:
|
||||
|
@ -7887,7 +7887,7 @@ void llama_grammar_free(struct llama_grammar * grammar) {
|
|||
}
|
||||
|
||||
struct llama_grammar * llama_grammar_copy(const struct llama_grammar * grammar) {
|
||||
llama_grammar * result = new llama_grammar{ grammar->rules, grammar->stacks, grammar->partial_utf8 };
|
||||
auto result = new llama_grammar{ grammar->rules, grammar->stacks, grammar->partial_utf8 };
|
||||
|
||||
// redirect elements in stacks to point to new rules
|
||||
for (size_t is = 0; is < result->stacks.size(); is++) {
|
||||
|
@ -8095,8 +8095,8 @@ void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array *
|
|||
}
|
||||
|
||||
// Calculate absolute value of second derivatives
|
||||
for (size_t i = 0; i < second_derivatives.size(); ++i) {
|
||||
second_derivatives[i] = std::abs(second_derivatives[i]);
|
||||
for (float& second_derivative : second_derivatives) {
|
||||
second_derivative = std::abs(second_derivative);
|
||||
}
|
||||
|
||||
// Normalize the second derivatives
|
||||
|
@ -9412,8 +9412,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|||
|
||||
if (tot_count > 0) {
|
||||
LLAMA_LOG_INFO(" | hist: ");
|
||||
for (size_t i = 0; i < hist_cur.size(); i++) {
|
||||
LLAMA_LOG_INFO("%5.3f ", hist_cur[i] / float(nelements));
|
||||
for (long long i : hist_cur) {
|
||||
LLAMA_LOG_INFO("%5.3f ", i / float(nelements));
|
||||
}
|
||||
}
|
||||
LLAMA_LOG_INFO("\n");
|
||||
|
@ -9448,14 +9448,14 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|||
// print histogram for all tensors
|
||||
{
|
||||
int64_t sum_all = 0;
|
||||
for (size_t i = 0; i < hist_all.size(); i++) {
|
||||
sum_all += hist_all[i];
|
||||
for (auto i : hist_all) {
|
||||
sum_all += i;
|
||||
}
|
||||
|
||||
if (sum_all > 0) {
|
||||
LLAMA_LOG_INFO("%s: hist: ", __func__);
|
||||
for (size_t i = 0; i < hist_all.size(); i++) {
|
||||
LLAMA_LOG_INFO("%5.3f ", hist_all[i] / float(sum_all));
|
||||
for (auto i : hist_all) {
|
||||
LLAMA_LOG_INFO("%5.3f ", i / float(sum_all));
|
||||
}
|
||||
LLAMA_LOG_INFO("\n");
|
||||
}
|
||||
|
@ -9859,7 +9859,7 @@ struct llama_model * llama_load_model_from_file(
|
|||
struct llama_model_params params) {
|
||||
ggml_time_init();
|
||||
|
||||
llama_model * model = new llama_model;
|
||||
auto model = new llama_model;
|
||||
|
||||
unsigned cur_percentage = 0;
|
||||
if (params.progress_callback == NULL) {
|
||||
|
@ -9905,7 +9905,7 @@ struct llama_context * llama_new_context_with_model(
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
llama_context * ctx = new llama_context(*model);
|
||||
auto ctx = new llama_context(*model);
|
||||
|
||||
const auto & hparams = model->hparams;
|
||||
auto & cparams = ctx->cparams;
|
||||
|
|
|
@ -256,7 +256,7 @@ enum test_mode {
|
|||
};
|
||||
|
||||
struct test_case {
|
||||
virtual ~test_case() {}
|
||||
virtual ~test_case() = default;
|
||||
|
||||
virtual std::string op_desc(ggml_tensor * t) {
|
||||
return ggml_op_desc(t);
|
||||
|
@ -281,9 +281,9 @@ struct test_case {
|
|||
virtual size_t op_size(ggml_tensor * t) {
|
||||
size_t size = ggml_nbytes(t);
|
||||
// add source tensors
|
||||
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
||||
if (t->src[i] != NULL) {
|
||||
size += ggml_nbytes(t->src[i]);
|
||||
for (auto& el : t->src) {
|
||||
if (el) {
|
||||
size += ggml_nbytes(el);
|
||||
}
|
||||
}
|
||||
return size;
|
||||
|
@ -416,7 +416,7 @@ struct test_case {
|
|||
};
|
||||
|
||||
auto callback = [](int index, ggml_tensor * t1, ggml_tensor * t2, void * user_data) -> bool {
|
||||
callback_userdata * ud = (callback_userdata *) user_data;
|
||||
auto ud = (callback_userdata *) user_data;
|
||||
const char * bn1 = ggml_backend_name(ud->backend1);
|
||||
const char * bn2 = ggml_backend_name(ud->backend2);
|
||||
|
||||
|
|
|
@ -583,7 +583,7 @@ int main(int argc, const char ** argv) {
|
|||
}
|
||||
|
||||
// mean, not yet fully implemented
|
||||
if(0)
|
||||
if(false)
|
||||
{
|
||||
srand(seed);
|
||||
const int nargs = 1;
|
||||
|
@ -601,7 +601,7 @@ int main(int argc, const char ** argv) {
|
|||
}
|
||||
|
||||
// argmax
|
||||
if (0)
|
||||
if (false)
|
||||
{
|
||||
srand(seed);
|
||||
const int nargs = 1;
|
||||
|
@ -732,7 +732,7 @@ int main(int argc, const char ** argv) {
|
|||
}
|
||||
|
||||
// tanh, not yet fully implemented
|
||||
if(0)
|
||||
if(false)
|
||||
{
|
||||
srand(seed);
|
||||
const int nargs = 1;
|
||||
|
@ -787,7 +787,7 @@ int main(int argc, const char ** argv) {
|
|||
}
|
||||
|
||||
// elu, not yet fully implemented
|
||||
if(0)
|
||||
if(false)
|
||||
{
|
||||
srand(seed);
|
||||
const int nargs = 1;
|
||||
|
@ -822,7 +822,7 @@ int main(int argc, const char ** argv) {
|
|||
}
|
||||
|
||||
// gelu, not yet fully implemented
|
||||
if(0)
|
||||
if(false)
|
||||
{
|
||||
srand(seed);
|
||||
const int nargs = 1;
|
||||
|
@ -1559,7 +1559,7 @@ int main(int argc, const char ** argv) {
|
|||
}
|
||||
|
||||
// flash_attn f16, not yet fully implemented
|
||||
if(0)
|
||||
if(false)
|
||||
{
|
||||
srand(seed);
|
||||
const int nargs = 3;
|
||||
|
|
|
@ -29,10 +29,9 @@ term ::= [0-9]+)""";
|
|||
};
|
||||
|
||||
uint32_t index = 0;
|
||||
for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it)
|
||||
{
|
||||
std::string key = it->first;
|
||||
uint32_t value = it->second;
|
||||
for (auto& symbol_id : parsed_grammar.symbol_ids) {
|
||||
std::string key = symbol_id.first;
|
||||
uint32_t value = symbol_id.second;
|
||||
std::pair<std::string, uint32_t> expected_pair = expected[index];
|
||||
|
||||
// pretty print error message before asserting
|
||||
|
@ -88,9 +87,7 @@ term ::= [0-9]+)""";
|
|||
for (auto rule : parsed_grammar.rules)
|
||||
{
|
||||
// compare rule to expected rule
|
||||
for (uint32_t i = 0; i < rule.size(); i++)
|
||||
{
|
||||
llama_grammar_element element = rule[i];
|
||||
for (auto element : rule) {
|
||||
llama_grammar_element expected_element = expected_rules[index];
|
||||
|
||||
// pretty print error message before asserting
|
||||
|
@ -135,10 +132,9 @@ term ::= [0-9]+)""";
|
|||
};
|
||||
|
||||
index = 0;
|
||||
for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it)
|
||||
{
|
||||
std::string key = it->first;
|
||||
uint32_t value = it->second;
|
||||
for (auto& symbol_id : parsed_grammar.symbol_ids) {
|
||||
std::string key = symbol_id.first;
|
||||
uint32_t value = symbol_id.second;
|
||||
std::pair<std::string, uint32_t> expected_pair = expected[index];
|
||||
|
||||
// pretty print error message before asserting
|
||||
|
@ -227,9 +223,7 @@ term ::= [0-9]+)""";
|
|||
for (auto rule : parsed_grammar.rules)
|
||||
{
|
||||
// compare rule to expected rule
|
||||
for (uint32_t i = 0; i < rule.size(); i++)
|
||||
{
|
||||
llama_grammar_element element = rule[i];
|
||||
for (auto element : rule) {
|
||||
llama_grammar_element expected_element = expected_rules[index];
|
||||
|
||||
// pretty print error message before asserting
|
||||
|
|
|
@ -98,14 +98,14 @@ int main()
|
|||
},
|
||||
};
|
||||
|
||||
for (auto pair : expected)
|
||||
for (const auto& pair : expected)
|
||||
{
|
||||
parsed_grammar.symbol_ids[pair.first] = pair.second;
|
||||
}
|
||||
|
||||
for (auto rule : expected_rules)
|
||||
for (const auto& rule : expected_rules)
|
||||
{
|
||||
parsed_grammar.rules.push_back({});
|
||||
parsed_grammar.rules.emplace_back();
|
||||
for (auto element : rule)
|
||||
{
|
||||
parsed_grammar.rules.back().push_back(element);
|
||||
|
|
|
@ -126,7 +126,7 @@ int main(int argc, char * argv[]) {
|
|||
bool failed = false;
|
||||
|
||||
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
|
||||
ggml_type type = (ggml_type) i;
|
||||
auto type = (ggml_type) i;
|
||||
ggml_type_traits_t qfns = ggml_internal_get_type_traits(type);
|
||||
|
||||
// deprecated - skip
|
||||
|
|
|
@ -269,7 +269,7 @@ int main(int argc, char * argv[]) {
|
|||
struct ggml_context * ctx = ggml_init(ggml_params);
|
||||
|
||||
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
|
||||
ggml_type type = (ggml_type) i;
|
||||
auto type = (ggml_type) i;
|
||||
ggml_type_traits_t qfns = ggml_internal_get_type_traits(type);
|
||||
if (!params.include_types.empty() && ggml_type_name(type) && std::find(params.include_types.begin(), params.include_types.end(), ggml_type_name(type)) == params.include_types.end()) {
|
||||
continue;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue