common : use common_ prefix for common library functions (#9805)
* common : use common_ prefix for common library functions

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
0e9f760eb1
commit
7eee341bee
45 changed files with 1284 additions and 1284 deletions
|
@ -35,7 +35,7 @@ struct results_log_softmax {
|
|||
};
|
||||
|
||||
static void write_logfile(
|
||||
const llama_context * ctx, const gpt_params & params, const llama_model * model,
|
||||
const llama_context * ctx, const common_params & params, const llama_model * model,
|
||||
const struct results_perplexity & results
|
||||
) {
|
||||
if (params.logdir.empty()) {
|
||||
|
@ -339,7 +339,7 @@ static void process_logits(int n_vocab, const float * logits, const int * tokens
|
|||
}
|
||||
}
|
||||
|
||||
static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params & params) {
|
||||
static results_perplexity perplexity_v2(llama_context * ctx, const common_params & params) {
|
||||
// Download: https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
|
||||
// Run `./perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw`
|
||||
// Output: `perplexity: 13.5106 [114/114]`
|
||||
|
@ -350,7 +350,7 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &
|
|||
|
||||
LOG_INF("%s: tokenizing the input ..\n", __func__);
|
||||
|
||||
std::vector<llama_token> tokens = ::llama_tokenize(ctx, params.prompt, true);
|
||||
std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
|
||||
|
||||
const int n_ctx = llama_n_ctx(ctx);
|
||||
|
||||
|
@ -474,7 +474,7 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &
|
|||
return {tokens, std::exp(nll / count), logit_history, prob_history};
|
||||
}
|
||||
|
||||
static results_perplexity perplexity(llama_context * ctx, const gpt_params & params, const int32_t n_ctx) {
|
||||
static results_perplexity perplexity(llama_context * ctx, const common_params & params, const int32_t n_ctx) {
|
||||
if (params.ppl_stride > 0) {
|
||||
return perplexity_v2(ctx, params);
|
||||
}
|
||||
|
@ -502,7 +502,7 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
|
|||
auto tim1 = std::chrono::high_resolution_clock::now();
|
||||
LOG_INF("%s: tokenizing the input ..\n", __func__);
|
||||
|
||||
std::vector<llama_token> tokens = ::llama_tokenize(ctx, params.prompt, true);
|
||||
std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
|
||||
|
||||
auto tim2 = std::chrono::high_resolution_clock::now();
|
||||
LOG_INF("%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());
|
||||
|
@ -772,7 +772,7 @@ static void compute_logprobs(const float * batch_logits, int n_vocab, std::vecto
|
|||
}
|
||||
}
|
||||
|
||||
static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
|
||||
static void hellaswag_score(llama_context * ctx, const common_params & params) {
|
||||
// Calculates hellaswag score (acc_norm) from prompt
|
||||
//
|
||||
// Data extracted from the HellaSwag validation dataset (MIT license) https://github.com/rowanz/hellaswag/blob/master/data/hellaswag_val.jsonl
|
||||
|
@ -853,7 +853,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
|
|||
hs_cur.gold_ending_idx = std::stoi( prompt_lines[idx*6+1] );
|
||||
for (size_t j = 0; j < 4; j++) {
|
||||
hs_cur.ending[j] = prompt_lines[idx*6+2+j];
|
||||
hs_cur.seq_tokens[j] = ::llama_tokenize(ctx, hs_cur.context + " " + hs_cur.ending[j], true);
|
||||
hs_cur.seq_tokens[j] = common_tokenize(ctx, hs_cur.context + " " + hs_cur.ending[j], true);
|
||||
}
|
||||
|
||||
// determine the common prefix of the endings
|
||||
|
@ -910,7 +910,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
|
|||
size_t i1 = i0;
|
||||
size_t i_logits = 0; // this tells us how many logits were needed before this point in the batch
|
||||
|
||||
llama_batch_clear(batch);
|
||||
common_batch_clear(batch);
|
||||
|
||||
// batch as much tasks as possible into the available context
|
||||
// each task has 4 unique sequence ids - one for each ending
|
||||
|
@ -926,7 +926,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
|
|||
}
|
||||
|
||||
for (size_t i = 0; i < hs_cur.common_prefix; ++i) {
|
||||
llama_batch_add(batch, hs_cur.seq_tokens[0][i], i, { s0 + 0, s0 + 1, s0 + 2, s0 + 3 }, false);
|
||||
common_batch_add(batch, hs_cur.seq_tokens[0][i], i, { s0 + 0, s0 + 1, s0 + 2, s0 + 3 }, false);
|
||||
}
|
||||
batch.logits[batch.n_tokens - 1] = true; // we need logits for the last token of the common prefix
|
||||
n_logits += 1;
|
||||
|
@ -936,7 +936,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
|
|||
// TODO: don't evaluate the last token of each sequence
|
||||
for (size_t i = hs_cur.common_prefix; i < seq_tokens_size; ++i) {
|
||||
const bool needs_logits = i < seq_tokens_size - 1;
|
||||
llama_batch_add(batch, hs_cur.seq_tokens[s][i], i, { s0 + s }, needs_logits);
|
||||
common_batch_add(batch, hs_cur.seq_tokens[s][i], i, { s0 + s }, needs_logits);
|
||||
n_logits += needs_logits;
|
||||
}
|
||||
}
|
||||
|
@ -1112,7 +1112,7 @@ static std::vector<winogrande_entry> load_winogrande_from_csv(const std::string
|
|||
* 0,Sarah was a much better surgeon than Maria so _ always got the easier cases.,Sarah,Maria,2
|
||||
*
|
||||
*/
|
||||
static void winogrande_score(llama_context * ctx, const gpt_params & params) {
|
||||
static void winogrande_score(llama_context * ctx, const common_params & params) {
|
||||
|
||||
constexpr int k_min_trailing_ctx = 3;
|
||||
|
||||
|
@ -1146,8 +1146,8 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
|
|||
LOG_INF("%s : tokenizing selected tasks\n", __func__);
|
||||
|
||||
for (auto & task : data) {
|
||||
task.seq_tokens[0] = ::llama_tokenize(ctx, task.first + task.choices[0] + task.second, true);
|
||||
task.seq_tokens[1] = ::llama_tokenize(ctx, task.first + task.choices[1] + task.second, true);
|
||||
task.seq_tokens[0] = common_tokenize(ctx, task.first + task.choices[0] + task.second, true);
|
||||
task.seq_tokens[1] = common_tokenize(ctx, task.first + task.choices[1] + task.second, true);
|
||||
|
||||
task.common_prefix = 0;
|
||||
for (size_t k = 0; k < task.seq_tokens[0].size(); k++) {
|
||||
|
@ -1162,8 +1162,8 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
|
|||
task.seq_tokens[0].size() - task.common_prefix +
|
||||
task.seq_tokens[1].size() - task.common_prefix;
|
||||
|
||||
task.n_base1 = ::llama_tokenize(ctx, task.first + task.choices[0], true).size();
|
||||
task.n_base2 = ::llama_tokenize(ctx, task.first + task.choices[1], true).size();
|
||||
task.n_base1 = common_tokenize(ctx, task.first + task.choices[0], true).size();
|
||||
task.n_base2 = common_tokenize(ctx, task.first + task.choices[1], true).size();
|
||||
}
|
||||
|
||||
LOG_INF("%s : calculating winogrande score over selected tasks.\n", __func__);
|
||||
|
@ -1195,7 +1195,7 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
|
|||
size_t i1 = i0;
|
||||
size_t i_logits = 0;
|
||||
|
||||
llama_batch_clear(batch);
|
||||
common_batch_clear(batch);
|
||||
|
||||
while (n_cur + (int) data[i1].required_tokens <= n_ctx) {
|
||||
int n_logits = 0;
|
||||
|
@ -1205,7 +1205,7 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
|
|||
}
|
||||
|
||||
for (size_t i = 0; i < data[i1].common_prefix; ++i) {
|
||||
llama_batch_add(batch, data[i1].seq_tokens[0][i], i, { s0 + 0, s0 + 1 }, false);
|
||||
common_batch_add(batch, data[i1].seq_tokens[0][i], i, { s0 + 0, s0 + 1 }, false);
|
||||
}
|
||||
batch.logits[batch.n_tokens - 1] = true;
|
||||
n_logits += 1;
|
||||
|
@ -1213,7 +1213,7 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
|
|||
for (int s = 0; s < 2; ++s) {
|
||||
// TODO: end before the last token, no need to predict past the end of the sequences
|
||||
for (size_t i = data[i1].common_prefix; i < data[i1].seq_tokens[s].size(); ++i) {
|
||||
llama_batch_add(batch, data[i1].seq_tokens[s][i], i, { s0 + s }, true);
|
||||
common_batch_add(batch, data[i1].seq_tokens[s][i], i, { s0 + s }, true);
|
||||
n_logits += 1;
|
||||
}
|
||||
}
|
||||
|
@ -1370,7 +1370,7 @@ static bool multiple_choice_prepare_one_task(llama_context * ctx, multiple_choic
|
|||
}
|
||||
return false;
|
||||
}
|
||||
task.seq_tokens.emplace_back(::llama_tokenize(ctx, task.question + " " + answer, true));
|
||||
task.seq_tokens.emplace_back(::common_tokenize(ctx, task.question + " " + answer, true));
|
||||
}
|
||||
auto min_len = task.seq_tokens.front().size();
|
||||
for (auto& seq : task.seq_tokens) {
|
||||
|
@ -1414,7 +1414,7 @@ static bool multiple_choice_prepare_one_task(llama_context * ctx, multiple_choic
|
|||
// git@hf.co:datasets/Stevross/mmlu
|
||||
// https://huggingface.co/datasets/truthful_qa
|
||||
//
|
||||
static void multiple_choice_score(llama_context * ctx, const gpt_params & params) {
|
||||
static void multiple_choice_score(llama_context * ctx, const common_params & params) {
|
||||
|
||||
std::istringstream strstream(params.prompt);
|
||||
uint32_t n_task;
|
||||
|
@ -1548,7 +1548,7 @@ static void multiple_choice_score(llama_context * ctx, const gpt_params & params
|
|||
size_t i1 = i0;
|
||||
size_t i_logits = 0; // this tells us how many logits were needed before this point in the batch
|
||||
|
||||
llama_batch_clear(batch);
|
||||
common_batch_clear(batch);
|
||||
|
||||
// batch as much tasks as possible into the available context
|
||||
// each task has 4 unique sequence ids - one for each ending
|
||||
|
@ -1571,7 +1571,7 @@ static void multiple_choice_score(llama_context * ctx, const gpt_params & params
|
|||
|
||||
for (size_t i = 0; i < cur_task.common_prefix; ++i) {
|
||||
//llama_batch_add(batch, cur_task.seq_tokens[0][i], i, { s0 + 0, s0 + 1, s0 + 2, s0 + 3}, false);
|
||||
llama_batch_add(batch, cur_task.seq_tokens[0][i], i, batch_indeces, false);
|
||||
common_batch_add(batch, cur_task.seq_tokens[0][i], i, batch_indeces, false);
|
||||
}
|
||||
batch.logits[batch.n_tokens - 1] = true; // we need logits for the last token of the common prefix
|
||||
n_logits += 1;
|
||||
|
@ -1581,7 +1581,7 @@ static void multiple_choice_score(llama_context * ctx, const gpt_params & params
|
|||
// TODO: don't evaluate the last token of each sequence
|
||||
for (size_t i = cur_task.common_prefix; i < seq_tokens_size; ++i) {
|
||||
const bool needs_logits = i < seq_tokens_size - 1;
|
||||
llama_batch_add(batch, cur_task.seq_tokens[s][i], i, { s0 + s }, needs_logits);
|
||||
common_batch_add(batch, cur_task.seq_tokens[s][i], i, { s0 + s }, needs_logits);
|
||||
n_logits += needs_logits;
|
||||
}
|
||||
}
|
||||
|
@ -1695,7 +1695,7 @@ static void multiple_choice_score(llama_context * ctx, const gpt_params & params
|
|||
LOG_INF("\n");
|
||||
}
|
||||
|
||||
static void kl_divergence(llama_context * ctx, const gpt_params & params) {
|
||||
static void kl_divergence(llama_context * ctx, const common_params & params) {
|
||||
if (params.logits_file.empty()) {
|
||||
LOG_ERR("%s: you must provide a name of a file containing the log probabilities of the base model\n", __func__);
|
||||
return;
|
||||
|
@ -1968,17 +1968,17 @@ static void kl_divergence(llama_context * ctx, const gpt_params & params) {
|
|||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
gpt_params params;
|
||||
common_params params;
|
||||
|
||||
params.n_ctx = 512;
|
||||
params.logits_all = true;
|
||||
params.escape = false;
|
||||
|
||||
if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_PERPLEXITY)) {
|
||||
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_PERPLEXITY)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
gpt_init();
|
||||
common_init();
|
||||
|
||||
const int32_t n_ctx = params.n_ctx;
|
||||
|
||||
|
@ -2017,7 +2017,7 @@ int main(int argc, char ** argv) {
|
|||
llama_numa_init(params.numa);
|
||||
|
||||
// load the model and apply lora adapter, if any
|
||||
llama_init_result llama_init = llama_init_from_gpt_params(params);
|
||||
common_init_result llama_init = common_init_from_params(params);
|
||||
|
||||
llama_model * model = llama_init.model;
|
||||
llama_context * ctx = llama_init.context;
|
||||
|
@ -2036,7 +2036,7 @@ int main(int argc, char ** argv) {
|
|||
// print system information
|
||||
{
|
||||
LOG_INF("\n");
|
||||
LOG_INF("%s\n", gpt_params_get_system_info(params).c_str());
|
||||
LOG_INF("%s\n", common_params_get_system_info(params).c_str());
|
||||
}
|
||||
|
||||
struct results_perplexity results;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue