no longer necessary to disambiguate common functions with ::

Author: slaren
Date:   2024-10-09 12:46:18 +02:00
Parent: e58d3b1214
Commit: aee57d44c6

21 changed files with 48 additions and 48 deletions
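
For context: a leading "::" forces name lookup to start in the global namespace, so it only matters when a declaration with the same name is visible in an enclosing scope and would otherwise be found first. With the helpers now carrying the unambiguous common_ prefix, nothing shadows them, and the qualified and unqualified calls bind to the same function. Below is a minimal sketch of that language rule, using a hypothetical simplified signature rather than the real common.h helper:

    #include <string>
    #include <vector>

    // hypothetical stand-in for the common.h helper (simplified signature)
    std::vector<int> common_tokenize(const std::string & text, bool add_bos) {
        std::vector<int> tokens(text.size(), 0);
        if (add_bos) {
            tokens.insert(tokens.begin(), 1); // pretend id 1 is the BOS token
        }
        return tokens;
    }

    struct example {
        // no member or enclosing-scope declaration named common_tokenize exists here,
        // so unqualified lookup already finds the global function and "::" adds nothing
        std::vector<int> qualified  (const std::string & s) { return ::common_tokenize(s, true); }
        std::vector<int> unqualified(const std::string & s) { return   common_tokenize(s, true); }
    };

    int main() {
        example e;
        return e.qualified("hello") == e.unqualified("hello") ? 0 : 1; // returns 0: identical results
    }

Only if a class or namespace introduced its own member with the same name would the unqualified call bind to that member instead; that is the situation the "::" prefix previously guarded against.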


@@ -51,7 +51,7 @@ int main(int argc, char ** argv) {
 // tokenize the prompt
 std::vector<llama_token> tokens_list;
- tokens_list = ::common_tokenize(model, params.prompt, true);
+ tokens_list = common_tokenize(model, params.prompt, true);
 const int n_kv_req = tokens_list.size() + (n_predict - tokens_list.size())*n_parallel;


@@ -272,8 +272,8 @@ struct tokenized_prompt {
 tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) {
 const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
- tokens_pos = ::common_tokenize(ctx, pos, add_bos, true);
- tokens_neg = ::common_tokenize(ctx, neg, add_bos, true);
+ tokens_pos = common_tokenize(ctx, pos, add_bos, true);
+ tokens_neg = common_tokenize(ctx, neg, add_bos, true);
 max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());
 padding_seq(ctx, tokens_pos, max_seq_len);
 padding_seq(ctx, tokens_neg, max_seq_len);
@@ -281,7 +281,7 @@ struct tokenized_prompt {
 void padding_seq(llama_context * ctx, std::vector<llama_token> & tokens, size_t len) {
 // TODO: customize padding token
- std::vector<llama_token> pad_tokens = ::common_tokenize(ctx, " ", false);
+ std::vector<llama_token> pad_tokens = common_tokenize(ctx, " ", false);
 llama_token pad_tok = pad_tokens.back();
 while (tokens.size() < len) {
 tokens.push_back(pad_tok);


@@ -135,7 +135,7 @@ int main(int argc, char ** argv) {
 // tokenize the prompts and trim
 std::vector<std::vector<int32_t>> inputs;
 for (const auto & prompt : prompts) {
- auto inp = ::common_tokenize(ctx, prompt, true, true);
+ auto inp = common_tokenize(ctx, prompt, true, true);
 if (inp.size() > n_batch) {
 LOG_ERR("%s: number of tokens in input line (%lld) exceeds batch size (%lld), increase batch size and re-run\n",
 __func__, (long long int) inp.size(), (long long int) n_batch);


@@ -129,7 +129,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
 static bool run(llama_context * ctx, const gpt_params & params) {
 const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
- std::vector<llama_token> tokens = ::common_tokenize(ctx, params.prompt, add_bos);
+ std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, add_bos);
 if (llama_decode(ctx, llama_batch_get_one(tokens.data(), tokens.size(), 0, 0))) {
 LOG_ERR("%s : failed to eval\n", __func__);


@@ -436,7 +436,7 @@ static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
 auto tim1 = std::chrono::high_resolution_clock::now();
 LOG_INF("%s: tokenizing the input ..\n", __func__);
- std::vector<llama_token> tokens = ::common_tokenize(ctx, params.prompt, true);
+ std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
 auto tim2 = std::chrono::high_resolution_clock::now();
 LOG_INF("%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());


@@ -202,8 +202,8 @@ int main(int argc, char ** argv) {
 std::vector<llama_token> embd_inp;
 std::vector<llama_token> embd_end;
- std::vector<llama_token> inp_pfx = ::common_tokenize(ctx, params.input_prefix, false);
- std::vector<llama_token> inp_sfx = ::common_tokenize(ctx, params.input_suffix, false);
+ std::vector<llama_token> inp_pfx = common_tokenize(ctx, params.input_prefix, false);
+ std::vector<llama_token> inp_sfx = common_tokenize(ctx, params.input_suffix, false);
 GGML_ASSERT(llama_token_prefix(model) >= 0);
 GGML_ASSERT(llama_token_suffix(model) >= 0);
@@ -505,8 +505,8 @@ int main(int argc, char ** argv) {
 }
 // tokenize new prefix and suffix
- std::vector<llama_token> inp_pfx = ::common_tokenize(ctx, params.input_prefix, false);
- std::vector<llama_token> inp_sfx = ::common_tokenize(ctx, params.input_suffix, false);
+ std::vector<llama_token> inp_pfx = common_tokenize(ctx, params.input_prefix, false);
+ std::vector<llama_token> inp_sfx = common_tokenize(ctx, params.input_suffix, false);
 inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(model));
 inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model));
@@ -579,7 +579,7 @@ int main(int argc, char ** argv) {
 const size_t original_size = embd_inp.size();
- const auto line_inp = ::common_tokenize(ctx, buffer, false);
+ const auto line_inp = common_tokenize(ctx, buffer, false);
 LOG_DBG("input tokens: %s\n", string_from(ctx, line_inp).c_str());
 embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());


@@ -37,7 +37,7 @@ static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
 static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
 std::string str2 = str;
- std::vector<llama_token> embd_inp = ::common_tokenize(ctx_llama, str2, add_bos, true);
+ std::vector<llama_token> embd_inp = common_tokenize(ctx_llama, str2, add_bos, true);
 eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
 return true;
 }
@@ -159,14 +159,14 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
 user_prompt = prompt.substr(image_pos + std::string("<image>").length());
 LOG_INF("system_prompt: %s\n", system_prompt.c_str());
 if (params->verbose_prompt) {
- auto tmp = ::common_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
+ auto tmp = common_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
 for (int i = 0; i < (int) tmp.size(); i++) {
 LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
 }
 }
 LOG_INF("user_prompt: %s\n", user_prompt.c_str());
 if (params->verbose_prompt) {
- auto tmp = ::common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
+ auto tmp = common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
 for (int i = 0; i < (int) tmp.size(); i++) {
 LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
 }
@@ -176,7 +176,7 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
 system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:";
 user_prompt = prompt + "\nASSISTANT:";
 if (params->verbose_prompt) {
- auto tmp = ::common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
+ auto tmp = common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
 for (int i = 0; i < (int) tmp.size(); i++) {
 LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
 }


@@ -114,7 +114,7 @@ static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
 static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
 std::string str2 = str;
- std::vector<llama_token> embd_inp = ::common_tokenize(ctx_llama, str2, add_bos, true);
+ std::vector<llama_token> embd_inp = common_tokenize(ctx_llama, str2, add_bos, true);
 return eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
 }


@@ -65,7 +65,7 @@ int main(int argc, char ** argv) {
 std::vector<llama_token> inp;
 std::vector<llama_token> all;
- inp = ::common_tokenize(ctx, params.prompt, true, true);
+ inp = common_tokenize(ctx, params.prompt, true, true);
 all = inp;
 const int max_context_size = llama_n_ctx(ctx);


@@ -31,7 +31,7 @@ int main(int argc, char ** argv){
 // tokenize the prompt
 std::vector<llama_token> inp;
- inp = ::common_tokenize(ctx, params.prompt, true, true);
+ inp = common_tokenize(ctx, params.prompt, true, true);
 fprintf(stderr, "%s: tokenization done\n", __func__);


@@ -35,7 +35,7 @@ int main(int argc, char ** argv){
 // tokenize the prompt
 std::vector<llama_token> inp;
- inp = ::common_tokenize(ctx, params.prompt, true, true);
+ inp = common_tokenize(ctx, params.prompt, true, true);
 llama_ngram_cache ngram_cache_context;
 llama_ngram_cache ngram_cache_dynamic;


@@ -38,7 +38,7 @@ int main(int argc, char ** argv){
 // tokenize the prompt
 std::vector<llama_token> inp;
- inp = ::common_tokenize(ctx, params.prompt, true, true);
+ inp = common_tokenize(ctx, params.prompt, true, true);
 llama_ngram_cache ngram_cache_context;
 llama_ngram_cache ngram_cache_dynamic;


@@ -296,7 +296,7 @@ int main(int argc, char ** argv) {
 : params.prompt;
 if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
 LOG_DBG("tokenize the prompt\n");
- embd_inp = ::common_tokenize(ctx, prompt, true, true);
+ embd_inp = common_tokenize(ctx, prompt, true, true);
 } else {
 LOG_DBG("use session tokens\n");
 embd_inp = session_tokens;
@@ -415,7 +415,7 @@ int main(int argc, char ** argv) {
 for (const auto & antiprompt : params.antiprompt) {
 LOG_INF("Reverse prompt: '%s'\n", antiprompt.c_str());
 if (params.verbose_prompt) {
- auto tmp = ::common_tokenize(ctx, antiprompt, false, true);
+ auto tmp = common_tokenize(ctx, antiprompt, false, true);
 for (int i = 0; i < (int) tmp.size(); i++) {
 LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx, tmp[i]).c_str());
 }
@@ -430,7 +430,7 @@ int main(int argc, char ** argv) {
 if (!params.input_prefix.empty()) {
 LOG_INF("Input prefix: '%s'\n", params.input_prefix.c_str());
 if (params.verbose_prompt) {
- auto tmp = ::common_tokenize(ctx, params.input_prefix, true, true);
+ auto tmp = common_tokenize(ctx, params.input_prefix, true, true);
 for (int i = 0; i < (int) tmp.size(); i++) {
 LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx, tmp[i]).c_str());
 }
@@ -440,7 +440,7 @@ int main(int argc, char ** argv) {
 if (!params.input_suffix.empty()) {
 LOG_INF("Input suffix: '%s'\n", params.input_suffix.c_str());
 if (params.verbose_prompt) {
- auto tmp = ::common_tokenize(ctx, params.input_suffix, false, true);
+ auto tmp = common_tokenize(ctx, params.input_suffix, false, true);
 for (int i = 0; i < (int) tmp.size(); i++) {
 LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx, tmp[i]).c_str());
 }
@@ -788,7 +788,7 @@ int main(int argc, char ** argv) {
 if (params.interactive) {
 if (!params.antiprompt.empty()) {
 // tokenize and inject first reverse prompt
- const auto first_antiprompt = ::common_tokenize(ctx, params.antiprompt.front(), false, true);
+ const auto first_antiprompt = common_tokenize(ctx, params.antiprompt.front(), false, true);
 embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
 is_antiprompt = true;
 }
@@ -862,9 +862,9 @@ int main(int argc, char ** argv) {
 ? chat_add_and_format(model, chat_msgs, "user", std::move(buffer))
 : std::move(buffer);
 // TODO: one inconvenient of current chat template implementation is that we can't distinguish between user input and special tokens (prefix/postfix)
- const auto line_pfx = ::common_tokenize(ctx, params.input_prefix, false, true);
- const auto line_inp = ::common_tokenize(ctx, user_inp, false, format_chat);
- const auto line_sfx = ::common_tokenize(ctx, params.input_suffix, false, true);
+ const auto line_pfx = common_tokenize(ctx, params.input_prefix, false, true);
+ const auto line_inp = common_tokenize(ctx, user_inp, false, format_chat);
+ const auto line_sfx = common_tokenize(ctx, params.input_suffix, false, true);
 LOG_DBG("input tokens: %s\n", string_from(ctx, line_inp).c_str());


@@ -164,7 +164,7 @@ int main(int argc, char ** argv) {
 }
 std::vector<llama_token> tokens_system;
- tokens_system = ::common_tokenize(ctx, k_system, true);
+ tokens_system = common_tokenize(ctx, k_system, true);
 const int32_t n_tokens_system = tokens_system.size();
 llama_seq_id g_seq_id = 0;
@@ -256,7 +256,7 @@ int main(int argc, char ** argv) {
 // do not prepend BOS because we have a system prompt!
 std::vector<llama_token> tokens_prompt;
- tokens_prompt = ::common_tokenize(ctx, client.prompt, false);
+ tokens_prompt = common_tokenize(ctx, client.prompt, false);
 for (size_t i = 0; i < tokens_prompt.size(); ++i) {
 common_batch_add(batch, tokens_prompt[i], i + n_tokens_system, { client.id + 1 }, false);


@@ -92,10 +92,10 @@ int main(int argc, char ** argv) {
 // tokenize the prompt
 std::vector<llama_token> tokens_list;
- tokens_list = ::common_tokenize(ctx, params.prompt, true);
+ tokens_list = common_tokenize(ctx, params.prompt, true);
 // tokenize the prefix and use it as a sink
- const int n_tokens_prefix = ::common_tokenize(ctx, prompt_prefix, true).size();
+ const int n_tokens_prefix = common_tokenize(ctx, prompt_prefix, true).size();
 const int n_tokens_all = tokens_list.size();


@@ -348,7 +348,7 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &
 LOG_INF("%s: tokenizing the input ..\n", __func__);
- std::vector<llama_token> tokens = ::common_tokenize(ctx, params.prompt, true);
+ std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
 const int n_ctx = llama_n_ctx(ctx);
@@ -500,7 +500,7 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
 auto tim1 = std::chrono::high_resolution_clock::now();
 LOG_INF("%s: tokenizing the input ..\n", __func__);
- std::vector<llama_token> tokens = ::common_tokenize(ctx, params.prompt, true);
+ std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
 auto tim2 = std::chrono::high_resolution_clock::now();
 LOG_INF("%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());
@@ -844,7 +844,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
 hs_cur.gold_ending_idx = std::stoi( prompt_lines[idx*6+1] );
 for (size_t j = 0; j < 4; j++) {
 hs_cur.ending[j] = prompt_lines[idx*6+2+j];
- hs_cur.seq_tokens[j] = ::common_tokenize(ctx, hs_cur.context + " " + hs_cur.ending[j], true);
+ hs_cur.seq_tokens[j] = common_tokenize(ctx, hs_cur.context + " " + hs_cur.ending[j], true);
 }
 // determine the common prefix of the endings
@@ -1136,8 +1136,8 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
 LOG_INF("%s : tokenizing selected tasks\n", __func__);
 for (auto & task : data) {
- task.seq_tokens[0] = ::common_tokenize(ctx, task.first + task.choices[0] + task.second, true);
- task.seq_tokens[1] = ::common_tokenize(ctx, task.first + task.choices[1] + task.second, true);
+ task.seq_tokens[0] = common_tokenize(ctx, task.first + task.choices[0] + task.second, true);
+ task.seq_tokens[1] = common_tokenize(ctx, task.first + task.choices[1] + task.second, true);
 task.common_prefix = 0;
 for (size_t k = 0; k < task.seq_tokens[0].size(); k++) {
@@ -1152,8 +1152,8 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
 task.seq_tokens[0].size() - task.common_prefix +
 task.seq_tokens[1].size() - task.common_prefix;
- task.n_base1 = ::common_tokenize(ctx, task.first + task.choices[0], true).size();
- task.n_base2 = ::common_tokenize(ctx, task.first + task.choices[1], true).size();
+ task.n_base1 = common_tokenize(ctx, task.first + task.choices[0], true).size();
+ task.n_base2 = common_tokenize(ctx, task.first + task.choices[1], true).size();
 }
 LOG_INF("%s : calculating winogrande score over selected tasks.\n", __func__);


@@ -185,7 +185,7 @@ int main(int argc, char ** argv) {
 // tokenize the prompts and trim
 for (auto & chunk : chunks) {
- auto inp = ::common_tokenize(ctx, chunk.textdata, true, false);
+ auto inp = common_tokenize(ctx, chunk.textdata, true, false);
 if (inp.size() > n_batch) {
 LOG_ERR("%s: chunk size (%lld) exceeds batch size (%lld), increase batch size and re-run\n",
 __func__, (long long int) inp.size(), (long long int) n_batch);


@@ -771,10 +771,10 @@ struct server_context {
 std::vector<llama_token> p;
 if (first) {
- p = ::common_tokenize(ctx, s, add_special, TMP_FORCE_SPECIAL);
+ p = common_tokenize(ctx, s, add_special, TMP_FORCE_SPECIAL);
 first = false;
 } else {
- p = ::common_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
+ p = common_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
 }
 prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
@@ -788,7 +788,7 @@ struct server_context {
 }
 } else {
 auto s = json_prompt.template get<std::string>();
- prompt_tokens = ::common_tokenize(ctx, s, add_special, TMP_FORCE_SPECIAL);
+ prompt_tokens = common_tokenize(ctx, s, add_special, TMP_FORCE_SPECIAL);
 }
 return prompt_tokens;
@@ -1073,7 +1073,7 @@ struct server_context {
 system_tokens.clear();
 if (!system_prompt.empty()) {
- system_tokens = ::common_tokenize(ctx, system_prompt, true);
+ system_tokens = common_tokenize(ctx, system_prompt, true);
 const int32_t n_batch = llama_n_batch(ctx);
 const int32_t n_tokens_prompt = system_tokens.size();


@@ -64,7 +64,7 @@ int main(int argc, char ** argv) {
 // tokenize the prompt
 std::vector<llama_token> tokens_list;
- tokens_list = ::common_tokenize(ctx, params.prompt, true);
+ tokens_list = common_tokenize(ctx, params.prompt, true);
 const int n_ctx = llama_n_ctx(ctx);
 const int n_kv_req = tokens_list.size() + (n_predict - tokens_list.size());


@@ -134,7 +134,7 @@ int main(int argc, char ** argv) {
 // Tokenize the prompt
 std::vector<llama_token> inp;
- inp = ::common_tokenize(ctx_tgt, params.prompt, true, true);
+ inp = common_tokenize(ctx_tgt, params.prompt, true, true);
 const int max_context_size = llama_n_ctx(ctx_tgt);
 const int max_tokens_list_size = max_context_size - 4;


@@ -365,7 +365,7 @@ int main(int raw_argc, char ** raw_argv) {
 const bool parse_special = !no_parse_special;
 std::vector<llama_token> tokens;
- tokens = ::common_tokenize(model, prompt, add_bos, parse_special);
+ tokens = common_tokenize(model, prompt, add_bos, parse_special);
 if (printing_ids) {
 printf("[");