diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp
index da0d15e1d..fe12312e2 100644
--- a/examples/batched/batched.cpp
+++ b/examples/batched/batched.cpp
@@ -51,7 +51,7 @@ int main(int argc, char ** argv) {
     // tokenize the prompt
     std::vector<llama_token> tokens_list;
-    tokens_list = ::common_tokenize(model, params.prompt, true);
+    tokens_list = common_tokenize(model, params.prompt, true);
     const int n_kv_req = tokens_list.size() + (n_predict - tokens_list.size())*n_parallel;
diff --git a/examples/cvector-generator/cvector-generator.cpp b/examples/cvector-generator/cvector-generator.cpp
index cdaf8f390..724ec9eaf 100644
--- a/examples/cvector-generator/cvector-generator.cpp
+++ b/examples/cvector-generator/cvector-generator.cpp
@@ -272,8 +272,8 @@ struct tokenized_prompt {
     tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) {
         const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
-        tokens_pos = ::common_tokenize(ctx, pos, add_bos, true);
-        tokens_neg = ::common_tokenize(ctx, neg, add_bos, true);
+        tokens_pos = common_tokenize(ctx, pos, add_bos, true);
+        tokens_neg = common_tokenize(ctx, neg, add_bos, true);
         max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());
         padding_seq(ctx, tokens_pos, max_seq_len);
         padding_seq(ctx, tokens_neg, max_seq_len);
@@ -281,7 +281,7 @@ struct tokenized_prompt {
     void padding_seq(llama_context * ctx, std::vector<llama_token> & tokens, size_t len) {
         // TODO: customize padding token
-        std::vector<llama_token> pad_tokens = ::common_tokenize(ctx, " ", false);
+        std::vector<llama_token> pad_tokens = common_tokenize(ctx, " ", false);
         llama_token pad_tok = pad_tokens.back();
         while (tokens.size() < len) {
             tokens.push_back(pad_tok);
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index f5113ef85..2ce870613 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -135,7 +135,7 @@ int main(int argc, char ** argv) {
     // tokenize the prompts and trim
     std::vector<std::vector<int32_t>> inputs;
     for (const auto & prompt : prompts) {
-        auto inp = ::common_tokenize(ctx, prompt, true, true);
+        auto inp = common_tokenize(ctx, prompt, true, true);
         if (inp.size() > n_batch) {
             LOG_ERR("%s: number of tokens in input line (%lld) exceeds batch size (%lld), increase batch size and re-run\n",
                     __func__, (long long int) inp.size(), (long long int) n_batch);
diff --git a/examples/eval-callback/eval-callback.cpp b/examples/eval-callback/eval-callback.cpp
index ae08965c6..8a03d3bd6 100644
--- a/examples/eval-callback/eval-callback.cpp
+++ b/examples/eval-callback/eval-callback.cpp
@@ -129,7 +129,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
 static bool run(llama_context * ctx, const gpt_params & params) {
     const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
-    std::vector<llama_token> tokens = ::common_tokenize(ctx, params.prompt, add_bos);
+    std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, add_bos);
     if (llama_decode(ctx, llama_batch_get_one(tokens.data(), tokens.size(), 0, 0))) {
         LOG_ERR("%s : failed to eval\n", __func__);
diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index 3a6e93344..041734774 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -436,7 +436,7 @@ static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
     auto tim1 = std::chrono::high_resolution_clock::now();
     LOG_INF("%s: tokenizing the input ..\n", __func__);
-    std::vector<llama_token> tokens = ::common_tokenize(ctx, params.prompt, true);
+    std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
     auto tim2 = std::chrono::high_resolution_clock::now();
     LOG_INF("%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());
diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp
index 0f9e10893..143deed3b 100644
--- a/examples/infill/infill.cpp
+++ b/examples/infill/infill.cpp
@@ -202,8 +202,8 @@ int main(int argc, char ** argv) {
     std::vector<llama_token> embd_inp;
     std::vector<llama_token> embd_end;
-    std::vector<llama_token> inp_pfx = ::common_tokenize(ctx, params.input_prefix, false);
-    std::vector<llama_token> inp_sfx = ::common_tokenize(ctx, params.input_suffix, false);
+    std::vector<llama_token> inp_pfx = common_tokenize(ctx, params.input_prefix, false);
+    std::vector<llama_token> inp_sfx = common_tokenize(ctx, params.input_suffix, false);
     GGML_ASSERT(llama_token_prefix(model) >= 0);
     GGML_ASSERT(llama_token_suffix(model) >= 0);
@@ -505,8 +505,8 @@ int main(int argc, char ** argv) {
             }
             // tokenize new prefix and suffix
-            std::vector<llama_token> inp_pfx = ::common_tokenize(ctx, params.input_prefix, false);
-            std::vector<llama_token> inp_sfx = ::common_tokenize(ctx, params.input_suffix, false);
+            std::vector<llama_token> inp_pfx = common_tokenize(ctx, params.input_prefix, false);
+            std::vector<llama_token> inp_sfx = common_tokenize(ctx, params.input_suffix, false);
             inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(model));
             inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model));
@@ -579,7 +579,7 @@ int main(int argc, char ** argv) {
                 const size_t original_size = embd_inp.size();
-                const auto line_inp = ::common_tokenize(ctx, buffer, false);
+                const auto line_inp = common_tokenize(ctx, buffer, false);
                 LOG_DBG("input tokens: %s\n", string_from(ctx, line_inp).c_str());
                 embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
diff --git a/examples/llava/llava-cli.cpp b/examples/llava/llava-cli.cpp
index 13d6daa93..16947a904 100644
--- a/examples/llava/llava-cli.cpp
+++ b/examples/llava/llava-cli.cpp
@@ -37,7 +37,7 @@ static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
 static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
     std::string str2 = str;
-    std::vector<llama_token> embd_inp = ::common_tokenize(ctx_llama, str2, add_bos, true);
+    std::vector<llama_token> embd_inp = common_tokenize(ctx_llama, str2, add_bos, true);
     eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
     return true;
 }
@@ -159,14 +159,14 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         user_prompt = prompt.substr(image_pos + std::string("<image>").length());
         LOG_INF("system_prompt: %s\n", system_prompt.c_str());
         if (params->verbose_prompt) {
-            auto tmp = ::common_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
+            auto tmp = common_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
                 LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
         }
         LOG_INF("user_prompt: %s\n", user_prompt.c_str());
         if (params->verbose_prompt) {
-            auto tmp = ::common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
+            auto tmp = common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
                 LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
@@ -176,7 +176,7 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:";
         user_prompt = prompt + "\nASSISTANT:";
         if (params->verbose_prompt) {
-            auto tmp = ::common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
+            auto tmp = common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
                 LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
diff --git a/examples/llava/minicpmv-cli.cpp b/examples/llava/minicpmv-cli.cpp
index 4408f3d1b..5c960be7e 100644
--- a/examples/llava/minicpmv-cli.cpp
+++ b/examples/llava/minicpmv-cli.cpp
@@ -114,7 +114,7 @@ static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
 static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
     std::string str2 = str;
-    std::vector<llama_token> embd_inp = ::common_tokenize(ctx_llama, str2, add_bos, true);
+    std::vector<llama_token> embd_inp = common_tokenize(ctx_llama, str2, add_bos, true);
     return eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
 }
diff --git a/examples/lookahead/lookahead.cpp b/examples/lookahead/lookahead.cpp
index f35628c47..59aaeccf8 100644
--- a/examples/lookahead/lookahead.cpp
+++ b/examples/lookahead/lookahead.cpp
@@ -65,7 +65,7 @@ int main(int argc, char ** argv) {
     std::vector<llama_token> inp;
     std::vector<llama_token> all;
-    inp = ::common_tokenize(ctx, params.prompt, true, true);
+    inp = common_tokenize(ctx, params.prompt, true, true);
     all = inp;
     const int max_context_size = llama_n_ctx(ctx);
diff --git a/examples/lookup/lookup-create.cpp b/examples/lookup/lookup-create.cpp
index 2232be369..f76b7581f 100644
--- a/examples/lookup/lookup-create.cpp
+++ b/examples/lookup/lookup-create.cpp
@@ -31,7 +31,7 @@ int main(int argc, char ** argv){
     // tokenize the prompt
     std::vector<llama_token> inp;
-    inp = ::common_tokenize(ctx, params.prompt, true, true);
+    inp = common_tokenize(ctx, params.prompt, true, true);
     fprintf(stderr, "%s: tokenization done\n", __func__);
diff --git a/examples/lookup/lookup-stats.cpp b/examples/lookup/lookup-stats.cpp
index 27755b944..286a85c46 100644
--- a/examples/lookup/lookup-stats.cpp
+++ b/examples/lookup/lookup-stats.cpp
@@ -35,7 +35,7 @@ int main(int argc, char ** argv){
     // tokenize the prompt
     std::vector<llama_token> inp;
-    inp = ::common_tokenize(ctx, params.prompt, true, true);
+    inp = common_tokenize(ctx, params.prompt, true, true);
     llama_ngram_cache ngram_cache_context;
     llama_ngram_cache ngram_cache_dynamic;
diff --git a/examples/lookup/lookup.cpp b/examples/lookup/lookup.cpp
index 5f4ff7853..d77d4754f 100644
--- a/examples/lookup/lookup.cpp
+++ b/examples/lookup/lookup.cpp
@@ -38,7 +38,7 @@ int main(int argc, char ** argv){
     // tokenize the prompt
     std::vector<llama_token> inp;
-    inp = ::common_tokenize(ctx, params.prompt, true, true);
+    inp = common_tokenize(ctx, params.prompt, true, true);
     llama_ngram_cache ngram_cache_context;
     llama_ngram_cache ngram_cache_dynamic;
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index cfd340f6c..34781582f 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -296,7 +296,7 @@ int main(int argc, char ** argv) {
             : params.prompt;
         if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
             LOG_DBG("tokenize the prompt\n");
-            embd_inp = ::common_tokenize(ctx, prompt, true, true);
+            embd_inp = common_tokenize(ctx, prompt, true, true);
         } else {
             LOG_DBG("use session tokens\n");
             embd_inp = session_tokens;
@@ -415,7 +415,7 @@ int main(int argc, char ** argv) {
             for (const auto & antiprompt : params.antiprompt) {
                 LOG_INF("Reverse prompt: '%s'\n", antiprompt.c_str());
                 if (params.verbose_prompt) {
-                    auto tmp = ::common_tokenize(ctx, antiprompt, false, true);
+                    auto tmp = common_tokenize(ctx, antiprompt, false, true);
                     for (int i = 0; i < (int) tmp.size(); i++) {
                         LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx, tmp[i]).c_str());
                     }
@@ -430,7 +430,7 @@ int main(int argc, char ** argv) {
         if (!params.input_prefix.empty()) {
             LOG_INF("Input prefix: '%s'\n", params.input_prefix.c_str());
             if (params.verbose_prompt) {
-                auto tmp = ::common_tokenize(ctx, params.input_prefix, true, true);
+                auto tmp = common_tokenize(ctx, params.input_prefix, true, true);
                 for (int i = 0; i < (int) tmp.size(); i++) {
                     LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx, tmp[i]).c_str());
                 }
@@ -440,7 +440,7 @@ int main(int argc, char ** argv) {
         if (!params.input_suffix.empty()) {
             LOG_INF("Input suffix: '%s'\n", params.input_suffix.c_str());
             if (params.verbose_prompt) {
-                auto tmp = ::common_tokenize(ctx, params.input_suffix, false, true);
+                auto tmp = common_tokenize(ctx, params.input_suffix, false, true);
                 for (int i = 0; i < (int) tmp.size(); i++) {
                     LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx, tmp[i]).c_str());
                 }
@@ -788,7 +788,7 @@ int main(int argc, char ** argv) {
     if (params.interactive) {
         if (!params.antiprompt.empty()) {
             // tokenize and inject first reverse prompt
-            const auto first_antiprompt = ::common_tokenize(ctx, params.antiprompt.front(), false, true);
+            const auto first_antiprompt = common_tokenize(ctx, params.antiprompt.front(), false, true);
             embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
             is_antiprompt = true;
         }
@@ -862,9 +862,9 @@ int main(int argc, char ** argv) {
                         ? chat_add_and_format(model, chat_msgs, "user", std::move(buffer))
                         : std::move(buffer);
                     // TODO: one inconvenient of current chat template implementation is that we can't distinguish between user input and special tokens (prefix/postfix)
-                    const auto line_pfx = ::common_tokenize(ctx, params.input_prefix, false, true);
-                    const auto line_inp = ::common_tokenize(ctx, user_inp, false, format_chat);
-                    const auto line_sfx = ::common_tokenize(ctx, params.input_suffix, false, true);
+                    const auto line_pfx = common_tokenize(ctx, params.input_prefix, false, true);
+                    const auto line_inp = common_tokenize(ctx, user_inp, false, format_chat);
+                    const auto line_sfx = common_tokenize(ctx, params.input_suffix, false, true);
                     LOG_DBG("input tokens: %s\n", string_from(ctx, line_inp).c_str());
diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp
index 0e720807f..72cca11cd 100644
--- a/examples/parallel/parallel.cpp
+++ b/examples/parallel/parallel.cpp
@@ -164,7 +164,7 @@ int main(int argc, char ** argv) {
     }
     std::vector<llama_token> tokens_system;
-    tokens_system = ::common_tokenize(ctx, k_system, true);
+    tokens_system = common_tokenize(ctx, k_system, true);
     const int32_t n_tokens_system = tokens_system.size();
     llama_seq_id g_seq_id = 0;
@@ -256,7 +256,7 @@ int main(int argc, char ** argv) {
             // do not prepend BOS because we have a system prompt!
             std::vector<llama_token> tokens_prompt;
-            tokens_prompt = ::common_tokenize(ctx, client.prompt, false);
+            tokens_prompt = common_tokenize(ctx, client.prompt, false);
             for (size_t i = 0; i < tokens_prompt.size(); ++i) {
                 common_batch_add(batch, tokens_prompt[i], i + n_tokens_system, { client.id + 1 }, false);
diff --git a/examples/passkey/passkey.cpp b/examples/passkey/passkey.cpp
index 028094eb9..ae6e40bf4 100644
--- a/examples/passkey/passkey.cpp
+++ b/examples/passkey/passkey.cpp
@@ -92,10 +92,10 @@ int main(int argc, char ** argv) {
     // tokenize the prompt
     std::vector<llama_token> tokens_list;
-    tokens_list = ::common_tokenize(ctx, params.prompt, true);
+    tokens_list = common_tokenize(ctx, params.prompt, true);
     // tokenize the prefix and use it as a sink
-    const int n_tokens_prefix = ::common_tokenize(ctx, prompt_prefix, true).size();
+    const int n_tokens_prefix = common_tokenize(ctx, prompt_prefix, true).size();
     const int n_tokens_all = tokens_list.size();
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index d72e533fe..ade85430e 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -348,7 +348,7 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &
     LOG_INF("%s: tokenizing the input ..\n", __func__);
-    std::vector<llama_token> tokens = ::common_tokenize(ctx, params.prompt, true);
+    std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
     const int n_ctx = llama_n_ctx(ctx);
@@ -500,7 +500,7 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
     auto tim1 = std::chrono::high_resolution_clock::now();
     LOG_INF("%s: tokenizing the input ..\n", __func__);
-    std::vector<llama_token> tokens = ::common_tokenize(ctx, params.prompt, true);
+    std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
     auto tim2 = std::chrono::high_resolution_clock::now();
     LOG_INF("%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());
@@ -844,7 +844,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
             hs_cur.gold_ending_idx = std::stoi( prompt_lines[idx*6+1] );
             for (size_t j = 0; j < 4; j++) {
                 hs_cur.ending[j] = prompt_lines[idx*6+2+j];
-                hs_cur.seq_tokens[j] = ::common_tokenize(ctx, hs_cur.context + " " + hs_cur.ending[j], true);
+                hs_cur.seq_tokens[j] = common_tokenize(ctx, hs_cur.context + " " + hs_cur.ending[j], true);
             }
             // determine the common prefix of the endings
@@ -1136,8 +1136,8 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
     LOG_INF("%s : tokenizing selected tasks\n", __func__);
     for (auto & task : data) {
-        task.seq_tokens[0] = ::common_tokenize(ctx, task.first + task.choices[0] + task.second, true);
-        task.seq_tokens[1] = ::common_tokenize(ctx, task.first + task.choices[1] + task.second, true);
+        task.seq_tokens[0] = common_tokenize(ctx, task.first + task.choices[0] + task.second, true);
+        task.seq_tokens[1] = common_tokenize(ctx, task.first + task.choices[1] + task.second, true);
         task.common_prefix = 0;
         for (size_t k = 0; k < task.seq_tokens[0].size(); k++) {
@@ -1152,8 +1152,8 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
             task.seq_tokens[0].size() - task.common_prefix +
             task.seq_tokens[1].size() - task.common_prefix;
-        task.n_base1 = ::common_tokenize(ctx, task.first + task.choices[0], true).size();
-        task.n_base2 = ::common_tokenize(ctx, task.first + task.choices[1], true).size();
+        task.n_base1 = common_tokenize(ctx, task.first + task.choices[0], true).size();
+        task.n_base2 = common_tokenize(ctx, task.first + task.choices[1], true).size();
     }
     LOG_INF("%s : calculating winogrande score over selected tasks.\n", __func__);
diff --git a/examples/retrieval/retrieval.cpp b/examples/retrieval/retrieval.cpp
index 2aeb25ac8..b58543491 100644
--- a/examples/retrieval/retrieval.cpp
+++ b/examples/retrieval/retrieval.cpp
@@ -185,7 +185,7 @@ int main(int argc, char ** argv) {
     // tokenize the prompts and trim
     for (auto & chunk : chunks) {
-        auto inp = ::common_tokenize(ctx, chunk.textdata, true, false);
+        auto inp = common_tokenize(ctx, chunk.textdata, true, false);
         if (inp.size() > n_batch) {
             LOG_ERR("%s: chunk size (%lld) exceeds batch size (%lld), increase batch size and re-run\n",
                     __func__, (long long int) inp.size(), (long long int) n_batch);
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 2380d402c..bf29d2bcf 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -771,10 +771,10 @@ struct server_context {
                     std::vector<llama_token> p;
                     if (first) {
-                        p = ::common_tokenize(ctx, s, add_special, TMP_FORCE_SPECIAL);
+                        p = common_tokenize(ctx, s, add_special, TMP_FORCE_SPECIAL);
                         first = false;
                     } else {
-                        p = ::common_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
+                        p = common_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
                     }
                     prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
@@ -788,7 +788,7 @@ struct server_context {
             }
         } else {
             auto s = json_prompt.template get<std::string>();
-            prompt_tokens = ::common_tokenize(ctx, s, add_special, TMP_FORCE_SPECIAL);
+            prompt_tokens = common_tokenize(ctx, s, add_special, TMP_FORCE_SPECIAL);
         }
         return prompt_tokens;
@@ -1073,7 +1073,7 @@ struct server_context {
         system_tokens.clear();
         if (!system_prompt.empty()) {
-            system_tokens = ::common_tokenize(ctx, system_prompt, true);
+            system_tokens = common_tokenize(ctx, system_prompt, true);
             const int32_t n_batch = llama_n_batch(ctx);
             const int32_t n_tokens_prompt = system_tokens.size();
diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index a4b3c03cd..eb4498e7c 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -64,7 +64,7 @@ int main(int argc, char ** argv) {
     // tokenize the prompt
     std::vector<llama_token> tokens_list;
-    tokens_list = ::common_tokenize(ctx, params.prompt, true);
+    tokens_list = common_tokenize(ctx, params.prompt, true);
     const int n_ctx = llama_n_ctx(ctx);
     const int n_kv_req = tokens_list.size() + (n_predict - tokens_list.size());
diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp
index 815dab2a7..b6ab1b08a 100644
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@@ -134,7 +134,7 @@ int main(int argc, char ** argv) {
     // Tokenize the prompt
     std::vector<llama_token> inp;
-    inp = ::common_tokenize(ctx_tgt, params.prompt, true, true);
+    inp = common_tokenize(ctx_tgt, params.prompt, true, true);
     const int max_context_size = llama_n_ctx(ctx_tgt);
     const int max_tokens_list_size = max_context_size - 4;
diff --git a/examples/tokenize/tokenize.cpp b/examples/tokenize/tokenize.cpp
index 85cf44952..12ad54256 100644
--- a/examples/tokenize/tokenize.cpp
+++ b/examples/tokenize/tokenize.cpp
@@ -365,7 +365,7 @@ int main(int raw_argc, char ** raw_argv) {
     const bool parse_special = !no_parse_special;
     std::vector<llama_token> tokens;
-    tokens = ::common_tokenize(model, prompt, add_bos, parse_special);
+    tokens = common_tokenize(model, prompt, add_bos, parse_special);
     if (printing_ids) {
         printf("[");