no longer necessary to disambiguate common functions with ::
parent e58d3b1214
commit aee57d44c6
21 changed files with 48 additions and 48 deletions
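
In C++, a leading `::` forces name lookup to start in the global namespace. That is only needed when an unqualified call would find a different declaration first, for example a member function of the same name. Since the `common_*` helpers no longer collide with anything in the calling scopes, the qualifier can be dropped mechanically at every call site, which is all the hunks below do. A minimal sketch of the lookup rule, with hypothetical names (`tokenize`, `worker_v1`, `worker_v2` are illustrations, not code from this repository):

    #include <string>
    #include <vector>

    // free function in the global namespace, standing in for common_tokenize
    std::vector<int> tokenize(const std::string & s) { return { (int) s.size() }; }

    struct worker_v1 {
        // a member with the same name hides the free function inside the class
        std::vector<int> tokenize(const std::string &) { return {}; }

        void run(const std::string & s) {
            auto a = tokenize(s);   // unqualified: resolves to worker_v1::tokenize
            auto b = ::tokenize(s); // qualified: forces the global tokenize
            (void) a; (void) b;
        }
    };

    struct worker_v2 {
        // no member named tokenize: the unqualified call already finds the
        // global function, so the :: qualifier is redundant
        void run(const std::string & s) {
            auto a = tokenize(s);
            (void) a;
        }
    };

Once the collision is gone, `tokenize` and `::tokenize` name the same function, so removing the qualifier does not change behavior.
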
@@ -51,7 +51,7 @@ int main(int argc, char ** argv) {
     // tokenize the prompt

     std::vector<llama_token> tokens_list;
-    tokens_list = ::common_tokenize(model, params.prompt, true);
+    tokens_list = common_tokenize(model, params.prompt, true);

     const int n_kv_req = tokens_list.size() + (n_predict - tokens_list.size())*n_parallel;

@@ -272,8 +272,8 @@ struct tokenized_prompt {

     tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) {
         const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
-        tokens_pos = ::common_tokenize(ctx, pos, add_bos, true);
-        tokens_neg = ::common_tokenize(ctx, neg, add_bos, true);
+        tokens_pos = common_tokenize(ctx, pos, add_bos, true);
+        tokens_neg = common_tokenize(ctx, neg, add_bos, true);
         max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());
         padding_seq(ctx, tokens_pos, max_seq_len);
         padding_seq(ctx, tokens_neg, max_seq_len);
@@ -281,7 +281,7 @@ struct tokenized_prompt {

     void padding_seq(llama_context * ctx, std::vector<llama_token> & tokens, size_t len) {
         // TODO: customize padding token
-        std::vector<llama_token> pad_tokens = ::common_tokenize(ctx, " ", false);
+        std::vector<llama_token> pad_tokens = common_tokenize(ctx, " ", false);
         llama_token pad_tok = pad_tokens.back();
         while (tokens.size() < len) {
             tokens.push_back(pad_tok);

@@ -135,7 +135,7 @@ int main(int argc, char ** argv) {
     // tokenize the prompts and trim
     std::vector<std::vector<int32_t>> inputs;
     for (const auto & prompt : prompts) {
-        auto inp = ::common_tokenize(ctx, prompt, true, true);
+        auto inp = common_tokenize(ctx, prompt, true, true);
         if (inp.size() > n_batch) {
             LOG_ERR("%s: number of tokens in input line (%lld) exceeds batch size (%lld), increase batch size and re-run\n",
                     __func__, (long long int) inp.size(), (long long int) n_batch);

@@ -129,7 +129,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
 static bool run(llama_context * ctx, const gpt_params & params) {
     const bool add_bos = llama_add_bos_token(llama_get_model(ctx));

-    std::vector<llama_token> tokens = ::common_tokenize(ctx, params.prompt, add_bos);
+    std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, add_bos);

     if (llama_decode(ctx, llama_batch_get_one(tokens.data(), tokens.size(), 0, 0))) {
         LOG_ERR("%s : failed to eval\n", __func__);

@@ -436,7 +436,7 @@ static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
     auto tim1 = std::chrono::high_resolution_clock::now();
     LOG_INF("%s: tokenizing the input ..\n", __func__);

-    std::vector<llama_token> tokens = ::common_tokenize(ctx, params.prompt, true);
+    std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);

     auto tim2 = std::chrono::high_resolution_clock::now();
     LOG_INF("%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());

@@ -202,8 +202,8 @@ int main(int argc, char ** argv) {

     std::vector<llama_token> embd_inp;
     std::vector<llama_token> embd_end;
-    std::vector<llama_token> inp_pfx = ::common_tokenize(ctx, params.input_prefix, false);
-    std::vector<llama_token> inp_sfx = ::common_tokenize(ctx, params.input_suffix, false);
+    std::vector<llama_token> inp_pfx = common_tokenize(ctx, params.input_prefix, false);
+    std::vector<llama_token> inp_sfx = common_tokenize(ctx, params.input_suffix, false);

     GGML_ASSERT(llama_token_prefix(model) >= 0);
     GGML_ASSERT(llama_token_suffix(model) >= 0);
@@ -505,8 +505,8 @@ int main(int argc, char ** argv) {
             }

             // tokenize new prefix and suffix
-            std::vector<llama_token> inp_pfx = ::common_tokenize(ctx, params.input_prefix, false);
-            std::vector<llama_token> inp_sfx = ::common_tokenize(ctx, params.input_suffix, false);
+            std::vector<llama_token> inp_pfx = common_tokenize(ctx, params.input_prefix, false);
+            std::vector<llama_token> inp_sfx = common_tokenize(ctx, params.input_suffix, false);

             inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(model));
             inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model));
@@ -579,7 +579,7 @@ int main(int argc, char ** argv) {

             const size_t original_size = embd_inp.size();

-            const auto line_inp = ::common_tokenize(ctx, buffer, false);
+            const auto line_inp = common_tokenize(ctx, buffer, false);
             LOG_DBG("input tokens: %s\n", string_from(ctx, line_inp).c_str());

             embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());

@@ -37,7 +37,7 @@ static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {

 static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
     std::string str2 = str;
-    std::vector<llama_token> embd_inp = ::common_tokenize(ctx_llama, str2, add_bos, true);
+    std::vector<llama_token> embd_inp = common_tokenize(ctx_llama, str2, add_bos, true);
     eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
     return true;
 }
@@ -159,14 +159,14 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
     user_prompt = prompt.substr(image_pos + std::string("<image>").length());
     LOG_INF("system_prompt: %s\n", system_prompt.c_str());
     if (params->verbose_prompt) {
-        auto tmp = ::common_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
+        auto tmp = common_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
         for (int i = 0; i < (int) tmp.size(); i++) {
             LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
         }
     }
     LOG_INF("user_prompt: %s\n", user_prompt.c_str());
     if (params->verbose_prompt) {
-        auto tmp = ::common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
+        auto tmp = common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
         for (int i = 0; i < (int) tmp.size(); i++) {
             LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
         }
@@ -176,7 +176,7 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:";
         user_prompt = prompt + "\nASSISTANT:";
         if (params->verbose_prompt) {
-            auto tmp = ::common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
+            auto tmp = common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
                 LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }

@@ -114,7 +114,7 @@ static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {

 static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
     std::string str2 = str;
-    std::vector<llama_token> embd_inp = ::common_tokenize(ctx_llama, str2, add_bos, true);
+    std::vector<llama_token> embd_inp = common_tokenize(ctx_llama, str2, add_bos, true);
     return eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
 }

@@ -65,7 +65,7 @@ int main(int argc, char ** argv) {
     std::vector<llama_token> inp;
     std::vector<llama_token> all;

-    inp = ::common_tokenize(ctx, params.prompt, true, true);
+    inp = common_tokenize(ctx, params.prompt, true, true);
     all = inp;

     const int max_context_size = llama_n_ctx(ctx);

@@ -31,7 +31,7 @@ int main(int argc, char ** argv){

     // tokenize the prompt
     std::vector<llama_token> inp;
-    inp = ::common_tokenize(ctx, params.prompt, true, true);
+    inp = common_tokenize(ctx, params.prompt, true, true);
     fprintf(stderr, "%s: tokenization done\n", __func__);


@@ -35,7 +35,7 @@ int main(int argc, char ** argv){

     // tokenize the prompt
     std::vector<llama_token> inp;
-    inp = ::common_tokenize(ctx, params.prompt, true, true);
+    inp = common_tokenize(ctx, params.prompt, true, true);

     llama_ngram_cache ngram_cache_context;
     llama_ngram_cache ngram_cache_dynamic;

@@ -38,7 +38,7 @@ int main(int argc, char ** argv){

     // tokenize the prompt
     std::vector<llama_token> inp;
-    inp = ::common_tokenize(ctx, params.prompt, true, true);
+    inp = common_tokenize(ctx, params.prompt, true, true);

     llama_ngram_cache ngram_cache_context;
     llama_ngram_cache ngram_cache_dynamic;

@@ -296,7 +296,7 @@ int main(int argc, char ** argv) {
         : params.prompt;
     if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
         LOG_DBG("tokenize the prompt\n");
-        embd_inp = ::common_tokenize(ctx, prompt, true, true);
+        embd_inp = common_tokenize(ctx, prompt, true, true);
     } else {
         LOG_DBG("use session tokens\n");
         embd_inp = session_tokens;
@@ -415,7 +415,7 @@ int main(int argc, char ** argv) {
         for (const auto & antiprompt : params.antiprompt) {
             LOG_INF("Reverse prompt: '%s'\n", antiprompt.c_str());
             if (params.verbose_prompt) {
-                auto tmp = ::common_tokenize(ctx, antiprompt, false, true);
+                auto tmp = common_tokenize(ctx, antiprompt, false, true);
                 for (int i = 0; i < (int) tmp.size(); i++) {
                     LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx, tmp[i]).c_str());
                 }
@@ -430,7 +430,7 @@ int main(int argc, char ** argv) {
         if (!params.input_prefix.empty()) {
             LOG_INF("Input prefix: '%s'\n", params.input_prefix.c_str());
             if (params.verbose_prompt) {
-                auto tmp = ::common_tokenize(ctx, params.input_prefix, true, true);
+                auto tmp = common_tokenize(ctx, params.input_prefix, true, true);
                 for (int i = 0; i < (int) tmp.size(); i++) {
                     LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx, tmp[i]).c_str());
                 }
@@ -440,7 +440,7 @@ int main(int argc, char ** argv) {
         if (!params.input_suffix.empty()) {
             LOG_INF("Input suffix: '%s'\n", params.input_suffix.c_str());
             if (params.verbose_prompt) {
-                auto tmp = ::common_tokenize(ctx, params.input_suffix, false, true);
+                auto tmp = common_tokenize(ctx, params.input_suffix, false, true);
                 for (int i = 0; i < (int) tmp.size(); i++) {
                     LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx, tmp[i]).c_str());
                 }
@@ -788,7 +788,7 @@ int main(int argc, char ** argv) {
     if (params.interactive) {
         if (!params.antiprompt.empty()) {
             // tokenize and inject first reverse prompt
-            const auto first_antiprompt = ::common_tokenize(ctx, params.antiprompt.front(), false, true);
+            const auto first_antiprompt = common_tokenize(ctx, params.antiprompt.front(), false, true);
             embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
             is_antiprompt = true;
         }
@@ -862,9 +862,9 @@ int main(int argc, char ** argv) {
                     ? chat_add_and_format(model, chat_msgs, "user", std::move(buffer))
                     : std::move(buffer);
                 // TODO: one inconvenient of current chat template implementation is that we can't distinguish between user input and special tokens (prefix/postfix)
-                const auto line_pfx = ::common_tokenize(ctx, params.input_prefix, false, true);
-                const auto line_inp = ::common_tokenize(ctx, user_inp, false, format_chat);
-                const auto line_sfx = ::common_tokenize(ctx, params.input_suffix, false, true);
+                const auto line_pfx = common_tokenize(ctx, params.input_prefix, false, true);
+                const auto line_inp = common_tokenize(ctx, user_inp, false, format_chat);
+                const auto line_sfx = common_tokenize(ctx, params.input_suffix, false, true);

                 LOG_DBG("input tokens: %s\n", string_from(ctx, line_inp).c_str());

@@ -164,7 +164,7 @@ int main(int argc, char ** argv) {
     }

     std::vector<llama_token> tokens_system;
-    tokens_system = ::common_tokenize(ctx, k_system, true);
+    tokens_system = common_tokenize(ctx, k_system, true);
     const int32_t n_tokens_system = tokens_system.size();

     llama_seq_id g_seq_id = 0;
@@ -256,7 +256,7 @@ int main(int argc, char ** argv) {

             // do not prepend BOS because we have a system prompt!
             std::vector<llama_token> tokens_prompt;
-            tokens_prompt = ::common_tokenize(ctx, client.prompt, false);
+            tokens_prompt = common_tokenize(ctx, client.prompt, false);

             for (size_t i = 0; i < tokens_prompt.size(); ++i) {
                 common_batch_add(batch, tokens_prompt[i], i + n_tokens_system, { client.id + 1 }, false);

@@ -92,10 +92,10 @@ int main(int argc, char ** argv) {

     // tokenize the prompt
     std::vector<llama_token> tokens_list;
-    tokens_list = ::common_tokenize(ctx, params.prompt, true);
+    tokens_list = common_tokenize(ctx, params.prompt, true);

     // tokenize the prefix and use it as a sink
-    const int n_tokens_prefix = ::common_tokenize(ctx, prompt_prefix, true).size();
+    const int n_tokens_prefix = common_tokenize(ctx, prompt_prefix, true).size();

     const int n_tokens_all = tokens_list.size();

@@ -348,7 +348,7 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &

     LOG_INF("%s: tokenizing the input ..\n", __func__);

-    std::vector<llama_token> tokens = ::common_tokenize(ctx, params.prompt, true);
+    std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);

     const int n_ctx = llama_n_ctx(ctx);

@@ -500,7 +500,7 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
     auto tim1 = std::chrono::high_resolution_clock::now();
     LOG_INF("%s: tokenizing the input ..\n", __func__);

-    std::vector<llama_token> tokens = ::common_tokenize(ctx, params.prompt, true);
+    std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);

     auto tim2 = std::chrono::high_resolution_clock::now();
     LOG_INF("%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());
@@ -844,7 +844,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
         hs_cur.gold_ending_idx = std::stoi( prompt_lines[idx*6+1] );
         for (size_t j = 0; j < 4; j++) {
             hs_cur.ending[j] = prompt_lines[idx*6+2+j];
-            hs_cur.seq_tokens[j] = ::common_tokenize(ctx, hs_cur.context + " " + hs_cur.ending[j], true);
+            hs_cur.seq_tokens[j] = common_tokenize(ctx, hs_cur.context + " " + hs_cur.ending[j], true);
         }

         // determine the common prefix of the endings
@@ -1136,8 +1136,8 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
     LOG_INF("%s : tokenizing selected tasks\n", __func__);

     for (auto & task : data) {
-        task.seq_tokens[0] = ::common_tokenize(ctx, task.first + task.choices[0] + task.second, true);
-        task.seq_tokens[1] = ::common_tokenize(ctx, task.first + task.choices[1] + task.second, true);
+        task.seq_tokens[0] = common_tokenize(ctx, task.first + task.choices[0] + task.second, true);
+        task.seq_tokens[1] = common_tokenize(ctx, task.first + task.choices[1] + task.second, true);

         task.common_prefix = 0;
         for (size_t k = 0; k < task.seq_tokens[0].size(); k++) {
@@ -1152,8 +1152,8 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
             task.seq_tokens[0].size() - task.common_prefix +
             task.seq_tokens[1].size() - task.common_prefix;

-        task.n_base1 = ::common_tokenize(ctx, task.first + task.choices[0], true).size();
-        task.n_base2 = ::common_tokenize(ctx, task.first + task.choices[1], true).size();
+        task.n_base1 = common_tokenize(ctx, task.first + task.choices[0], true).size();
+        task.n_base2 = common_tokenize(ctx, task.first + task.choices[1], true).size();
     }

     LOG_INF("%s : calculating winogrande score over selected tasks.\n", __func__);

@@ -185,7 +185,7 @@ int main(int argc, char ** argv) {

     // tokenize the prompts and trim
     for (auto & chunk : chunks) {
-        auto inp = ::common_tokenize(ctx, chunk.textdata, true, false);
+        auto inp = common_tokenize(ctx, chunk.textdata, true, false);
         if (inp.size() > n_batch) {
             LOG_ERR("%s: chunk size (%lld) exceeds batch size (%lld), increase batch size and re-run\n",
                     __func__, (long long int) inp.size(), (long long int) n_batch);

@@ -771,10 +771,10 @@ struct server_context {

                 std::vector<llama_token> p;
                 if (first) {
-                    p = ::common_tokenize(ctx, s, add_special, TMP_FORCE_SPECIAL);
+                    p = common_tokenize(ctx, s, add_special, TMP_FORCE_SPECIAL);
                     first = false;
                 } else {
-                    p = ::common_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
+                    p = common_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
                 }

                 prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
@@ -788,7 +788,7 @@ struct server_context {
             }
         } else {
             auto s = json_prompt.template get<std::string>();
-            prompt_tokens = ::common_tokenize(ctx, s, add_special, TMP_FORCE_SPECIAL);
+            prompt_tokens = common_tokenize(ctx, s, add_special, TMP_FORCE_SPECIAL);
         }

         return prompt_tokens;
@@ -1073,7 +1073,7 @@ struct server_context {
         system_tokens.clear();

         if (!system_prompt.empty()) {
-            system_tokens = ::common_tokenize(ctx, system_prompt, true);
+            system_tokens = common_tokenize(ctx, system_prompt, true);

             const int32_t n_batch = llama_n_batch(ctx);
             const int32_t n_tokens_prompt = system_tokens.size();

@@ -64,7 +64,7 @@ int main(int argc, char ** argv) {
     // tokenize the prompt

     std::vector<llama_token> tokens_list;
-    tokens_list = ::common_tokenize(ctx, params.prompt, true);
+    tokens_list = common_tokenize(ctx, params.prompt, true);

     const int n_ctx = llama_n_ctx(ctx);
     const int n_kv_req = tokens_list.size() + (n_predict - tokens_list.size());

@@ -134,7 +134,7 @@ int main(int argc, char ** argv) {

     // Tokenize the prompt
     std::vector<llama_token> inp;
-    inp = ::common_tokenize(ctx_tgt, params.prompt, true, true);
+    inp = common_tokenize(ctx_tgt, params.prompt, true, true);

     const int max_context_size = llama_n_ctx(ctx_tgt);
     const int max_tokens_list_size = max_context_size - 4;

@@ -365,7 +365,7 @@ int main(int raw_argc, char ** raw_argv) {
     const bool parse_special = !no_parse_special;

     std::vector<llama_token> tokens;
-    tokens = ::common_tokenize(model, prompt, add_bos, parse_special);
+    tokens = common_tokenize(model, prompt, add_bos, parse_special);

     if (printing_ids) {
         printf("[");