Apply suggestions to main.cpp
parent b0ed03b9ab
commit 912e6246d6

1 changed file with 6 additions and 9 deletions
main.cpp | 15 ++++++---------
@@ -98,8 +98,6 @@ int main(int argc, char ** argv) {
     // Add a space in front of the first character to match OG llama tokenizer behavior
     params.prompt.insert(0, 1, ' ');
-    // tokenize the prompt
-    std::vector<gpt_vocab::id> embd_inp = llama_tokenize_text(ctx, params.prompt);
 
     // prefix & suffix for instruct mode
     const std::vector<gpt_vocab::id> inp_pfx = ::llama_tokenize(vocab, "\n\n### Instruction:\n\n", true);
 
@@ -161,15 +159,15 @@ int main(int argc, char ** argv) {
         printf(ANSI_COLOR_YELLOW);
     }
 
-    if(!llama_injest_input(ctx, params.prompt))
+    if(!llama_ingest_input(ctx, params.prompt))
     {
-        fprintf(stderr, "Failed to injest prompt\n");
+        fprintf(stderr, "Failed to ingest prompt\n");
        return 1;
    };
 
    // display text
    input_noecho = false;
-    const std::vector<gpt_vocab::id>& embd = llama_context_get_embd(ctx);
+    const std::vector<gpt_vocab::id>& embd = llama_context_get_embedding(ctx);
    if (!input_noecho) {
        for (auto id : embd) {
            printf("%s", vocab.id_to_token[id].c_str());
@@ -183,9 +181,9 @@ int main(int argc, char ** argv) {
 
    const std::vector<gpt_vocab::id>& last_n_tokens = llama_context_get_last_n_tokens(ctx);
 
-    while (llama_context_not_finished(ctx) > 0) {
+    while (llama_context_is_finished(ctx) != true) {
        gpt_vocab::id model_output = 0;
-        bool response = llama_inference(ctx, model_output);
+        bool response = llama_infer(ctx, model_output);
        if (response) {
            printf("%s", vocab.id_to_token[model_output].c_str());
            fflush(stdout);
@@ -195,7 +193,6 @@ int main(int argc, char ** argv) {
            printf(ANSI_COLOR_RESET);
        }
-
 
        // in interactive mode, and not currently processing queued inputs;
        // check if we should prompt the user for more
        if (params.interactive) {
@@ -228,7 +225,7 @@ int main(int argc, char ** argv) {
                    line.pop_back(); // Remove the continue character
                }
                // Do not clear existing context in interactive mode
-                llama_init_context_with_prompt(ctx, buf, false);
+                llama_update_context_with_prompt(ctx, buf, false);
            }
 
            remaining_tokens -= line_inp.size();
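Taken together, the renames above outline the generation flow main.cpp drives through the llama context: ingest the prompt with llama_ingest_input(), echo the tokenized input from llama_context_get_embedding(), then call llama_infer() until llama_context_is_finished() returns true. Below is a minimal sketch of that flow, assuming only the calls visible in this diff; the run_generation() helper name and its exact parameter types are hypothetical, and llama_context / gpt_vocab come from the project's own headers.

#include <cstdio>
#include <string>
#include <vector>

// Hypothetical helper illustrating the context API named in this commit.
// Assumes llama_context, gpt_vocab and the llama_* functions are declared
// by the project's headers; this is a sketch, not the actual main.cpp.
bool run_generation(llama_context & ctx, gpt_vocab & vocab, const std::string & prompt) {
    // Feed the whole prompt into the context before sampling anything.
    if (!llama_ingest_input(ctx, prompt)) {
        fprintf(stderr, "Failed to ingest prompt\n");
        return false;
    }

    // Echo the tokenized prompt, mirroring the llama_context_get_embedding() loop above.
    for (auto id : llama_context_get_embedding(ctx)) {
        printf("%s", vocab.id_to_token[id].c_str());
    }
    fflush(stdout);

    // Sample one token per iteration until the context reports it is finished.
    while (llama_context_is_finished(ctx) != true) {
        gpt_vocab::id model_output = 0;
        if (llama_infer(ctx, model_output)) {
            printf("%s", vocab.id_to_token[model_output].c_str());
            fflush(stdout);
        }
    }
    return true;
}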