Change argument processing to allow prompt or file args. (#103)
parent 428aa7025a
commit 9116ae9b53
3 changed files with 38 additions and 26 deletions
43 chat.cpp
@@ -318,7 +318,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
     fin.close();

     std::vector<uint8_t> tmp;

     for (int i = 0; i < n_parts; ++i) {
         const int part_id = i;
         //const int part_id = n_parts - i - 1;
@@ -797,14 +797,6 @@ int main(int argc, char ** argv) {
     gpt_params params;

-    params.temp = 0.1f;
-    params.top_p = 0.95f;
-    params.n_ctx = 2048;
-    params.interactive = true;
-    params.interactive_start = true;
-    params.use_color = true;
-    params.model = "ggml-alpaca-7b-q4.bin";
-
     if (gpt_params_parse(argc, argv, params) == false) {
         return 1;
     }
@@ -856,13 +848,26 @@ int main(int argc, char ** argv) {
     // Add a space in front of the first character to match OG llama tokenizer behavior
     // params.prompt.insert(0, 1, ' ');
     // tokenize the prompt
-    std::vector<gpt_vocab::id> embd_inp;// = ::llama_tokenize(vocab, params.prompt, true);
+    std::vector<gpt_vocab::id> embd_inp;

     // params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());

     // // tokenize the reverse prompt
     // std::vector<gpt_vocab::id> antiprompt_inp = ::llama_tokenize(vocab, params.antiprompt, false);

+    std::vector<gpt_vocab::id> instruct_inp = ::llama_tokenize(vocab, " Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n", true);
+    std::vector<gpt_vocab::id> prompt_inp = ::llama_tokenize(vocab, "### Instruction:\n\n", true);
+    std::vector<gpt_vocab::id> response_inp = ::llama_tokenize(vocab, "### Response:\n\n", false);
+    embd_inp.insert(embd_inp.end(), instruct_inp.begin(), instruct_inp.end());
+
+    if(!params.prompt.empty()) {
+        std::vector<gpt_vocab::id> param_inp = ::llama_tokenize(vocab, params.prompt, true);
+        embd_inp.insert(embd_inp.end(), prompt_inp.begin(), prompt_inp.end());
+        embd_inp.insert(embd_inp.end(), param_inp.begin(), param_inp.end());
+        embd_inp.insert(embd_inp.end(), response_inp.begin(), response_inp.end());
+    }
+
     // fprintf(stderr, "\n");
     // fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
     // fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
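For orientation, a minimal sketch (not part of this commit) of the plain text that the token sequence assembled above corresponds to when a prompt is supplied via -p or -f; build_alpaca_context is a hypothetical helper, and tokenizer details such as the leading space and BOS handling are glossed over:

    // Sketch only: approximate text form of the seeded context. With no prompt,
    // only the instruction preamble is inserted and the chat stays interactive.
    #include <string>

    static std::string build_alpaca_context(const std::string & user_prompt) {
        std::string ctx =
            " Below is an instruction that describes a task. "
            "Write a response that appropriately completes the request.\n\n";
        if (!user_prompt.empty()) {
            ctx += "### Instruction:\n\n";
            ctx += user_prompt;           // the diff concatenates the token blocks with no extra separator here
            ctx += "### Response:\n\n";
        }
        return ctx;
    }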
@@ -871,13 +876,6 @@ int main(int argc, char ** argv) {
     // }
     // fprintf(stderr, "\n");

-    std::vector<gpt_vocab::id> instruct_inp = ::llama_tokenize(vocab, " Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n", true);
-    std::vector<gpt_vocab::id> prompt_inp = ::llama_tokenize(vocab, "### Instruction:\n\n", true);
-    std::vector<gpt_vocab::id> response_inp = ::llama_tokenize(vocab, "### Response:\n\n", false);
-
-    embd_inp.insert(embd_inp.end(), instruct_inp.begin(), instruct_inp.end());
-
     if (params.interactive) {
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
         struct sigaction sigint_action;
@@ -1076,9 +1074,14 @@ int main(int argc, char ** argv) {

         // end of text token
         if (embd.back() == 2) {
-            // fprintf(stderr, " [end of text]\n");
-            is_interacting = true;
-            continue;
+            if (params.interactive) {
+                is_interacting = true;
+                continue;
+            } else {
+                printf("\n");
+                fprintf(stderr, " [end of text]\n");
+                break;
+            }
         }
     }
8 utils.cpp
@@ -24,9 +24,17 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
         } else if (arg == "-t" || arg == "--threads") {
             params.n_threads = std::stoi(argv[++i]);
         } else if (arg == "-p" || arg == "--prompt") {
+            params.interactive = false;
+            params.interactive_start = false;
+            params.use_color = false;
+
             params.prompt = argv[++i];
         } else if (arg == "-f" || arg == "--file") {
+            params.interactive = false;
+            params.interactive_start = false;
+            params.use_color = false;
+
             std::ifstream file(argv[++i]);

             std::copy(std::istreambuf_iterator<char>(file),
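The std::copy call above is cut off in this view. A minimal sketch of how the -f branch presumably completes, assuming the file contents are appended to params.prompt (read_prompt_file is a hypothetical helper, not code from the commit):

    // Sketch only: stream the whole file named after -f into the prompt string.
    #include <algorithm>
    #include <fstream>
    #include <iterator>
    #include <string>

    static void read_prompt_file(const char * path, std::string & prompt) {
        std::ifstream file(path);                         // file named after -f
        std::copy(std::istreambuf_iterator<char>(file),   // every character of the file...
                  std::istreambuf_iterator<char>(),       // ...up to end of stream...
                  std::back_inserter(prompt));            // ...appended to the prompt (assumed target)
    }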
13 utils.h
@@ -12,28 +12,29 @@
 // CLI argument parsing
 //

 // The default parameters
 struct gpt_params {
     int32_t seed = -1; // RNG seed
     int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
     int32_t n_predict = 128; // new tokens to predict
     int32_t repeat_last_n = 64; // last n tokens to penalize
-    int32_t n_ctx = 512; //context size
+    int32_t n_ctx = 2048; //context size

     // sampling parameters
     int32_t top_k = 40;
     float top_p = 0.95f;
-    float temp = 0.80f;
+    float temp = 0.10f;
     float repeat_penalty = 1.30f;

     int32_t n_batch = 8; // batch size for prompt processing

-    std::string model = "models/lamma-7B/ggml-model.bin"; // model path
+    std::string model = "ggml-alpaca-7b-q4.bin"; // model path
     std::string prompt;

-    bool use_color = false; // use color to distinguish generations and inputs
+    bool use_color = true; // use color to distinguish generations and inputs

-    bool interactive = false; // interactive mode
-    bool interactive_start = false; // reverse prompt immediately
+    bool interactive = true; // interactive mode
+    bool interactive_start = true; // reverse prompt immediately
     std::string antiprompt = ""; // string upon seeing which more user input is prompted
 };
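Taken together with the new defaults above, a minimal sketch (hypothetical, not part of the commit) of how the parsed parameters now select the mode; it only uses gpt_params and gpt_params_parse as declared in utils.h:

    // Sketch only: running with no arguments keeps the interactive defaults,
    // while -p/--prompt or -f/--file switches to a single templated completion.
    #include "utils.h"
    #include <cstdio>

    int main(int argc, char ** argv) {
        gpt_params params;                            // interactive Alpaca chat by default
        if (gpt_params_parse(argc, argv, params) == false) {
            return 1;
        }
        if (params.interactive) {
            printf("no -p/-f given: start the interactive chat loop\n");
        } else {
            // the -p/-f handlers turned interactive, interactive_start and use_color off
            printf("one-shot completion of: %s\n", params.prompt.c_str());
        }
        return 0;
    }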