Merge 'origin/master' into steering
This commit is contained in:
commit
95dc4d7270
18 changed files with 333 additions and 153 deletions
|
@ -321,12 +321,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
|||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
} else if (arg == "--n-parts") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.n_parts = std::stoi(argv[i]);
|
||||
} else if (arg == "-h" || arg == "--help") {
|
||||
gpt_print_usage(argc, argv, default_params);
|
||||
exit(0);
|
||||
|
@ -387,7 +381,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
|||
}
|
||||
if (params.prompt_cache_all &&
|
||||
(params.interactive || params.interactive_first ||
|
||||
params.instruct || params.antiprompt.size())) {
|
||||
params.instruct)) {
|
||||
fprintf(stderr, "error: --prompt-cache-all not supported in interactive mode yet\n");
|
||||
gpt_print_usage(argc, argv, default_params);
|
||||
exit(1);
|
||||
|
@ -409,8 +403,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
|
|||
fprintf(stderr, " -ins, --instruct run in instruction mode (use with Alpaca models)\n");
|
||||
fprintf(stderr, " --multiline-input allows you to write or paste multiple lines without ending each in '\\'\n");
|
||||
fprintf(stderr, " -r PROMPT, --reverse-prompt PROMPT\n");
|
||||
fprintf(stderr, " run in interactive mode and poll user input upon seeing PROMPT (can be\n");
|
||||
fprintf(stderr, " specified more than once for multiple prompts).\n");
|
||||
fprintf(stderr, " halt generation at PROMPT, return control in interactive mode\n");
|
||||
fprintf(stderr, " (can be specified more than once for multiple prompts).\n");
|
||||
fprintf(stderr, " --color colorise output to distinguish prompt and user input from generations\n");
|
||||
fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n");
|
||||
fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
|
||||
|
@ -448,7 +442,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
|
|||
fprintf(stderr, " --no-penalize-nl do not penalize newline token\n");
|
||||
fprintf(stderr, " --memory-f32 use f32 instead of f16 for memory key+value\n");
|
||||
fprintf(stderr, " --temp N temperature (default: %.1f)\n", (double)params.temp);
|
||||
fprintf(stderr, " --n-parts N number of model parts (default: -1 = determine from dimensions)\n");
|
||||
fprintf(stderr, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
|
||||
fprintf(stderr, " --perplexity compute perplexity over the prompt\n");
|
||||
fprintf(stderr, " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
|
||||
|
@ -508,7 +501,6 @@ struct llama_context * llama_init_from_gpt_params(const gpt_params & params) {
|
|||
auto lparams = llama_context_default_params();
|
||||
|
||||
lparams.n_ctx = params.n_ctx;
|
||||
lparams.n_parts = params.n_parts;
|
||||
lparams.n_gpu_layers = params.n_gpu_layers;
|
||||
lparams.seed = params.seed;
|
||||
lparams.f16_kv = params.memory_f16;
|
||||
|
@ -792,7 +784,7 @@ bool console_readline(console_state & con_st, std::string & line) {
|
|||
break;
|
||||
}
|
||||
|
||||
if (input_char == WEOF || input_char == 0x04 /* Ctrl+D*/) {
|
||||
if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D*/) {
|
||||
end_of_stream = true;
|
||||
break;
|
||||
}
|
||||
|
@ -807,7 +799,7 @@ bool console_readline(console_state & con_st, std::string & line) {
|
|||
char32_t code = getchar32();
|
||||
if (code == '[' || code == 0x1B) {
|
||||
// Discard the rest of the escape sequence
|
||||
while ((code = getchar32()) != WEOF) {
|
||||
while ((code = getchar32()) != (char32_t) WEOF) {
|
||||
if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~') {
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue