common : rework usage print (wip)
commit 8f717fd3bb
parent 123175ea71
9 changed files with 106 additions and 145 deletions
@@ -289,7 +289,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
             invalid_param = true;
             return true;
         }
-        // This is temporary, in the future the samplign state will be moved fully to llama_sampling_context.
+        // TODO: this is temporary, in the future the sampling state will be moved fully to llama_sampling_context.
         params.seed = std::stoul(argv[i]);
         sparams.seed = std::stoul(argv[i]);
         return true;
@@ -901,11 +901,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.interactive = true;
         return true;
     }
-    if (arg == "--interactive-specials") {
-        params.interactive_specials = true;
-        return true;
-    }
-    if (arg == "--special") {
+    if (arg == "-sp" || arg == "--special") {
         params.special = true;
         return true;
     }
@@ -913,7 +909,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.embedding = true;
         return true;
     }
-    if (arg == "--interactive-first") {
+    if (arg == "-if" || arg == "--interactive-first") {
         params.interactive_first = true;
         return true;
     }
@@ -965,7 +961,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.flash_attn = true;
         return true;
     }
-    if (arg == "--color") {
+    if (arg == "-co" || arg == "--color") {
         params.use_color = true;
         return true;
     }
@@ -1252,10 +1248,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
         exit(0);
     }
-    if (arg == "--random-prompt") {
-        params.random_prompt = true;
-        return true;
-    }
     if (arg == "--in-prefix-bos") {
         params.input_prefix_bos = true;
         return true;
@@ -1349,6 +1341,16 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
     return false;
 }
 
+#ifdef __GNUC__
+#ifdef __MINGW32__
+#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#else
+#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
+#endif
+#else
+#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...)
+#endif
+
 void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     const llama_sampling_params & sparams = params.sparams;
 
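Note on the new macro: LLAMA_COMMON_ATTRIBUTE_FORMAT wraps GCC/Clang's format function attribute so the compiler can type-check printf-style calls; on MinGW it selects gnu_printf because the default printf archetype there follows the MSVC format rules. The two indices name the format-string parameter and the first variadic argument, and for non-static member functions the implicit this pointer counts as parameter 1, which is why the option_info constructor in the next hunk is annotated with (4, 5). A minimal standalone sketch of the same pattern, not part of this commit (ATTR_FORMAT and log_fmt are hypothetical names):

#include <cstdarg>
#include <cstdio>

// Mark argument 1 as the format string and argument 2 as the first value to
// check, so a mismatched "%d" against a string argument becomes a -Wformat
// warning at compile time instead of undefined behaviour at runtime.
#ifdef __GNUC__
#define ATTR_FORMAT(fmt_idx, arg_idx) __attribute__((format(printf, fmt_idx, arg_idx)))
#else
#define ATTR_FORMAT(fmt_idx, arg_idx)
#endif

ATTR_FORMAT(1, 2)
static void log_fmt(const char * fmt, ...) {
    va_list args;
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
}

int main() {
    log_fmt("n_ctx = %d\n", 4096);      // ok
    //log_fmt("n_ctx = %d\n", "4096");  // would be flagged by -Wformat
    return 0;
}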
@@ -1360,52 +1362,83 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     }
     sampler_type_names.pop_back();
 
+    struct option_info {
+        LLAMA_COMMON_ATTRIBUTE_FORMAT(4, 5)
+        option_info(const std::string & tags, const char * args, const char * desc, ...) : tags(tags), args(args), desc(desc) {
+            va_list args_list;
+            va_start(args_list, desc);
+            char buffer[1024];
+            vsnprintf(buffer, sizeof(buffer), desc, args_list);
+            va_end(args_list);
+            this->desc = buffer;
+        }
+
+        std::string tags;
+        std::string args;
+        std::string desc;
+    };
+
+    std::vector<option_info> options;
+
+    // TODO: filter by tags
+
+    options.push_back({ "*", "-h, --help, --usage", "print usage and exit" });
+    options.push_back({ "*", " --version", "show version and build info" });
+    options.push_back({ "*", "-co, --color", "colorise output to distinguish prompt and user input from generations (default: %s)", params.use_color ? "true" : "false" });
+    options.push_back({ "*", "-s, --seed SEED", "RNG seed (default: %d, use random seed for < 0)", params.seed });
+    options.push_back({ "*", "-t, --threads N", "number of threads to use during generation (default: %d)", params.n_threads });
+    options.push_back({ "*", "-tb, --threads-batch N", "number of threads to use during batch and prompt processing (default: same as --threads)" });
+    options.push_back({ "speculative", "-td, --threads-draft N", "number of threads to use during generation (default: same as --threads)" });
+    options.push_back({ "speculative", "-tbd, --threads-batch-draft N", "number of threads to use during batch and prompt processing (default: same as --threads-draft)" });
+    options.push_back({ "*", "-c, --ctx-size N", "size of the prompt context (default: %d, 0 = loaded from model)", params.n_ctx });
+    options.push_back({ "*", "-n, --n-predict N", "number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)", params.n_predict });
+    options.push_back({ "*", "-b, --batch-size N", "logical maximum batch size (default: %d)", params.n_batch });
+    options.push_back({ "*", "-ub, --ubatch-size N", "physical maximum batch size (default: %d)", params.n_ubatch });
+    options.push_back({ "*", "-p, --prompt PROMPT", "prompt to start generation with (default: empty)" });
+    options.push_back({ "*", "-f, --file FNAME", "a file containing the prompt (default: none)" });
+    options.push_back({ "*", "-bf, --binary-file FNAME", "binary file containing the prompt (default: none)" });
+    options.push_back({ "*", "-e, --escape", "process escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)" });
+    options.push_back({ "main", " --prompt-cache FNAME", "file to cache prompt state for faster startup (default: none)" });
+    options.push_back({ "main", " --prompt-cache-all", "if specified, saves user input and generations to cache as well\nnot supported with --interactive or other interactive options" });
+    options.push_back({ "main", " --prompt-cache-ro", "if specified, uses the prompt cache but does not update it" });
+    options.push_back({ "main", "-r, --reverse-prompt PROMPT", "halt generation at PROMPT, return control in interactive mode\ncan be specified more than once for multiple prompts" });
+
+    options.push_back({ "main", "-sp, --special", "special tokens output enabled (default: %s)", params.special ? "true" : "false" });
+    options.push_back({ "main", "-cnv, --conversation", "run in conversation mode (does not print special tokens and suffix/prefix) (default: %s)", params.conversation ? "true" : "false" });
+    options.push_back({ "main", "-ins, --instruct", "run in instruction mode (use with Alpaca models) (default: %s)", params.instruct ? "true" : "false" });
+    options.push_back({ "main", "-cml, --chatml", "run in chatml mode (use with ChatML-compatible models) (default: %s)", params.chatml ? "true" : "false" });
+    options.push_back({ "main infill", "-i, --interactive", "run in interactive mode (default: %s)", params.interactive ? "true" : "false" });
+    options.push_back({ "main infill", "-if, --interactive-first", "run in interactive mode and wait for input right away (default: %s)", params.interactive_first ? "true" : "false" });
+    options.push_back({ "main infill", "-mli, --multiline-input", "allows you to write or paste multiple lines without ending each in '\\'" });
+    options.push_back({ "main infill", " --in-prefix-bos", "prefix BOS to user inputs, preceding the `--in-prefix` string" });
+    options.push_back({ "main infill", " --in-prefix STRING", "string to prefix user inputs with (default: empty)" });
+    options.push_back({ "main infill", " --in-suffix STRING", "string to suffix after user inputs with (default: empty)" });
+
     printf("\n");
     printf("usage: %s [options]\n", argv[0]);
     printf("\n");
-    printf("options:\n");
-    printf(" -h, --help, --usage print usage and exit\n");
-    printf(" --version show version and build info\n");
-    printf(" -i, --interactive run in interactive mode\n");
-    printf(" --special special tokens output enabled\n");
-    printf(" --interactive-specials allow special tokens in user text, in interactive mode\n");
-    printf(" --interactive-first run in interactive mode and wait for input right away\n");
-    printf(" -cnv, --conversation run in conversation mode (does not print special tokens and suffix/prefix)\n");
-    printf(" -ins, --instruct run in instruction mode (use with Alpaca models)\n");
-    printf(" -cml, --chatml run in chatml mode (use with ChatML-compatible models)\n");
-    printf(" --multiline-input allows you to write or paste multiple lines without ending each in '\\'\n");
-    printf(" -r PROMPT, --reverse-prompt PROMPT\n");
-    printf(" halt generation at PROMPT, return control in interactive mode\n");
-    printf(" (can be specified more than once for multiple prompts).\n");
-    printf(" --color colorise output to distinguish prompt and user input from generations\n");
-    printf(" -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n");
-    printf(" -t N, --threads N number of threads to use during generation (default: %d)\n", params.n_threads);
-    printf(" -tb N, --threads-batch N\n");
-    printf(" number of threads to use during batch and prompt processing (default: same as --threads)\n");
-    printf(" -td N, --threads-draft N");
-    printf(" number of threads to use during generation (default: same as --threads)\n");
-    printf(" -tbd N, --threads-batch-draft N\n");
-    printf(" number of threads to use during batch and prompt processing (default: same as --threads-draft)\n");
-    printf(" -p PROMPT, --prompt PROMPT\n");
-    printf(" prompt to start generation with (default: empty)\n");
-    printf(" -e, --escape process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n");
-    printf(" --prompt-cache FNAME file to cache prompt state for faster startup (default: none)\n");
-    printf(" --prompt-cache-all if specified, saves user input and generations to cache as well.\n");
-    printf(" not supported with --interactive or other interactive options\n");
-    printf(" --prompt-cache-ro if specified, uses the prompt cache but does not update it.\n");
-    printf(" --random-prompt start with a randomized prompt.\n");
-    printf(" --in-prefix-bos prefix BOS to user inputs, preceding the `--in-prefix` string\n");
-    printf(" --in-prefix STRING string to prefix user inputs with (default: empty)\n");
-    printf(" --in-suffix STRING string to suffix after user inputs with (default: empty)\n");
-    printf(" -f FNAME, --file FNAME\n");
-    printf(" prompt file to start generation.\n");
-    printf(" -bf FNAME, --binary-file FNAME\n");
-    printf(" binary file containing multiple choice tasks.\n");
-    printf(" -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
-    printf(" -c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
-    printf(" -b N, --batch-size N logical maximum batch size (default: %d)\n", params.n_batch);
-    printf(" -ub N, --ubatch-size N\n");
-    printf(" physical maximum batch size (default: %d)\n", params.n_ubatch);
+    printf("options:\n\n");
+
+    for (const auto & o : options) {
+        printf(" %-32s", o.args.c_str());
+        if (o.args.length() > 34) {
+            printf("\n%34s", "");
+        }
+
+        //printf("%s\n", o.desc.c_str());
+        // print line by line and pad with spaces
+        const auto desc = o.desc;
+        size_t start = 0;
+        size_t end = desc.find('\n');
+        while (end != std::string::npos) {
+            printf("%s\n%34s", desc.substr(start, end - start).c_str(), "");
+            start = end + 1;
+            end = desc.find('\n', start);
+        }
+
+        printf("%s\n", desc.substr(start).c_str());
+    }
+
     printf(" --samplers samplers that will be used for generation in the order, separated by \';\'\n");
     printf(" (default: %s)\n", sampler_type_names.c_str());
     printf(" --sampling-seq simplified sequence for samplers that will be used (default: %s)\n", sampler_type_chars.c_str());
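For context on the hunk above: the usage text is being reworked so that each option lives in the options table and a single loop renders the two-column layout, wrapping multi-line descriptions under the description column instead of hand-padding every printf. A rough, self-contained sketch of that print loop (print_options and the sample entries are illustrative, not the exact llama.cpp code):

#include <cstdio>
#include <string>
#include <vector>

struct option_info {
    std::string args; // e.g. "-t, --threads N"
    std::string desc; // may contain '\n' for multi-line help text
};

// Print each option as a padded two-column row; every '\n' in the description
// starts a continuation line indented to the description column.
static void print_options(const std::vector<option_info> & options) {
    for (const auto & o : options) {
        printf("  %-32s", o.args.c_str());
        if (o.args.length() > 34) {
            printf("\n%34s", ""); // args column overflowed: start desc on its own line
        }
        size_t start = 0;
        size_t end   = o.desc.find('\n');
        while (end != std::string::npos) {
            printf("%s\n%34s", o.desc.substr(start, end - start).c_str(), "");
            start = end + 1;
            end   = o.desc.find('\n', start);
        }
        printf("%s\n", o.desc.substr(start).c_str());
    }
}

int main() {
    const std::vector<option_info> options = {
        { "-h, --help, --usage", "print usage and exit" },
        { "-r, --reverse-prompt PROMPT",
          "halt generation at PROMPT, return control in interactive mode\n"
          "can be specified more than once for multiple prompts" },
    };
    print_options(options);
    return 0;
}

The %-32s left-justified args column and the %34s padding of continuation lines are what keep wrapped descriptions aligned without manually spacing each help string.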
@@ -1549,6 +1582,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     printf(" print token count every N tokens (default: %d)\n", params.n_print);
     printf(" --check-tensors check model tensor data for invalid values\n");
     printf("\n");
+
 #ifndef LOG_DISABLE_LOGS
     log_print_usage();
 #endif // LOG_DISABLE_LOGS
@@ -1611,24 +1645,6 @@ std::string string_get_sortable_timestamp() {
     return std::string(timestamp_no_ns) + "." + std::string(timestamp_ns);
 }
 
-std::string string_random_prompt(std::mt19937 & rng) {
-    const int r = rng() % 10;
-    switch (r) {
-        case 0: return "So";
-        case 1: return "Once upon a time";
-        case 2: return "When";
-        case 3: return "The";
-        case 4: return "After";
-        case 5: return "If";
-        case 6: return "import";
-        case 7: return "He";
-        case 8: return "She";
-        case 9: return "They";
-    }
-
-    GGML_UNREACHABLE();
-}
-
 void string_process_escapes(std::string & input) {
     std::size_t input_len = input.length();
     std::size_t output_idx = 0;
@@ -2906,7 +2922,6 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
     yaml_dump_string_multiline(stream, "in_suffix", params.input_prefix.c_str());
     fprintf(stream, "instruct: %s # default: false\n", params.instruct ? "true" : "false");
     fprintf(stream, "interactive: %s # default: false\n", params.interactive ? "true" : "false");
-    fprintf(stream, "interactive_specials: %s # default: false\n", params.interactive_specials ? "true" : "false");
     fprintf(stream, "interactive_first: %s # default: false\n", params.interactive_first ? "true" : "false");
     fprintf(stream, "keep: %d # default: 0\n", params.n_keep);
     fprintf(stream, "logdir: %s # default: unset (no logging)\n", params.logdir.c_str());
@@ -2956,7 +2971,6 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
     fprintf(stream, "prompt_cache_all: %s # default: false\n", params.prompt_cache_all ? "true" : "false");
     fprintf(stream, "prompt_cache_ro: %s # default: false\n", params.prompt_cache_ro ? "true" : "false");
     yaml_dump_vector_int(stream, "prompt_tokens", prompt_tokens);
-    fprintf(stream, "random_prompt: %s # default: false\n", params.random_prompt ? "true" : "false");
     fprintf(stream, "repeat_penalty: %f # default: 1.1\n", sparams.penalty_repeat);
 
     fprintf(stream, "reverse_prompt:\n");
@@ -99,23 +99,23 @@ struct gpt_params {
     // // sampling parameters
     struct llama_sampling_params sparams;
 
     std::string model = ""; // model path
     std::string model_draft = ""; // draft model for speculative decoding
     std::string model_alias = "unknown"; // model alias
     std::string model_url = ""; // model url to download
     std::string hf_repo = ""; // HF repo
     std::string hf_file = ""; // HF file
     std::string prompt = "";
     std::string prompt_file = ""; // store the external prompt file name
     std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
     std::string input_prefix = ""; // string to prefix user inputs with
     std::string input_suffix = ""; // string to suffix user inputs with
-    std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
     std::string logdir = ""; // directory in which to save YAML log files
     std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding
     std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding
     std::string logits_file = ""; // file for saving *all* logits
 
+    std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
     std::vector<llama_model_kv_override> kv_overrides;
 
     // TODO: avoid tuple, use struct
@@ -143,11 +143,10 @@ struct gpt_params {
     bool kl_divergence = false; // compute KL divergence
 
     bool usage = false; // print usage
-    bool random_prompt = false; // do not randomize prompt if none provided
     bool use_color = false; // use color to distinguish generations and inputs
-    bool interactive = false; // interactive mode
-    bool interactive_specials = false; // whether to allow special tokens from user, during interactive mode
     bool special = false; // enable special token output
+    bool interactive = false; // interactive mode
+    bool interactive_first = false; // wait for user input immediately
     bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix)
     bool chatml = false; // chatml mode (used for models trained on chatml syntax)
     bool prompt_cache_all = false; // save user input and generations to prompt cache
@@ -155,7 +154,6 @@ struct gpt_params {
 
     bool embedding = false; // get only sentence embedding
     bool escape = false; // escape "\n", "\r", "\t", "\'", "\"", and "\\"
-    bool interactive_first = false; // wait for user input immediately
     bool multiline_input = false; // reverse the usage of `\`
     bool simple_io = false; // improves compatibility with subprocesses and limited consoles
     bool cont_batching = true; // insert new sequences for decoding on-the-fly
@@ -200,7 +198,6 @@ std::vector<std::string> string_split(std::string input, char separator);
 
 std::string string_strip(const std::string & str);
 std::string string_get_sortable_timestamp();
-std::string string_random_prompt(std::mt19937 & rng);
 
 bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
 void string_process_escapes(std::string & input);
@@ -80,9 +80,6 @@ int main(int argc, char ** argv) {
     fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
 
     std::mt19937 rng(params.seed);
-    if (params.random_prompt) {
-        params.prompt = string_random_prompt(rng);
-    }
 
     llama_backend_init();
     llama_numa_init(params.numa);
@@ -152,9 +152,6 @@ int main(int argc, char ** argv) {
     print_build_info();
 
     std::mt19937 rng(params.seed);
-    if (params.random_prompt) {
-        params.prompt = string_random_prompt(rng);
-    }
 
     llama_backend_init();
     llama_numa_init(params.numa);
@@ -598,9 +598,6 @@ int main(int argc, char ** argv) {
     fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
 
     std::mt19937 rng(params.seed);
-    if (params.random_prompt) {
-        params.prompt = string_random_prompt(rng);
-    }
 
     sparams.dataset = params.prompt_file;
     g_collector.set_parameters(std::move(sparams));
@@ -140,27 +140,6 @@ int main(int argc, char ** argv) {
         LOG_TEE("%s: warning: minimum context size is 8, using minimum size.\n", __func__);
         params.n_ctx = 8;
     }
-    if (params.instruct) {
-        printf("\n************\n");
-        printf("%s: please use the 'main' tool for instruct mode\n", __func__);
-        printf("************\n\n");
-
-        return 0;
-    }
-    if (params.chatml) {
-        printf("\n************\n");
-        printf("%s: please use the 'main' tool for chatml mode\n", __func__);
-        printf("************\n\n");
-
-        return 0;
-    }
-    if (!params.antiprompt.empty()) {
-        printf("\n************\n");
-        printf("%s: please use the 'main' tool for antiprompt mode\n", __func__);
-        printf("************\n\n");
-
-        return 0;
-    }
     if (!params.interactive_first && (params.input_prefix.empty() && params.input_suffix.empty())) {
         printf("\n************\n");
         printf("%s: please use '--interactive_first' or specify '--in_prefix' and/or '--in_suffix'\n", __func__);
@@ -168,20 +147,6 @@ int main(int argc, char ** argv) {
 
         return 0;
     }
-    if (params.random_prompt) {
-        printf("\n************\n");
-        printf("%s: please use the 'main' tool for random prompt mode\n", __func__);
-        printf("************\n\n");
-
-        return 0;
-    }
-    if (!params.path_prompt_cache.empty()) {
-        printf("\n************\n");
-        printf("%s: infill does not support prompt caching\n", __func__);
-        printf("************\n\n");
-
-        return 0;
-    }
 
     if (params.rope_freq_base != 0.0) {
         LOG_TEE("%s: warning: changing RoPE frequency base to %g.\n", __func__, params.rope_freq_base);
@@ -182,9 +182,6 @@ int main(int argc, char ** argv) {
     LOG_TEE("%s: seed = %u\n", __func__, params.seed);
 
     std::mt19937 rng(params.seed);
-    if (params.random_prompt) {
-        params.prompt = string_random_prompt(rng);
-    }
 
     LOG("%s: llama backend init\n", __func__);
     llama_backend_init();
@@ -893,7 +890,7 @@ int main(int argc, char ** argv) {
                 }
 
                 const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
-                const auto line_inp = ::llama_tokenize(ctx, buffer, false, params.interactive_specials);
+                const auto line_inp = ::llama_tokenize(ctx, buffer, false, false);
                 const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true);
 
                 LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
@@ -1032,7 +1032,7 @@ struct winogrande_entry {
     std::vector<llama_token> seq_tokens[2];
 };
 
-static std::vector<winogrande_entry> load_winogrande_from_csv(const std::string& prompt) {
+static std::vector<winogrande_entry> load_winogrande_from_csv(const std::string & prompt) {
     std::vector<winogrande_entry> result;
     std::istringstream in(prompt);
     std::string line;
@@ -2007,9 +2007,6 @@ int main(int argc, char ** argv) {
     fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
 
     std::mt19937 rng(params.seed);
-    if (params.random_prompt) {
-        params.prompt = string_random_prompt(rng);
-    }
 
     llama_backend_init();
     llama_numa_init(params.numa);
@@ -110,7 +110,7 @@
 //
 
 LLAMA_ATTRIBUTE_FORMAT(2, 3)
-static void llama_log_internal (ggml_log_level level, const char* format, ...);
+static void llama_log_internal (ggml_log_level level, const char * format, ...);
 static void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
 
 #define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)