Commit 9ae4d8a96d by Xuan Son Nguyen, 2024-09-05 15:55:44 +02:00 (parent 6a3a2fcc5b)
26 changed files with 1828 additions and 1770 deletions

File diff suppressed because it is too large


@@ -170,6 +170,7 @@ struct gpt_params {
     bool kl_divergence = false; // compute KL divergence
 
+    std::function<void(int, char **)> print_usage = nullptr; // print example-specific usage and example
 
     bool usage     = false; // print usage
     bool use_color = false; // use color to distinguish generations and inputs
     bool special   = false; // enable special token output
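The hunk above adds a print_usage callback member to gpt_params. Judging from the example call sites later in this commit, the three-argument gpt_params_parser_init overload presumably stashes its callback in this field so that gpt_params_parse can invoke it on a bad invocation; a hypothetical sketch of that wiring (not shown in this commit's visible hunks):

    // hypothetical wiring, inferred from the call sites below in this commit
    std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example ex,
                                                  std::function<void(int, char **)> print_usage) {
        params.print_usage = print_usage;          // remember the example's usage printer
        return gpt_params_parser_init(params, ex); // defer to the two-argument overload
    }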
@@ -279,73 +280,67 @@ struct gpt_params {
 };
 
 enum llama_example {
-    LLAMA_EXAMPLE_ALL,
-    LLAMA_EXAMPLE_SERVER,
+    LLAMA_EXAMPLE_COMMON,
+    LLAMA_EXAMPLE_SPECULATIVE,
+    LLAMA_EXAMPLE_MAIN,
+    LLAMA_EXAMPLE_INFILL,
+    LLAMA_EXAMPLE_EMBEDDING,
+    LLAMA_EXAMPLE_PERPLEXITY,
+    LLAMA_EXAMPLE_RETRIEVAL,
+    LLAMA_EXAMPLE_PASSKEY,
+    LLAMA_EXAMPLE_IMATRIX,
+    LLAMA_EXAMPLE_BENCH,
+    LLAMA_EXAMPLE_SERVER,
+    LLAMA_EXAMPLE_CVECTOR_GENERATOR,
+    LLAMA_EXAMPLE_EXPORT_LORA,
 
     LLAMA_EXAMPLE_COUNT,
 };
 
 struct llama_arg {
-    std::set<enum llama_example> examples = {LLAMA_EXAMPLE_ALL};
+    std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
     std::vector<std::string> args;
-    std::string value_ex;
+    std::string value_hint;   // help text or example for arg value
+    std::string value_hint_2; // for second arg value
     std::string env;
    std::string help;
-    std::function<bool(void)>        handler_void   = nullptr;
-    std::function<bool(std::string)> handler_string = nullptr;
-    std::function<bool(bool)>        handler_bool   = nullptr;
-    std::function<bool(int)>         handler_int    = nullptr;
-    std::function<bool(float)>       handler_float  = nullptr;
+    std::function<void(void)>                     handler_void    = nullptr;
+    std::function<void(std::string)>              handler_string  = nullptr;
+    std::function<void(std::string, std::string)> handler_str_str = nullptr;
+    std::function<void(int)>                      handler_int     = nullptr;
 
-    llama_arg(std::vector<std::string> args, std::string help, std::function<bool(std::string)> handler) : args(args), help(help), handler_string(handler) {}
+    llama_arg(std::vector<std::string> args, std::string value_hint, std::string help, std::function<void(std::string)> handler) : args(args), value_hint(value_hint), help(help), handler_string(handler) {}
 
-    llama_arg(std::vector<std::string> args, std::string help, std::function<bool(bool)> handler) : args(args), help(help), handler_bool(handler) {}
+    llama_arg(std::vector<std::string> args, std::string value_hint, std::string help, std::function<void(int)> handler) : args(args), value_hint(value_hint), help(help), handler_int(handler) {}
 
-    llama_arg(std::vector<std::string> args, std::string help, std::function<bool(void)> handler) : args(args), help(help), handler_void(handler) {}
+    llama_arg(std::vector<std::string> args, std::string help, std::function<void(void)> handler) : args(args), help(help), handler_void(handler) {}
 
-    llama_arg & set_examples(std::set<enum llama_example> _examples) {
-        examples = std::move(_examples);
+    // support 2 values for arg
+    llama_arg(std::vector<std::string> args, std::string value_hint, std::string value_hint_2, std::string help, std::function<void(std::string, std::string)> handler) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {}
+
+    llama_arg & set_examples(std::set<enum llama_example> examples) {
+        this->examples = std::move(examples);
         return *this;
     }
 
-    llama_arg & set_value_ex(std::string _value_ex) {
-        value_ex = std::move(_value_ex);
+    llama_arg & set_env(std::string env) {
+        this->env = std::move(env);
         return *this;
     }
 
-    llama_arg & set_env(std::string _env) {
-        env = _env;
-        return *this;
-    }
-
-    // utility function
-    static std::vector<std::string> break_str_into_lines(std::string input, size_t max_char_per_line) {
-        std::vector<std::string> result;
-        std::istringstream iss(input);
-        std::string word, line;
-        while (iss >> word) {
-            if (line.length() + !line.empty() + word.length() > max_char_per_line) {
-                if (!line.empty()) result.push_back(line);
-                line = word;
-            } else {
-                line += (!line.empty() ? " " : "") + word;
-            }
-        }
-        if (!line.empty()) result.push_back(line);
-        return result;
+    bool in_example(enum llama_example ex) {
+        return examples.find(ex) != examples.end();
     }
 };
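With the typed bool-returning handlers collapsed into void handlers and the chainable set_examples / set_env setters, registering an option becomes a single expression. A minimal sketch against the constructors above; the options, handler bodies, and gpt_params fields used here are illustrative assumptions, not lines from this commit:

    // illustrative registrations against the llama_arg API above
    gpt_params params;
    std::vector<llama_arg> options;

    // single-value option bound to an int handler, with an env-var fallback
    options.push_back(llama_arg(
        {"-c", "--ctx-size"}, "N",
        "size of the prompt context",
        [&params](int value) { params.n_ctx = value; }
    ).set_env("LLAMA_ARG_CTX_SIZE"));

    // two-value option, restricted to a subset of examples
    options.push_back(llama_arg(
        {"--lora-scaled"}, "FNAME", "SCALE",
        "path to LoRA adapter with user-defined scaling",
        [&params](std::string fname, std::string scale) {
            params.lora_adapters.push_back({ fname, std::stof(scale) });
        }
    ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));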
-std::vector<llama_arg> gpt_params_parser_register(gpt_params & params);
-bool gpt_params_parser_run(int argc, char ** argv, std::vector<llama_arg> & options);
+std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example ex);
+std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example ex, std::function<void(int, char **)> print_usage);
+bool gpt_params_parse   (int argc, char ** argv, gpt_params & params, std::vector<llama_arg> & options);
+bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params, std::vector<llama_arg> & options);
+void gpt_params_print_usage(std::vector<llama_arg> & options);
 
-void gpt_params_parse_from_env(gpt_params & params);
-void gpt_params_handle_model_default(gpt_params & params);
-bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params);
-bool gpt_params_parse   (int argc, char ** argv, gpt_params & params);
-bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param);
-void gpt_params_print_usage(int argc, char ** argv, const gpt_params & params);
 
 std::string gpt_params_get_system_info(const gpt_params & params);
 
 bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THREADS]);
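The in_example helper and the init/parse split above suggest how per-example filtering works: the full option table is built once against a gpt_params instance, then narrowed to the requesting example. A sketch of that idea only; register_all is a hypothetical stand-in, since the real registration body lives in the suppressed common.cpp diff:

    // hypothetical: the full registration table (in the suppressed common.cpp diff)
    static std::vector<llama_arg> register_all(gpt_params & params);

    std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example ex) {
        std::vector<llama_arg> options;
        for (auto & opt : register_all(params)) {
            // keep options tagged for this example or for every common example
            if (opt.in_example(ex) || opt.in_example(LLAMA_EXAMPLE_COMMON)) {
                options.push_back(opt);
            }
        }
        return options;
    }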


@@ -28,9 +28,7 @@ static std::vector<int> parse_list(char * p) {
     return ret;
 }
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     LOG_TEE("\nexample usage:\n");
     LOG_TEE("\n    %s -m model.gguf -c 2048 -b 2048 -ub 512 -npp 128,256,512 -ntg 128,256 -npl 1,2,4,8,16,32 [-pps]\n", argv[0]);
     LOG_TEE("\n");
@@ -39,8 +37,8 @@ static void print_usage(int argc, char ** argv, const gpt_params & params) {
 int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_BENCH, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -7,9 +7,7 @@
 #include <string>
 #include <vector>
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     LOG_TEE("\nexample usage:\n");
     LOG_TEE("\n    %s -m model.gguf -p \"Hello my name is\" -n 32 -np 4\n", argv[0]);
     LOG_TEE("\n");
@@ -21,8 +19,8 @@ int main(int argc, char ** argv) {
     params.prompt = "Hello my name is";
     params.n_predict = 32;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -35,9 +35,7 @@ static std::string tokens_to_str(llama_context * ctx, Iter begin, Iter end) {
     return ret;
 }
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     printf("\nexample usage:\n");
     printf("\n    CPU only:   %s -m ./llama-3.Q4_K_M.gguf\n", argv[0]);
     printf("\n    with GPU:   %s -m ./llama-3.Q4_K_M.gguf -ngl 99\n", argv[0]);
@@ -390,8 +388,8 @@ static int prepare_entries(gpt_params & params, train_context & ctx_train) {
 int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_CVECTOR_GENERATOR, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -79,8 +79,8 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
 int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_EMBEDDING);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -144,8 +144,8 @@ int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -391,9 +391,7 @@ struct lora_merge_ctx {
     }
 };
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     printf("\nexample usage:\n");
     printf("\n    %s -m base-model.gguf --lora lora-file.gguf -o merged-model-f16.gguf\n", argv[0]);
     printf("\nNOTE: output model is F16\n");
@@ -403,8 +401,8 @@ static void print_usage(int argc, char ** argv, const gpt_params & params) {
 int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_EXPORT_LORA, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -157,8 +157,8 @@ static std::string gritlm_instruction(const std::string & instruction) {
 int main(int argc, char * argv[]) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -17,9 +17,7 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     LOG_TEE("\nexample usage:\n");
     LOG_TEE("\n    %s \\\n"
             "       -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] [--verbosity 1] \\\n"
@@ -579,8 +577,8 @@ int main(int argc, char ** argv) {
     params.logits_all = true;
     params.verbosity = 1;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -106,8 +106,8 @@ int main(int argc, char ** argv) {
     llama_sampling_params & sparams = params.sparams;
     g_params = &params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_INFILL);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -112,9 +112,7 @@ struct llava_context {
     struct llama_model * model = NULL;
 };
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     LOG_TEE("\n example usage:\n");
     LOG_TEE("\n    %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> --image <path/to/another/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
     LOG_TEE("\n note: a lower temperature value like 0.1 is recommended for better quality.\n");
@@ -280,8 +278,8 @@ int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
@@ -293,7 +291,7 @@ int main(int argc, char ** argv) {
 #endif // LOG_DISABLE_LOGS
 
     if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
-        print_usage(argc, argv, {});
+        print_usage(argc, argv);
         return 1;
     }
 
     auto model = llava_init(&params);


@@ -253,8 +253,8 @@ int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        show_additional_info(argc, argv);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON, show_additional_info);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
@@ -266,7 +266,6 @@ int main(int argc, char ** argv) {
 #endif // LOG_DISABLE_LOGS
 
     if (params.mmproj.empty() || (params.image.empty())) {
-        gpt_params_print_usage(argc, argv, params);
         show_additional_info(argc, argv);
         return 1;
     }


@@ -37,8 +37,8 @@ struct ngram_container {
 int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -13,8 +13,8 @@
 int main(int argc, char ** argv){
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -15,8 +15,8 @@
 int main(int argc, char ** argv){
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -14,8 +14,8 @@
 int main(int argc, char ** argv){
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -131,12 +131,9 @@ static std::string chat_add_and_format(struct llama_model * model, std::vector<l
 int main(int argc, char ** argv) {
     gpt_params params;
     g_params = &params;
-    auto options = gpt_params_parser_register(params);
-    gpt_params_parser_run(argc, argv, options);
-    return 0;
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_MAIN);
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -100,8 +100,8 @@ int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -6,9 +6,7 @@
 #include <string>
 #include <vector>
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     LOG_TEE("\nexample usage:\n");
     LOG_TEE("\n    %s -m model.gguf --junk 250 --pos 90 --keep 32 --grp-attn-n 2 [--seed 1234]\n", argv[0]);
     LOG_TEE("\n");
@@ -21,8 +19,8 @@ int main(int argc, char ** argv) {
     params.n_keep = 32;
     params.i_pos = -1;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_PASSKEY, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -1967,8 +1967,8 @@ int main(int argc, char ** argv) {
     params.n_ctx = 512;
     params.logits_all = true;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_PERPLEXITY);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -4,9 +4,7 @@
 #include <algorithm>
 #include <fstream>
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     LOG_TEE("\nexample usage:\n");
     LOG_TEE("\n    %s --model ./models/bge-base-en-v1.5-f16.gguf --top-k 3 --context-file README.md --context-file License --chunk-size 100 --chunk-separator .\n", argv[0]);
     LOG_TEE("\n");
@@ -113,8 +111,8 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
 int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_RETRIEVAL, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -10,8 +10,8 @@ int main(int argc, char ** argv) {
     params.prompt = "The quick brown fox";
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -2491,14 +2491,11 @@ int main(int argc, char ** argv) {
     // own arguments required by this example
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_SERVER);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 
-    // parse arguments from environment variables
-    gpt_params_parse_from_env(params);
-
     // TODO: not great to use extern vars
     server_log_json = params.log_json;
     server_verbose  = params.verbosity > 0;


@@ -6,9 +6,7 @@
 #include <string>
 #include <vector>
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     LOG_TEE("\nexample usage:\n");
     LOG_TEE("\n    %s -m model.gguf -p \"Hello my name is\" -n 32\n", argv[0]);
     LOG_TEE("\n");
@@ -20,8 +18,8 @@ int main(int argc, char ** argv) {
     params.prompt = "Hello my name is";
     params.n_predict = 32;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }


@@ -27,8 +27,8 @@ struct seq_draft {
 int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_SPECULATIVE);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }