diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp
index ee5f72974..4a15941f1 100644
--- a/examples/batched-bench/batched-bench.cpp
+++ b/examples/batched-bench/batched-bench.cpp
@@ -15,14 +15,14 @@ static void print_usage(int, char ** argv) {
 }
 
 int main(int argc, char ** argv) {
-    gpt_init();
-
     gpt_params params;
 
     if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_BENCH, print_usage)) {
         return 1;
     }
 
+    gpt_init();
+
     int is_pp_shared = params.is_pp_shared;
 
     std::vector<int> n_pp = params.n_pp;
diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp
index 349ddb7f2..7887a43d6 100644
--- a/examples/batched/batched.cpp
+++ b/examples/batched/batched.cpp
@@ -15,8 +15,6 @@ static void print_usage(int, char ** argv) {
 }
 
 int main(int argc, char ** argv) {
-    gpt_log_init();
-
     gpt_params params;
 
     params.prompt = "Hello my name is";
@@ -26,6 +24,7 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    gpt_init();
     // number of parallel batches
     int n_parallel = params.n_parallel;
 
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 3e2712753..a438dcb5a 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -79,14 +79,14 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
 }
 
 int main(int argc, char ** argv) {
-    gpt_log_init();
-
     gpt_params params;
 
     if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_EMBEDDING)) {
         return 1;
     }
 
+    gpt_init();
+
     params.embedding = true;
     // For non-causal models, batch size must be equal to ubatch size
     params.n_ubatch = params.n_batch;
diff --git a/examples/eval-callback/eval-callback.cpp b/examples/eval-callback/eval-callback.cpp
index b0ff671f1..6d629fe4e 100644
--- a/examples/eval-callback/eval-callback.cpp
+++ b/examples/eval-callback/eval-callback.cpp
@@ -140,8 +140,6 @@ static bool run(llama_context * ctx, const gpt_params & params) {
 }
 
 int main(int argc, char ** argv) {
-    gpt_init();
-
     callback_data cb_data;
 
     gpt_params params;
@@ -150,6 +148,8 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    gpt_init();
+
     llama_backend_init();
     llama_numa_init(params.numa);
 
diff --git a/examples/gritlm/gritlm.cpp b/examples/gritlm/gritlm.cpp
index 14c715202..20b99a4fd 100644
--- a/examples/gritlm/gritlm.cpp
+++ b/examples/gritlm/gritlm.cpp
@@ -158,6 +158,8 @@ int main(int argc, char * argv[]) {
         return 1;
     }
 
+    gpt_init();
+
     llama_model_params mparams = llama_model_params_from_gpt_params(params);
     llama_context_params cparams = llama_context_params_from_gpt_params(params);
 
diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index 0f823d07a..55d17b228 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -573,8 +573,6 @@ static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
 }
 
 int main(int argc, char ** argv) {
-    gpt_init();
-
     gpt_params params;
 
     params.n_ctx = 512;
@@ -585,6 +583,8 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    gpt_init();
+
     params.n_batch = std::min(params.n_batch, params.n_ctx);
 
     g_collector.set_params(params);
diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp
index 7c7c29276..b77b876cc 100644
--- a/examples/infill/infill.cpp
+++ b/examples/infill/infill.cpp
@@ -104,8 +104,6 @@ static void sigint_handler(int signo) {
 #endif
 
 int main(int argc, char ** argv) {
-    gpt_init();
-
     gpt_params params;
     g_params = &params;
 
@@ -113,6 +111,8 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    gpt_init();
+
     auto & sparams = params.sparams;
 
     console::init(params.simple_io, params.use_color);
diff --git a/examples/llava/llava-cli.cpp b/examples/llava/llava-cli.cpp
index 955d7af15..8f437863f 100644
--- a/examples/llava/llava-cli.cpp
+++ b/examples/llava/llava-cli.cpp
@@ -270,8 +270,6 @@ static void llava_free(struct llava_context * ctx_llava) {
 }
 
 int main(int argc, char ** argv) {
-    gpt_init();
-
     ggml_time_init();
 
     gpt_params params;
@@ -280,6 +278,8 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    gpt_init();
+
     if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
         print_usage(argc, argv);
         return 1;
diff --git a/examples/llava/minicpmv-cli.cpp b/examples/llava/minicpmv-cli.cpp
index 526b0a52b..c5156c35b 100644
--- a/examples/llava/minicpmv-cli.cpp
+++ b/examples/llava/minicpmv-cli.cpp
@@ -248,8 +248,6 @@ static const char * llama_loop(struct llava_context * ctx_llava,struct gpt_sampl
 }
 
 int main(int argc, char ** argv) {
-    gpt_init();
-
     ggml_time_init();
 
     gpt_params params;
@@ -258,6 +256,8 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    gpt_init();
+
     if (params.mmproj.empty() || (params.image.empty())) {
         show_additional_info(argc, argv);
         return 1;
diff --git a/examples/lookahead/lookahead.cpp b/examples/lookahead/lookahead.cpp
index 05a5d67c3..49870b4a4 100644
--- a/examples/lookahead/lookahead.cpp
+++ b/examples/lookahead/lookahead.cpp
@@ -37,14 +37,14 @@ struct ngram_container {
 };
 
 int main(int argc, char ** argv) {
-    gpt_init();
-
     gpt_params params;
 
     if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
         return 1;
     }
 
+    gpt_init();
+
     const int W = 15; // lookahead window
     const int N = 5;  // n-gram size
     const int G = 15; // max verification n-grams
diff --git a/examples/lookup/lookup-stats.cpp b/examples/lookup/lookup-stats.cpp
index 752b34068..6d1e1ceb9 100644
--- a/examples/lookup/lookup-stats.cpp
+++ b/examples/lookup/lookup-stats.cpp
@@ -13,14 +13,14 @@
 #include
 
 int main(int argc, char ** argv){
-    gpt_init();
-
     gpt_params params;
 
     if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_LOOKUP)) {
         return 1;
     }
 
+    gpt_init();
+
     const int n_draft = params.n_draft;
 
     // init llama.cpp
diff --git a/examples/lookup/lookup.cpp b/examples/lookup/lookup.cpp
index a89c3645d..2ccd0e6c1 100644
--- a/examples/lookup/lookup.cpp
+++ b/examples/lookup/lookup.cpp
@@ -13,14 +13,14 @@
 #include
 
 int main(int argc, char ** argv){
-    gpt_init();
-
     gpt_params params;
 
     if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_LOOKUP)) {
         return 1;
     }
 
+    gpt_init();
+
     // max. number of additional tokens to draft if match is found
     const int n_draft = params.n_draft;
 
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 877daeb58..d9e45ce2f 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -131,14 +131,14 @@ static std::string chat_add_and_format(struct llama_model * model, std::vector
 }
 
 int main(int argc, char ** argv) {
-    gpt_init();
-
     gpt_params params;
     g_params = &params;
 
     if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_MAIN, print_usage)) {
         return 1;
     }
 
+    gpt_init();
+
     auto & sparams = params.sparams;
 
diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp
--- a/examples/parallel/parallel.cpp
+++ b/examples/parallel/parallel.cpp
@@ ... @@ static std::vector<std::string> split_string(const std::string& input, char deli
 int main(int argc, char ** argv) {
     srand(1234);
 
-    gpt_init();
-
     gpt_params params;
 
     if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_PARALLEL)) {
         return 1;
     }
 
+    gpt_init();
+
     // number of simultaneous "clients" to simulate
     const int32_t n_clients = params.n_parallel;
diff --git a/examples/passkey/passkey.cpp b/examples/passkey/passkey.cpp
index cfaf798ce..7ef8d14f3 100644
--- a/examples/passkey/passkey.cpp
+++ b/examples/passkey/passkey.cpp
@@ -15,8 +15,6 @@ static void print_usage(int, char ** argv) {
 }
 
 int main(int argc, char ** argv) {
-    gpt_init();
-
     gpt_params params;
 
     params.n_junk = 250;
@@ -27,6 +25,8 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    gpt_init();
+
     int n_junk = params.n_junk;
     int n_keep = params.n_keep;
     int n_grp = params.grp_attn_n;
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 16aa06f42..18e75a7a2 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -1957,8 +1957,6 @@ static void kl_divergence(llama_context * ctx, const gpt_params & params) {
 }
 
 int main(int argc, char ** argv) {
-    gpt_init();
-
     gpt_params params;
 
     params.n_ctx = 512;
@@ -1968,6 +1966,8 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    gpt_init();
+
     const int32_t n_ctx = params.n_ctx;
 
     if (n_ctx <= 0) {
diff --git a/examples/retrieval/retrieval.cpp b/examples/retrieval/retrieval.cpp
index e2266eb41..5971690f1 100644
--- a/examples/retrieval/retrieval.cpp
+++ b/examples/retrieval/retrieval.cpp
@@ -112,14 +112,14 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
 }
 
 int main(int argc, char ** argv) {
-    gpt_init();
-
     gpt_params params;
 
     if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_RETRIEVAL, print_usage)) {
         return 1;
     }
 
+    gpt_init();
+
     // For BERT models, batch size must be equal to ubatch size
     params.n_ubatch = params.n_batch;
     params.embedding = true;
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 3a3f3d066..6362ef556 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2316,8 +2316,6 @@ inline void signal_handler(int signal) {
 }
 
 int main(int argc, char ** argv) {
-    gpt_init();
-
     // own arguments required by this example
     gpt_params params;
 
@@ -2325,6 +2323,8 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    gpt_init();
+
     const bool verbose = params.verbosity > 0;
 
     // struct that contains llama context and inference
diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index 21514c750..c2b7267c8 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -12,8 +12,6 @@ static void print_usage(int, char ** argv) {
 }
 
 int main(int argc, char ** argv) {
-    gpt_init();
-
     gpt_params params;
 
     params.prompt = "Hello my name is";
@@ -23,6 +21,8 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    gpt_init();
+
     // total length of the sequence including the prompt
     const int n_predict = params.n_predict;
 
diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp
index 1ee173335..fbac21811 100644
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@@ -30,14 +30,14 @@ struct seq_draft {
 };
 
 int main(int argc, char ** argv) {
-    gpt_init();
-
     gpt_params params;
 
     if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_SPECULATIVE)) {
         return 1;
     }
 
+    gpt_init();
+
     if (params.model_draft.empty()) {
         LOG_ERR("%s: --model-draft is required\n", __func__);
         return 1;