diff --git a/examples/common.cpp b/examples/common.cpp
index 35be2b5aa..4ee1ea79a 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -556,7 +556,7 @@ std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::s
     return res;
 }
 
-std::tuple<struct llama_model *, struct llama_context *, struct llama_context_params> llama_init_from_gpt_params(const gpt_params & params) {
+struct llama_context_params llama_get_context_params_from_gpt_params(const gpt_params & params) {
     auto lparams = llama_context_default_params();
 
     lparams.n_ctx = params.n_ctx;
@@ -572,17 +572,23 @@ std::tuple<struct llama_model *, struct llama_context *, struct llama_context_params> llama_init_from_gpt_params(const gpt_params & params) {
+    return lparams;
+}
+
+std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params) {
+    auto lparams = llama_get_context_params_from_gpt_params(params);
+
     llama_model * model = llama_load_model_from_file(params.model.c_str(), lparams);
     if (model == NULL) {
         fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
-        return std::make_tuple(nullptr, nullptr, lparams);
+        return std::make_tuple(nullptr, nullptr);
     }
 
     llama_context * lctx = llama_new_context_with_model(model, lparams);
     if (lctx == NULL) {
         fprintf(stderr, "%s: error: failed to create context with model '%s'\n", __func__, params.model.c_str());
         llama_free_model(model);
-        return std::make_tuple(nullptr, nullptr, lparams);
+        return std::make_tuple(nullptr, nullptr);
     }
 
     if (!params.lora_adapter.empty()) {
@@ -594,11 +600,11 @@ std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::s
 //
 // Model utils
 //
 
-std::tuple<struct llama_model *, struct llama_context *, struct llama_context_params> llama_init_from_gpt_params(const gpt_params & params);
+std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params);
+struct llama_context_params llama_get_context_params_from_gpt_params(const gpt_params & params);
 
 //
 // Console utils
 //
diff --git a/examples/embd-input/embd-input-lib.cpp b/examples/embd-input/embd-input-lib.cpp
index 576ac0af0..5fa4942be 100644
--- a/examples/embd-input/embd-input-lib.cpp
+++ b/examples/embd-input/embd-input-lib.cpp
@@ -42,7 +42,7 @@ struct MyModel* create_mymodel(int argc, char ** argv) {
     g_ctx = &ctx;
 
     // load the model and apply lora adapter, if any
-    std::tie(model, ctx, std::ignore) = llama_init_from_gpt_params(params);
+    std::tie(model, ctx) = llama_init_from_gpt_params(params);
     if (model == NULL) {
         fprintf(stderr, "%s: error: unable to load model\n", __func__);
         return nullptr;
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 7b1135e6a..03e801c2a 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -41,7 +41,7 @@ int main(int argc, char ** argv) {
     llama_context * ctx;
 
     // load the model
-    std::tie(model, ctx, std::ignore) = llama_init_from_gpt_params(params);
+    std::tie(model, ctx) = llama_init_from_gpt_params(params);
     if (model == NULL) {
         fprintf(stderr, "%s: error: unable to load model\n", __func__);
         return 1;
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 88262c920..dbc384f52 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -124,12 +124,12 @@ int main(int argc, char ** argv) {
     llama_model * model;
     llama_context * ctx;
     llama_context * guidance_ctx = NULL;
-    struct llama_context_params lparams;
     g_ctx = &ctx;
 
     // load the model and apply lora adapter, if any
-    std::tie(model, ctx, lparams) = llama_init_from_gpt_params(params);
+    std::tie(model, ctx) = llama_init_from_gpt_params(params);
 
     if (params.cfg_scale > 1.f) {
+        struct llama_context_params lparams = llama_get_context_params_from_gpt_params(params);
         guidance_ctx = llama_new_context_with_model(model, lparams);
     }
 
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 768c2b400..fd4b03cb2 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -153,7 +153,7 @@ int main(int argc, char ** argv) {
     llama_context * ctx;
 
     // load the model and apply lora adapter, if any
-    std::tie(model, ctx, std::ignore) = llama_init_from_gpt_params(params);
+    std::tie(model, ctx) = llama_init_from_gpt_params(params);
     if (model == NULL) {
         fprintf(stderr, "%s: error: unable to load model\n", __func__);
         return 1;
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 55cf1c94d..2cbfc0018 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -245,7 +245,7 @@ struct llama_server_context
     bool loadModel(const gpt_params &params_)
     {
         params = params_;
-        std::tie(model, ctx, std::ignore) = llama_init_from_gpt_params(params);
+        std::tie(model, ctx) = llama_init_from_gpt_params(params);
         if (model == nullptr)
         {
             LOG_ERROR("unable to load model", {{"model", params_.model}});
diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index f59788865..2d913cebb 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -71,7 +71,7 @@ int main(int argc, char ** argv)
     llama_model * model;
     llama_context * ctx;
 
-    std::tie(model, ctx, std::ignore) = llama_init_from_gpt_params( params );
+    std::tie(model, ctx) = llama_init_from_gpt_params( params );
 
     if ( model == NULL )
     {
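
Net effect of the diff: llama_init_from_gpt_params() now returns only the (model, context) pair, and callers that also need the llama_context_params rebuild them with the new llama_get_context_params_from_gpt_params() helper, as main.cpp does for its guidance context. Below is a minimal sketch of the resulting call pattern; it is not part of the diff, and the gpt_params_parse() call plus the llama_free()/llama_free_model() cleanup are assumed from the surrounding llama.cpp example code.

// Sketch of the post-change call pattern (assumptions as noted above).
#include "common.h"
#include "llama.h"

#include <cstdio>
#include <tuple>

int main(int argc, char ** argv) {
    gpt_params params;
    if (!gpt_params_parse(argc, argv, params)) {
        return 1;
    }

    llama_model   * model;
    llama_context * ctx;

    // llama_init_from_gpt_params() now yields just (model, ctx); no std::ignore needed.
    std::tie(model, ctx) = llama_init_from_gpt_params(params);
    if (model == NULL) {
        fprintf(stderr, "error: unable to load model '%s'\n", params.model.c_str());
        return 1;
    }

    // Callers that still need the context params (e.g. a second, guidance
    // context for CFG as in main.cpp) rebuild them from the same gpt_params.
    llama_context * guidance_ctx = NULL;
    if (params.cfg_scale > 1.f) {
        struct llama_context_params lparams = llama_get_context_params_from_gpt_params(params);
        guidance_ctx = llama_new_context_with_model(model, lparams);
    }

    // ... run inference with ctx (and guidance_ctx, if created) ...

    if (guidance_ctx != NULL) {
        llama_free(guidance_ctx);
    }
    llama_free(ctx);
    llama_free_model(model);
    return 0;
}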