llama : make model stateless and context stateful (llama_state) (#1797)

* llama : make model stateless and context stateful

* llama : minor cleanup

* llama : update internal API declaration

* Apply suggestions from code review

fix style

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Missing model memory release

* Fix style

* Add deprecated warning for public API function llama_init_from_file

* Update public API use cases: move away from deprecated llama_init_from_file

* Deprecate public API function llama_apply_lora_from_file

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Didzis Gosko, 2023-06-24 11:47:58 +03:00, committed by GitHub
parent d7b7484f74
commit 527b6fba1d
13 changed files with 244 additions and 92 deletions
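
For context, the deprecated llama_init_from_file call is superseded by loading the (now stateless) model once and creating one or more (stateful) contexts from it. Below is a minimal sketch of the new call sequence under the API introduced by this commit; the model path is a placeholder and error handling is reduced to early returns:

#include "llama.h"

int main() {
    llama_context_params lparams = llama_context_default_params();

    // load the stateless model once (placeholder path)
    llama_model * model = llama_load_model_from_file("models/7B/ggml-model.bin", lparams);
    if (model == nullptr) {
        return 1;
    }

    // create a stateful context on top of the shared model
    llama_context * ctx = llama_new_context_with_model(model, lparams);
    if (ctx == nullptr) {
        llama_free_model(model);
        return 1;
    }

    // ... evaluate tokens with ctx ...

    // free the context before releasing the model it was created from
    llama_free(ctx);
    llama_free_model(model);
    return 0;
}

Because the model no longer carries per-session state, several contexts can be created from the same llama_model, which is the point of the split.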


@@ -115,6 +115,7 @@ struct llama_server_context {
     std::vector<llama_token> embd;
     std::vector<llama_token> last_n_tokens;
+    llama_model * model = nullptr;
     llama_context * ctx = nullptr;
     gpt_params params;
@@ -130,6 +131,10 @@ struct llama_server_context {
             llama_free(ctx);
             ctx = nullptr;
         }
+        if (model) {
+            llama_free_model(model);
+            model = nullptr;
+        }
     }
 
     void rewind() {
@@ -150,8 +155,8 @@ struct llama_server_context {
     bool loadModel(const gpt_params & params_) {
         params = params_;
 
-        ctx = llama_init_from_gpt_params(params);
-        if (ctx == nullptr) {
+        std::tie(model, ctx) = llama_init_from_gpt_params(params);
+        if (model == nullptr) {
             LOG_ERROR("unable to load model", { { "model", params_.model } });
             return false;
         }
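
The common helper llama_init_from_gpt_params now returns the model and the context together, so callers like the server above (and the other examples touched by this commit) receive both pointers and become responsible for freeing them in the right order. A rough caller-side sketch under that assumption, with variable names chosen for illustration:

#include <cstdio>
#include <tuple>

#include "common.h"
#include "llama.h"

int main(int argc, char ** argv) {
    gpt_params params;
    if (!gpt_params_parse(argc, argv, params)) {
        return 1;
    }

    llama_model * model = nullptr;
    llama_context * ctx = nullptr;

    // the helper now hands back the (model, context) pair
    std::tie(model, ctx) = llama_init_from_gpt_params(params);
    if (model == nullptr) {
        fprintf(stderr, "failed to load model '%s'\n", params.model.c_str());
        return 1;
    }

    // ... run generation with ctx ...

    // free the context first, then release the model
    llama_free(ctx);
    llama_free_model(model);
    return 0;
}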