llama : make model stateless and context stateful (llama_state) (#1797)

* llama : make model stateless and context stateful

* llama : minor cleanup

* llama : update internal API declaration

* Apply suggestions from code review

fix style

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Missing model memory release

* Fix style

* Add deprecated warning for public API function llama_init_from_file

* Update public API use cases: move away from deprecated llama_init_from_file

* Deprecate public API function llama_apply_lora_from_file

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Didzis Gosko 2023-06-24 11:47:58 +03:00 committed by GitHub
parent d7b7484f74
commit 527b6fba1d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 244 additions and 92 deletions

View file

@ -28,6 +28,7 @@ int main(int argc, char **argv) {
fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
llama_model * model;
llama_context * ctx;
// load the vocab
@ -36,10 +37,18 @@ int main(int argc, char **argv) {
lparams.vocab_only = true;
ctx = llama_init_from_file(fname.c_str(), lparams);
model = llama_load_model_from_file(fname.c_str(), lparams);
if (model == NULL) {
fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
return 1;
}
ctx = llama_new_context_with_model(model, lparams);
if (ctx == NULL) {
fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
llama_free_model(model);
return 1;
}
}
@ -48,6 +57,8 @@ int main(int argc, char **argv) {
if (n_vocab != 32000) {
fprintf(stderr, "%s : expected 32000 tokens, got %d\n", __func__, n_vocab);
llama_free_model(model);
llama_free(ctx);
return 2;
}
@ -77,10 +88,13 @@ int main(int argc, char **argv) {
}
fprintf(stderr, "\n");
llama_free_model(model);
llama_free(ctx);
return 3;
}
}
llama_free_model(model);
llama_free(ctx);
return 0;