llama : unified KV cache + batch inference API

This commit is contained in:
Georgi Gerganov 2023-09-18 10:08:22 +03:00
parent fad56936d4
commit d29e76937c
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
10 changed files with 315 additions and 236 deletions

View file

@@ -198,15 +198,6 @@ int main(int argc, char ** argv) {
params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
}
// export the cgraph and exit
if (params.export_cgraph) {
llama_eval_export(ctx, "llama.ggml");
llama_free(ctx);
llama_free_model(model);
return 0;
}
std::string path_session = params.path_prompt_cache;
std::vector<llama_token> session_tokens;