llama : unified KV cache + batch inference API
parent fad56936d4
commit d29e76937c

10 changed files with 315 additions and 236 deletions
@@ -198,15 +198,6 @@ int main(int argc, char ** argv) {
                 params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
     }
 
-    // export the cgraph and exit
-    if (params.export_cgraph) {
-        llama_eval_export(ctx, "llama.ggml");
-        llama_free(ctx);
-        llama_free_model(model);
-
-        return 0;
-    }
-
     std::string path_session = params.path_prompt_cache;
     std::vector<llama_token> session_tokens;
 
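The removed export_cgraph branch relied on the old llama_eval-style entry points. For orientation, below is a minimal, hypothetical sketch of how a caller feeds a prompt through the batch inference API that this commit introduces (llama_batch + llama_decode). It is not part of this diff; the llama_batch_get_one helper and the exact signatures are assumptions based on llama.h around this revision and may differ in other versions.

// Hypothetical sketch, not part of this diff: decoding a prompt with the
// batch API introduced by this commit. Signatures are assumed from llama.h
// of this period and may differ in later revisions.
#include "llama.h"

#include <vector>

static bool decode_prompt(llama_context * ctx, std::vector<llama_token> & tokens) {
    // Wrap the whole prompt as a single-sequence batch: positions start at 0
    // and all tokens belong to sequence 0 (assumed llama_batch_get_one semantics).
    llama_batch batch = llama_batch_get_one(tokens.data(), (int) tokens.size(), 0, 0);

    // llama_decode replaces the old eval entry point; a non-zero return value
    // signals failure, e.g. when the unified KV cache cannot fit the batch.
    return llama_decode(ctx, batch) == 0;
}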