llama : Add ability to cancel model load
Updated llama_progress_callback so that if it returns false, the model loading is aborted.
This commit is contained in:
parent
55e87c3749
commit
9abe2e44d1
2 changed files with 36 additions and 15 deletions
6
llama.h
6
llama.h
|
@ -126,7 +126,7 @@ extern "C" {
|
|||
bool sorted;
|
||||
} llama_token_data_array;
|
||||
|
||||
typedef void (*llama_progress_callback)(float progress, void *ctx);
|
||||
typedef bool (*llama_progress_callback)(float progress, void *ctx);
|
||||
|
||||
// Input data for llama_decode
|
||||
// A llama_batch object can contain input about one or many sequences
|
||||
|
@ -179,7 +179,9 @@ extern "C" {
|
|||
int32_t main_gpu; // the GPU that is used for scratch and small tensors
|
||||
const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
|
||||
|
||||
// called with a progress value between 0 and 1, pass NULL to disable
|
||||
// Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
|
||||
// If the provided progress_callback returns true, model loading continues.
|
||||
// If it returns false, model loading is immediately aborted.
|
||||
llama_progress_callback progress_callback;
|
||||
|
||||
// context pointer passed to the progress callback
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue