llama : Add ability to cancel model load

Updated llama_progress_callback so that if it returns false, model
loading is aborted.
Author: crasm
Date: 2023-12-14 04:03:25 -05:00
parent 55e87c3749
commit 9abe2e44d1
2 changed files with 36 additions and 15 deletions

llama.h

@@ -126,7 +126,7 @@ extern "C" {
         bool sorted;
     } llama_token_data_array;
 
-    typedef void (*llama_progress_callback)(float progress, void *ctx);
+    typedef bool (*llama_progress_callback)(float progress, void *ctx);
 
     // Input data for llama_decode
     // A llama_batch object can contain input about one or many sequences
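
For illustration, a callback conforming to the new bool-returning typedef might look like the following sketch. The g_cancel_load flag and the load_progress name are hypothetical, not part of this change:

    #include <atomic>
    #include <cstdio>

    // Hypothetical flag, flipped elsewhere (e.g. by a UI thread or signal handler).
    static std::atomic<bool> g_cancel_load{false};

    // Conforms to: typedef bool (*llama_progress_callback)(float progress, void *ctx);
    static bool load_progress(float progress, void * ctx) {
        (void) ctx;
        fprintf(stderr, "\rloading model: %3.0f%%", progress * 100.0f);
        return !g_cancel_load.load(); // true continues the load, false aborts it
    }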
@@ -179,7 +179,9 @@ extern "C" {
         int32_t main_gpu; // the GPU that is used for scratch and small tensors
         const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
 
-        // called with a progress value between 0 and 1, pass NULL to disable
+        // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
+        // If the provided progress_callback returns true, model loading continues.
+        // If it returns false, model loading is immediately aborted.
         llama_progress_callback progress_callback;
 
         // context pointer passed to the progress callback
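
Taken together with the callback sketched above, a caller could wire cancellation into model loading roughly as follows. This is a minimal sketch assuming the llama_model_default_params() / llama_load_model_from_file() API of this period and the hypothetical load_progress callback defined earlier:

    #include "llama.h"
    #include <cstdio>

    int main() {
        llama_model_params mparams = llama_model_default_params();
        mparams.progress_callback           = load_progress; // see sketch above
        mparams.progress_callback_user_data = nullptr;       // passed back as ctx

        llama_model * model = llama_load_model_from_file("model.gguf", mparams);
        if (model == nullptr) {
            // Either the load failed, or the callback returned false and aborted it.
            fprintf(stderr, "model load failed or was cancelled\n");
            return 1;
        }
        llama_free_model(model);
        return 0;
    }

An aborted load surfaces to the caller the same way as a failed one, with llama_load_model_from_file() returning NULL, so existing error-handling paths cover both cases.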