sync : ggml (#5452)
* ggml-alloc : v3 (ggml/727) * ggml-alloc v3 ggml-ci * fix ci ggml-ci * whisper : check for backend buffer allocation failures * whisper : avoid leaks when initialization fails * cleanup ggml-ci * style fixes ggml-ci * sync : ggml * update llama.cpp, clip.cpp, export-lora.cpp * update finetune.cpp, train-text-from-scratch.cpp ggml-ci * ggml-backend : reduce alignment to 32 to match gguf and fix mmap --------- Co-authored-by: slaren <slarengh@gmail.com>
This commit is contained in:
parent
3bdc4cd0f5
commit
3b169441df
12 changed files with 1287 additions and 1362 deletions
|
@ -337,24 +337,14 @@ static bool apply_lora(struct ggml_tensor * tensor, struct lora_data * lora, int
|
|||
params.mem_buffer = NULL;
|
||||
params.no_alloc = true;
|
||||
struct ggml_context * ctx = NULL;
|
||||
struct ggml_allocr * alloc = NULL;
|
||||
struct ggml_cgraph * gf = NULL;
|
||||
struct ggml_gallocr * alloc = NULL;
|
||||
struct ggml_cgraph * gf = NULL;
|
||||
|
||||
ctx = ggml_init(params);
|
||||
alloc = ggml_allocr_new_measure(tensor_alignment);
|
||||
alloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
|
||||
gf = build_graph_lora(ctx, tensor, lora_a, lora_b, scaling);
|
||||
size_t alloc_size = ggml_allocr_alloc_graph(alloc, gf);
|
||||
ggml_allocr_free(alloc);
|
||||
ggml_free(ctx);
|
||||
|
||||
static std::vector<uint8_t> data_compute;
|
||||
data_compute.resize(alloc_size + tensor_alignment);
|
||||
|
||||
ctx = ggml_init(params);
|
||||
alloc = ggml_allocr_new(data_compute.data(), data_compute.size(), tensor_alignment);
|
||||
gf = build_graph_lora(ctx, tensor, lora_a, lora_b, scaling);
|
||||
ggml_allocr_alloc_graph(alloc, gf);
|
||||
ggml_allocr_free(alloc);
|
||||
ggml_gallocr_alloc_graph(alloc, gf);
|
||||
|
||||
struct ggml_cplan cplan = ggml_graph_plan(gf, n_threads);
|
||||
static std::vector<uint8_t> data_work;
|
||||
|
@ -363,6 +353,7 @@ static bool apply_lora(struct ggml_tensor * tensor, struct lora_data * lora, int
|
|||
|
||||
ggml_graph_compute(gf, &cplan);
|
||||
|
||||
ggml_gallocr_free(alloc);
|
||||
ggml_free(ctx);
|
||||
return true;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue