reusable buffers

mqy 2023-07-04 20:38:46 +08:00
parent cb1dec0ec0
commit b1331d7e60
8 changed files with 129 additions and 137 deletions
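Every call site below applies the same transformation: instead of malloc'ing and freeing `plan.work_data` around each graph computation, the plan is pointed at a long-lived byte buffer that only grows. A condensed before/after sketch of the pattern (`gf` and `n_threads` stand in for whatever graph and thread count each call site uses):

    // before: a fresh allocation and free around every ggml_graph_compute() call
    struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, n_threads);
    if (plan.work_size > 0) {
        plan.work_data = malloc(plan.work_size);
        GGML_ASSERT(plan.work_data);
    }
    ggml_graph_compute(&plan, &gf);
    if (plan.work_data) {
        free(plan.work_data);
    }

    // after: one buffer declared outside the loop, grown on demand, never freed per call
    std::vector<uint8_t> compute_plan_buffer;
    // ... then, per iteration:
    struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, n_threads);
    if (plan.work_size > 0) {
        compute_plan_buffer.resize(plan.work_size); // reallocates only past the high-water mark
        plan.work_data = compute_plan_buffer.data();
    }
    ggml_graph_compute(&plan, &gf);
    // no free: the vector owns the memory and is reused on the next iteration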


@@ -1569,6 +1569,8 @@ int main(int argc, char ** argv) {
     int n_tokens = model.hparams.n_ctx;
     int n_vocab  = model.hparams.n_vocab;

+    auto compute_plan_buffer = std::vector<uint8_t>();
+
     for (int ex=0; ex<n_examples; ++ex) {
         struct ggml_init_params params = {
             /*.mem_size   =*/ compute_size,

@@ -1598,13 +1600,10 @@ int main(int argc, char ** argv) {
         {
             struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
             if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
             }
             ggml_graph_compute(&plan, &gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
         }

         float error_before_opt = ggml_get_f32_1d(e, 0);

@@ -1625,13 +1624,10 @@ int main(int argc, char ** argv) {
         {
             struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
             if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
             }
             ggml_graph_compute(&plan, &gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
         }

         float error_after_opt = ggml_get_f32_1d(e, 0);

@@ -1689,13 +1685,10 @@ int main(int argc, char ** argv) {
         {
             struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
             if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
             }
             ggml_graph_compute(&plan, &gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
         }

         struct ggml_tensor * best_samples = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, sample_ctx);


@@ -164,16 +164,15 @@ int main(int argc, char ** argv) {
     TENSOR_DUMP(m11);
     TENSOR_DUMP(m2);

+    auto compute_plan_buffer = std::vector<uint8_t>();
+
     {
-        struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, benchmark_params.n_threads);
+        auto plan = ggml_graph_compute_make_plan(&gf, benchmark_params.n_threads);
         if (plan.work_size > 0) {
-            plan.work_data = malloc(plan.work_size);
-            GGML_ASSERT(plan.work_data);
+            compute_plan_buffer.resize(plan.work_size);
+            plan.work_data = compute_plan_buffer.data();
        }
        ggml_graph_compute(&plan, &gf);
-        if (plan.work_data) {
-            free(plan.work_data);
-        }
    }

    TENSOR_DUMP(gf.nodes[0]);

@@ -229,15 +228,12 @@ int main(int argc, char ** argv) {
        long long int start = ggml_time_us();
        //printf("Running ggml_graph_compute\n");
        {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf31, benchmark_params.n_threads);
+            auto plan = ggml_graph_compute_make_plan(&gf31, benchmark_params.n_threads);
            if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
            }
            ggml_graph_compute(&plan, &gf31);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
        }

        long long int stop = ggml_time_us();

@@ -272,15 +268,12 @@ int main(int argc, char ** argv) {
        // Running a different graph computation to make sure we override the CPU cache lines
        {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf32, benchmark_params.n_threads);
+            auto plan = ggml_graph_compute_make_plan(&gf32, benchmark_params.n_threads);
            if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
            }
            ggml_graph_compute(&plan, &gf32);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
        }
    }
    printf("\n");


@@ -3181,6 +3181,8 @@ int main(int argc, char ** argv) {
        GGML_ASSERT(train_samples[i]+n_tokens-1 < (int) train_tokens.size());
    }

+    auto compute_plan_buffer = std::vector<uint8_t>();
+
    printf("%s: begin training\n", __func__);

    for (int ex = 0; ex < params.n_examples; ++ex) {

@@ -3244,15 +3246,12 @@ int main(int argc, char ** argv) {
        }

        {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(gf, params.n_threads);
+            auto plan = ggml_graph_compute_make_plan(gf, params.n_threads);
            if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
            }
            ggml_graph_compute(&plan, gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
        }

        size_t used_mem_before_opt = ggml_used_mem(ctx0);

@@ -3278,15 +3277,12 @@ int main(int argc, char ** argv) {
        model.train_tokens += n_batch * n_tokens;

        {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(gf, params.n_threads);
+            auto plan = ggml_graph_compute_make_plan(gf, params.n_threads);
            if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
            }
            ggml_graph_compute(&plan, gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
        }

        float error_after_opt = ggml_get_f32_1d(loss, 0);

@@ -3376,15 +3372,12 @@ int main(int argc, char ** argv) {
        ggml_build_forward_expand(&gf, logits);

        {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, params.n_threads);
+            auto plan = ggml_graph_compute_make_plan(&gf, params.n_threads);
            if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
            }
            ggml_graph_compute(&plan, &gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
        }

        //struct ggml_tensor * best_samples = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, sample_ctx);

ggml.c

@@ -15974,7 +15974,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
    const struct ggml_cgraph * cgraph = state->shared->cgraph;
    const struct ggml_graph_compute_plan * plan = state->shared->plan;
-    const int *n_tasks_arr = plan->n_tasks;
+    const int * n_tasks_arr = plan->n_tasks;

    const int n_threads = state->shared->n_threads;
    set_numa_thread_affinity(state->ith, n_threads);

@@ -16490,6 +16490,7 @@ void ggml_graph_compute(struct ggml_graph_compute_plan * plan, struct ggml_cgraph * cgraph) {
    }
 }

+// TODO: avoid allocating memory frequently.
 static void ggml_graph_compute_sugar(struct ggml_cgraph * cgraph, int n_threads) {
    struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(cgraph, n_threads);
    if (plan.work_size > 0) {
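The new TODO flags that `ggml_graph_compute_sugar()` still allocates a fresh work buffer on every call. The hunk is cut off here, but judging from the pattern this commit removes everywhere else, the remainder presumably looks like this (a hedged reconstruction, not shown in the diff):

    // presumed rest of ggml_graph_compute_sugar() — the per-call allocation the TODO refers to
    if (plan.work_size > 0) {
        plan.work_data = (uint8_t *) malloc(plan.work_size); // cast redundant in the C source
        GGML_ASSERT(plan.work_data);
    }
    ggml_graph_compute(&plan, cgraph);
    if (plan.work_data) {
        free(plan.work_data);
    }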

ggml.h

@@ -449,7 +449,7 @@ extern "C" {
        // Size of work buffer, calculated by `ggml_graph_compute_make_plan()`.
        size_t work_size;
        // Work buffer, to be allocated by caller before calling to `ggml_graph_compute()`.
-        void * work_data;
+        uint8_t * work_data;

        int n_threads;
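Typing `work_data` as `uint8_t *` rather than `void *` fits the caller-allocates contract: a `std::vector<uint8_t>` can be plugged in without a cast, and byte-offset arithmetic on the buffer is well-defined standard C/C++ (arithmetic on `void *` is only a GNU extension). A minimal caller sketch against this header (`graph` and `n_threads` assumed in scope):

    std::vector<uint8_t> buf;
    struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&graph, n_threads);
    if (plan.work_size > 0) {
        buf.resize(plan.work_size);
        plan.work_data = buf.data(); // uint8_t* to uint8_t*, no cast needed
    }
    ggml_graph_compute(&plan, &graph);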


@@ -321,6 +321,10 @@ struct llama_context {
    // input embedding (1-dimensional array: [n_embd])
    std::vector<float> embedding;

+    // reusable buffer for `struct ggml_graph_compute_plan.work_data`
+    // std::vector guarantees the elements are stored contiguously.
+    std::vector<uint8_t> compute_plan_buffer;
+
    // memory buffers used to evaluate the model
    // TODO: move in llama_state
    llama_ctx_buffer buf_compute;

@@ -1591,10 +1595,13 @@ static bool llama_eval_internal(
    // run the computation
    ggml_build_forward_expand(&gf, cur);

+    bool call_ggml_graph_compute = true;
+
 #ifdef GGML_USE_METAL
    if (lctx.ctx_metal && N == 1) {
        ggml_metal_graph_compute(lctx.ctx_metal, &gf);
        ggml_metal_get_tensor   (lctx.ctx_metal, cur);
+        call_ggml_graph_compute = false;
    } else {
        // IMPORTANT:
        // Since we don't have efficient Matrix x Matrix Metal multiplication yet, we fallback to vanilla

@@ -1611,33 +1618,18 @@ static bool llama_eval_internal(
            ggml_metal_get_tensor(lctx.ctx_metal, kv_self.k);
            ggml_metal_get_tensor(lctx.ctx_metal, kv_self.v);
        }
-
-        {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, actual_n_threads);
-            if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
-            }
-            ggml_graph_compute(&plan, &gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
-        }
    }
-#else
-    {
-        struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, actual_n_threads);
-        if (plan.work_size > 0) {
-            plan.work_data = malloc(plan.work_size);
-            GGML_ASSERT(plan.work_data);
-        }
-        ggml_graph_compute(&plan, &gf);
-        if (plan.work_data) {
-            free(plan.work_data);
-        }
-    }
 #endif

+    if (call_ggml_graph_compute) {
+        auto plan = ggml_graph_compute_make_plan(&gf, actual_n_threads);
+        if (plan.work_size > 0) {
+            lctx.compute_plan_buffer.resize(plan.work_size);
+            plan.work_data = lctx.compute_plan_buffer.data();
+        }
+        ggml_graph_compute(&plan, &gf);
+    }
+
    if (cgraph_fname) {
        ggml_graph_export(&gf, cgraph_fname);
    }

@@ -2822,6 +2814,9 @@ int llama_apply_lora_from_file_internal(const struct llama_model & model, const
    // read tensors and apply
    bool warned = false;
    int n_tensors = 0;
+
+    auto compute_plan_buffer = std::vector<uint8_t>();
+
    while (true) {
        int32_t n_dims;
        int32_t length;

@@ -2988,15 +2983,12 @@ int llama_apply_lora_from_file_internal(const struct llama_model & model, const
            struct ggml_cgraph gf = ggml_build_forward(r);

            {
-                struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, n_threads);
+                auto plan = ggml_graph_compute_make_plan(&gf, n_threads);
                if (plan.work_size > 0) {
-                    plan.work_data = malloc(plan.work_size);
-                    GGML_ASSERT(plan.work_data);
+                    compute_plan_buffer.resize(plan.work_size);
+                    plan.work_data = compute_plan_buffer.data();
                }
                ggml_graph_compute(&plan, &gf);
-                if (plan.work_data) {
-                    free(plan.work_data);
-                }
            }

            // we won't need these tensors again, reset the context to save memory

@@ -3171,15 +3163,12 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
        ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, v3d, vout3d));

        {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
+            auto plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
            if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                ctx->compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = ctx->compute_plan_buffer.data();
            }
            ggml_graph_compute(&plan, &gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
        }

        ggml_free(cpy_ctx);

@@ -3287,15 +3276,12 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
        ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, vin3d, v3d));

        {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
+            auto plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
            if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                ctx->compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = ctx->compute_plan_buffer.data();
            }
            ggml_graph_compute(&plan, &gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
        }

        ggml_free(cpy_ctx);
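Two details worth noting in the llama.cpp changes. First, `call_ggml_graph_compute` folds the duplicated Metal/non-Metal compute blocks into one: the flag defaults to true and is cleared only on the Metal fast path, so the single CPU block after `#endif` replaces both the old `#else` branch and the Metal fallback. Second, because `compute_plan_buffer` is a member of `llama_context`, `resize()` reallocates only while the required `work_size` still exceeds the vector's high-water mark; steady-state evals stop touching the allocator entirely, and the memory is released with the context. An illustration with hypothetical sizes:

    // hypothetical sizes, for illustration only
    lctx.compute_plan_buffer.resize(4*1024*1024); // first eval: allocates
    lctx.compute_plan_buffer.resize(1024*1024);   // later, smaller plan: no reallocation
    lctx.compute_plan_buffer.resize(2*1024*1024); // still under capacity: no reallocation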


@@ -191,6 +191,32 @@ void print_elements(const char* label, const struct ggml_tensor * t) {
    }
 }

+struct compute_plan_buffer {
+    size_t size;
+    uint8_t * data;
+};
+
+static uint8_t * ensure_plan_work_data(struct compute_plan_buffer *buf, size_t size) {
+    if (size == 0) {
+        return NULL;
+    }
+
+    GGML_ASSERT(buf);
+
+    if (buf->size == 0) {
+        buf->data = malloc(size);
+        buf->size = size;
+    } else if (buf->size < size) {
+        buf->data = realloc(buf->data, size);
+        buf->size = size;
+    } else {
+        // skip shrinking.
+    }
+
+    GGML_ASSERT(buf->data);
+    return buf->data;
+}

 bool check_gradient(
    const char * op_name,
    struct ggml_context * ctx0,

@@ -218,6 +244,8 @@ bool check_gradient(
    struct ggml_cgraph gb = ggml_build_backward(ctx0, &gf, false);

+    struct compute_plan_buffer plan_buf = { /*.size = */ 0, /*.data =*/ NULL };
+
    {
        struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, n_threads);
        if (plan.work_size > 0) {

@@ -235,14 +263,8 @@ bool check_gradient(
    {
        struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gb, n_threads);
-        if (plan.work_size > 0) {
-            plan.work_data = malloc(plan.work_size);
-            GGML_ASSERT(plan.work_data);
-        }
+        plan.work_data = ensure_plan_work_data(&plan_buf, plan.work_size);
        ggml_graph_compute(&plan, &gb);
-        if (plan.work_data) {
-            free(plan.work_data);
-        }
    }

    // ggml_graph_dump_dot(&gf, NULL, "test-grad0-forward.dot");

@@ -259,14 +281,8 @@ bool check_gradient(
    {
        struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, n_threads);
-        if (plan.work_size > 0) {
-            plan.work_data = malloc(plan.work_size);
-            GGML_ASSERT(plan.work_data);
-        }
+        plan.work_data = ensure_plan_work_data(&plan_buf, plan.work_size);
        ggml_graph_compute(&plan, &gf);
-        if (plan.work_data) {
-            free(plan.work_data);
-        }
    }

    const float f0 = ggml_get_f32_1d(f, 0);

@@ -275,14 +291,8 @@ bool check_gradient(
    {
        struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, n_threads);
-        if (plan.work_size > 0) {
-            plan.work_data = malloc(plan.work_size);
-            GGML_ASSERT(plan.work_data);
-        }
+        plan.work_data = ensure_plan_work_data(&plan_buf, plan.work_size);
        ggml_graph_compute(&plan, &gf);
-        if (plan.work_data) {
-            free(plan.work_data);
-        }
    }

    const float f1 = ggml_get_f32_1d(f, 0);

@@ -297,14 +307,8 @@ bool check_gradient(
    {
        struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gb, n_threads);
-        if (plan.work_size > 0) {
-            plan.work_data = malloc(plan.work_size);
-            GGML_ASSERT(plan.work_data);
-        }
+        plan.work_data = ensure_plan_work_data(&plan_buf, plan.work_size);
        ggml_graph_compute(&plan, &gb);
-        if (plan.work_data) {
-            free(plan.work_data);
-        }
    }

    const float g1 = get_element(x[i]->grad, k);

@@ -321,6 +325,10 @@ bool check_gradient(
        }
    }

+    if (plan_buf.data) {
+        free(plan_buf.data);
+    }
+
    return true;
 }
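The C tests cannot use `std::vector`, so they get the small grow-only `compute_plan_buffer` helper instead: `ensure_plan_work_data()` returns NULL for empty plans, mallocs on first use, reallocs only when a request outgrows the buffer, and deliberately never shrinks. Each call site then collapses to a single line, with one free at the end — condensed from the call sites above:

    struct compute_plan_buffer plan_buf = { /*.size =*/ 0, /*.data =*/ NULL };

    struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, n_threads);
    plan.work_data = ensure_plan_work_data(&plan_buf, plan.work_size); // NULL when work_size == 0
    ggml_graph_compute(&plan, &gf);
    // ... further graph computations reuse plan_buf ...

    if (plan_buf.data) {
        free(plan_buf.data);
    }

One caveat: assigning `realloc`'s result straight back to `buf->data` would leak the old block if `realloc` ever returned NULL, but the `GGML_ASSERT(buf->data)` immediately after aborts in that case, so the leak is moot here.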


@@ -114,6 +114,31 @@ void set_element(struct ggml_tensor * t, int idx, float value) {
    ((float *)t->data)[idx] = value;
 }

+struct compute_plan_buffer {
+    size_t size;
+    uint8_t * data;
+};
+
+static uint8_t * ensure_plan_work_data(struct compute_plan_buffer *buf, size_t size) {
+    if (size == 0) {
+        return NULL;
+    }
+
+    if (buf->size == 0) {
+        buf->data = malloc(size);
+        buf->size = size;
+    } else if (buf->size < size) {
+        buf->data = realloc(buf->data, size);
+        buf->size = size;
+    } else {
+        // skip shrinking.
+    }
+
+    GGML_ASSERT(buf->data);
+    return buf->data;
+}
+
 int main(int argc, const char ** argv) {
    struct ggml_init_params params = {
        .mem_size = 1024*1024*1024,

@@ -141,16 +166,11 @@ int main(int argc, const char ** argv) {
    struct ggml_cgraph ge = ggml_build_forward(e);
    ggml_graph_reset (&ge);

+    struct compute_plan_buffer plan_buf = { /*.size = */ 0, /*.data =*/ NULL };
    {
        struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&ge, /*n_threads*/ 1);
-        if (plan.work_size > 0) {
-            plan.work_data = malloc(plan.work_size);
-            GGML_ASSERT(plan.work_data);
-        }
+        plan.work_data = ensure_plan_work_data(&plan_buf, plan.work_size);
        ggml_graph_compute(&plan, &ge);
-        if (plan.work_data) {
-            free(plan.work_data);
-        }
    }

    const float fe = ggml_get_f32_1d(e, 0);

@@ -164,14 +184,12 @@ int main(int argc, const char ** argv) {
    {
        struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&ge, /*n_threads*/ 1);
-        if (plan.work_size > 0) {
-            plan.work_data = malloc(plan.work_size);
-            GGML_ASSERT(plan.work_data);
-        }
+        plan.work_data = ensure_plan_work_data(&plan_buf, plan.work_size);
        ggml_graph_compute(&plan, &ge);
-        if (plan.work_data) {
-            free(plan.work_data);
-        }
+    }
+
+    if (plan_buf.data) {
+        free(plan_buf.data);
    }

    const float fe_opt = ggml_get_f32_1d(e, 0);