reusable buffers
parent cb1dec0ec0
commit b1331d7e60
8 changed files with 129 additions and 137 deletions
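The change is the same at every call site: instead of malloc'ing plan.work_data before each ggml_graph_compute() call and freeing it afterwards, each site keeps one buffer alive and grows it on demand. A minimal sketch of the C++ variant used below (gf, n_examples, and n_threads stand in for the surrounding program's graph and settings):

    std::vector<uint8_t> compute_plan_buffer;

    for (int ex = 0; ex < n_examples; ++ex) {
        struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, n_threads);
        if (plan.work_size > 0) {
            // resize() reallocates only when the requested size grows
            compute_plan_buffer.resize(plan.work_size);
            plan.work_data = compute_plan_buffer.data();
        }
        ggml_graph_compute(&plan, &gf);
        // no free(): the vector keeps its allocation for the next iteration
    }

The plain-C test files get the same behavior from a small grow-only helper instead of std::vector; see the ensure_plan_work_data() hunks near the end.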
@@ -1569,6 +1569,8 @@ int main(int argc, char ** argv) {
     int n_tokens = model.hparams.n_ctx;
     int n_vocab = model.hparams.n_vocab;
 
+    auto compute_plan_buffer = std::vector<uint8_t>();
+
     for (int ex=0; ex<n_examples; ++ex) {
         struct ggml_init_params params = {
             /*.mem_size =*/ compute_size,
@@ -1598,13 +1600,10 @@ int main(int argc, char ** argv) {
         {
             struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
             if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
             }
             ggml_graph_compute(&plan, &gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
         }
 
         float error_before_opt = ggml_get_f32_1d(e, 0);
@@ -1625,13 +1624,10 @@ int main(int argc, char ** argv) {
         {
             struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
             if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
             }
             ggml_graph_compute(&plan, &gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
         }
 
         float error_after_opt = ggml_get_f32_1d(e, 0);
@@ -1689,13 +1685,10 @@ int main(int argc, char ** argv) {
         {
             struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
             if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
             }
             ggml_graph_compute(&plan, &gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
         }
 
         struct ggml_tensor * best_samples = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, sample_ctx);
@@ -164,16 +164,15 @@ int main(int argc, char ** argv) {
     TENSOR_DUMP(m11);
     TENSOR_DUMP(m2);
 
+    auto compute_plan_buffer = std::vector<uint8_t>();
+
     {
-        struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, benchmark_params.n_threads);
+        auto plan = ggml_graph_compute_make_plan(&gf, benchmark_params.n_threads);
         if (plan.work_size > 0) {
-            plan.work_data = malloc(plan.work_size);
-            GGML_ASSERT(plan.work_data);
+            compute_plan_buffer.resize(plan.work_size);
+            plan.work_data = compute_plan_buffer.data();
         }
         ggml_graph_compute(&plan, &gf);
-        if (plan.work_data) {
-            free(plan.work_data);
-        }
     }
 
     TENSOR_DUMP(gf.nodes[0]);
@@ -229,15 +228,12 @@ int main(int argc, char ** argv) {
         long long int start = ggml_time_us();
         //printf("Running ggml_graph_compute\n");
         {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf31, benchmark_params.n_threads);
+            auto plan = ggml_graph_compute_make_plan(&gf31, benchmark_params.n_threads);
             if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
             }
             ggml_graph_compute(&plan, &gf31);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
         }
 
         long long int stop = ggml_time_us();
@@ -272,15 +268,12 @@ int main(int argc, char ** argv) {
 
         // Running a different graph computation to make sure we override the CPU cache lines
         {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf32, benchmark_params.n_threads);
+            auto plan = ggml_graph_compute_make_plan(&gf32, benchmark_params.n_threads);
             if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
             }
             ggml_graph_compute(&plan, &gf32);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
         }
     }
     printf("\n");
@@ -3181,6 +3181,8 @@ int main(int argc, char ** argv) {
         GGML_ASSERT(train_samples[i]+n_tokens-1 < (int) train_tokens.size());
     }
 
+    auto compute_plan_buffer = std::vector<uint8_t>();
+
     printf("%s: begin training\n", __func__);
 
     for (int ex = 0; ex < params.n_examples; ++ex) {
@@ -3244,15 +3246,12 @@ int main(int argc, char ** argv) {
         }
 
         {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(gf, params.n_threads);
+            auto plan = ggml_graph_compute_make_plan(gf, params.n_threads);
             if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
             }
             ggml_graph_compute(&plan, gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
         }
 
         size_t used_mem_before_opt = ggml_used_mem(ctx0);
@@ -3278,15 +3277,12 @@ int main(int argc, char ** argv) {
         model.train_tokens += n_batch * n_tokens;
 
         {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(gf, params.n_threads);
+            auto plan = ggml_graph_compute_make_plan(gf, params.n_threads);
             if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
             }
             ggml_graph_compute(&plan, gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
         }
 
         float error_after_opt = ggml_get_f32_1d(loss, 0);
@@ -3376,15 +3372,12 @@ int main(int argc, char ** argv) {
         ggml_build_forward_expand(&gf, logits);
 
         {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, params.n_threads);
+            auto plan = ggml_graph_compute_make_plan(&gf, params.n_threads);
             if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
             }
             ggml_graph_compute(&plan, &gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
         }
 
         //struct ggml_tensor * best_samples = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, sample_ctx);
ggml.c (3 changed lines)
@@ -15974,7 +15974,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     const struct ggml_cgraph * cgraph = state->shared->cgraph;
 
     const struct ggml_graph_compute_plan * plan = state->shared->plan;
-    const int *n_tasks_arr = plan->n_tasks;
+    const int * n_tasks_arr = plan->n_tasks;
 
     const int n_threads = state->shared->n_threads;
     set_numa_thread_affinity(state->ith, n_threads);
@@ -16490,6 +16490,7 @@ void ggml_graph_compute(struct ggml_graph_compute_plan * plan, struct ggml_cgraph * cgraph) {
     }
 }
 
+// TODO: avoid allocating memory frequently.
 static void ggml_graph_compute_sugar(struct ggml_cgraph * cgraph, int n_threads) {
     struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(cgraph, n_threads);
     if (plan.work_size > 0) {
ggml.h (2 changed lines)
@@ -449,7 +449,7 @@ extern "C" {
         // Size of work buffer, calculated by `ggml_graph_compute_make_plan()`.
         size_t work_size;
         // Work buffer, to be allocated by caller before calling to `ggml_graph_compute()`.
-        void * work_data;
+        uint8_t * work_data;
 
         int n_threads;
 
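The work_data field changes from void * to uint8_t *, while the contract stays the same: ggml_graph_compute_make_plan() reports the required size and the caller supplies the bytes before ggml_graph_compute(). A minimal sketch of one evaluation under that contract (gf and n_threads are placeholders):

    struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, n_threads);
    std::vector<uint8_t> work(plan.work_size);   // any caller-owned storage works
    if (plan.work_size > 0) {
        plan.work_data = work.data();            // uint8_t * matches without a cast
    }
    ggml_graph_compute(&plan, &gf);

Typing the buffer as uint8_t * is what lets std::vector<uint8_t>::data() plug in directly at the C++ call sites in this commit.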
llama.cpp (70 changed lines)
@@ -321,6 +321,10 @@ struct llama_context {
     // input embedding (1-dimensional array: [n_embd])
     std::vector<float> embedding;
 
+    // reusable buffer for `struct ggml_graph_compute_plan.work_data`
+    // std::vector guarantees the elements are stored contiguously.
+    std::vector<uint8_t> compute_plan_buffer;
+
     // memory buffers used to evaluate the model
     // TODO: move in llama_state
     llama_ctx_buffer buf_compute;
@@ -1591,10 +1595,13 @@ static bool llama_eval_internal(
     // run the computation
     ggml_build_forward_expand(&gf, cur);
 
+    bool call_ggml_graph_compute = true;
+
 #ifdef GGML_USE_METAL
     if (lctx.ctx_metal && N == 1) {
         ggml_metal_graph_compute(lctx.ctx_metal, &gf);
         ggml_metal_get_tensor   (lctx.ctx_metal, cur);
+        call_ggml_graph_compute = false;
     } else {
         // IMPORTANT:
         // Since we don't have efficient Matrix x Matrix Metal multiplication yet, we fallback to vanilla
@@ -1611,33 +1618,18 @@ static bool llama_eval_internal(
             ggml_metal_get_tensor(lctx.ctx_metal, kv_self.k);
             ggml_metal_get_tensor(lctx.ctx_metal, kv_self.v);
         }
 
-        {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, actual_n_threads);
-            if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
-            }
-            ggml_graph_compute(&plan, &gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
-        }
     }
-#else
-    {
-        struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, actual_n_threads);
-        if (plan.work_size > 0) {
-            plan.work_data = malloc(plan.work_size);
-            GGML_ASSERT(plan.work_data);
-        }
-        ggml_graph_compute(&plan, &gf);
-        if (plan.work_data) {
-            free(plan.work_data);
-        }
-    }
 #endif
 
+    if (call_ggml_graph_compute) {
+        auto plan = ggml_graph_compute_make_plan(&gf, actual_n_threads);
+        if (plan.work_size > 0) {
+            lctx.compute_plan_buffer.resize(plan.work_size);
+            plan.work_data = lctx.compute_plan_buffer.data();
+        }
+        ggml_graph_compute(&plan, &gf);
+    }
+
     if (cgraph_fname) {
         ggml_graph_export(&gf, cgraph_fname);
     }
@@ -2822,6 +2814,9 @@ int llama_apply_lora_from_file_internal(const struct llama_model & model, const
     // read tensors and apply
     bool warned = false;
     int n_tensors = 0;
 
+    auto compute_plan_buffer = std::vector<uint8_t>();
+
     while (true) {
         int32_t n_dims;
         int32_t length;
@@ -2988,15 +2983,12 @@ int llama_apply_lora_from_file_internal(const struct llama_model & model, const
         struct ggml_cgraph gf = ggml_build_forward(r);
 
         {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, n_threads);
+            auto plan = ggml_graph_compute_make_plan(&gf, n_threads);
             if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = compute_plan_buffer.data();
             }
             ggml_graph_compute(&plan, &gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
         }
 
         // we won't need these tensors again, reset the context to save memory
@@ -3171,15 +3163,12 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
         ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, v3d, vout3d));
 
         {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
+            auto plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
             if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                ctx->compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = ctx->compute_plan_buffer.data();
             }
             ggml_graph_compute(&plan, &gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
         }
 
         ggml_free(cpy_ctx);
@@ -3287,15 +3276,12 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
         ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, vin3d, v3d));
 
         {
-            struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
+            auto plan = ggml_graph_compute_make_plan(&gf, /*n_threads*/ 1);
             if (plan.work_size > 0) {
-                plan.work_data = malloc(plan.work_size);
-                GGML_ASSERT(plan.work_data);
+                ctx->compute_plan_buffer.resize(plan.work_size);
+                plan.work_data = ctx->compute_plan_buffer.data();
             }
             ggml_graph_compute(&plan, &gf);
-            if (plan.work_data) {
-                free(plan.work_data);
-            }
         }
 
         ggml_free(cpy_ctx);
@@ -191,6 +191,32 @@ void print_elements(const char* label, const struct ggml_tensor * t) {
 
 }
 
+struct compute_plan_buffer {
+    size_t    size;
+    uint8_t * data;
+};
+
+static uint8_t * ensure_plan_work_data(struct compute_plan_buffer *buf, size_t size) {
+    if (size == 0) {
+        return NULL;
+    }
+
+    GGML_ASSERT(buf);
+
+    if (buf->size == 0) {
+        buf->data = malloc(size);
+        buf->size = size;
+    } else if (buf->size < size) {
+        buf->data = realloc(buf->data, size);
+        buf->size = size;
+    } else {
+        // skip shrinking.
+    }
+
+    GGML_ASSERT(buf->data);
+    return buf->data;
+}
+
 bool check_gradient(
         const char * op_name,
         struct ggml_context * ctx0,
@@ -218,6 +244,8 @@ bool check_gradient(
 
     struct ggml_cgraph gb = ggml_build_backward(ctx0, &gf, false);
 
+    struct compute_plan_buffer plan_buf = { /*.size = */ 0, /*.data =*/ NULL };
+
     {
         struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, n_threads);
         if (plan.work_size > 0) {
@@ -235,14 +263,8 @@ bool check_gradient(
 
     {
         struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gb, n_threads);
-        if (plan.work_size > 0) {
-            plan.work_data = malloc(plan.work_size);
-            GGML_ASSERT(plan.work_data);
-        }
+        plan.work_data = ensure_plan_work_data(&plan_buf, plan.work_size);
         ggml_graph_compute(&plan, &gb);
-        if (plan.work_data) {
-            free(plan.work_data);
-        }
     }
 
     // ggml_graph_dump_dot(&gf, NULL, "test-grad0-forward.dot");
@@ -259,14 +281,8 @@ bool check_gradient(
 
     {
         struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, n_threads);
-        if (plan.work_size > 0) {
-            plan.work_data = malloc(plan.work_size);
-            GGML_ASSERT(plan.work_data);
-        }
+        plan.work_data = ensure_plan_work_data(&plan_buf, plan.work_size);
         ggml_graph_compute(&plan, &gf);
-        if (plan.work_data) {
-            free(plan.work_data);
-        }
     }
 
     const float f0 = ggml_get_f32_1d(f, 0);
@@ -275,14 +291,8 @@ bool check_gradient(
 
     {
         struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gf, n_threads);
-        if (plan.work_size > 0) {
-            plan.work_data = malloc(plan.work_size);
-            GGML_ASSERT(plan.work_data);
-        }
+        plan.work_data = ensure_plan_work_data(&plan_buf, plan.work_size);
         ggml_graph_compute(&plan, &gf);
-        if (plan.work_data) {
-            free(plan.work_data);
-        }
     }
 
     const float f1 = ggml_get_f32_1d(f, 0);
@@ -297,14 +307,8 @@ bool check_gradient(
 
     {
         struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&gb, n_threads);
-        if (plan.work_size > 0) {
-            plan.work_data = malloc(plan.work_size);
-            GGML_ASSERT(plan.work_data);
-        }
+        plan.work_data = ensure_plan_work_data(&plan_buf, plan.work_size);
        ggml_graph_compute(&plan, &gb);
-        if (plan.work_data) {
-            free(plan.work_data);
-        }
     }
 
     const float g1 = get_element(x[i]->grad, k);
@@ -321,6 +325,10 @@ bool check_gradient(
         }
     }
 
+    if (plan_buf.data) {
+        free(plan_buf.data);
+    }
+
     return true;
 }
 
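These tests are plain C, so they reuse the work buffer through the grow-only ensure_plan_work_data() helper added above rather than std::vector. A sketch of its behavior across calls (the sizes are hypothetical):

    struct compute_plan_buffer plan_buf = { /*.size =*/ 0, /*.data =*/ NULL };

    uint8_t * w1 = ensure_plan_work_data(&plan_buf, 1024); // first call: malloc(1024)
    uint8_t * w2 = ensure_plan_work_data(&plan_buf, 512);  // smaller request: same block, no realloc
    uint8_t * w3 = ensure_plan_work_data(&plan_buf, 4096); // larger request: realloc to 4096
    uint8_t * w4 = ensure_plan_work_data(&plan_buf, 0);    // zero: returns NULL, buffer untouched

    if (plan_buf.data) {
        free(plan_buf.data); // freed once, after the last graph computation
    }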
@@ -114,6 +114,31 @@ void set_element(struct ggml_tensor * t, int idx, float value) {
     ((float *)t->data)[idx] = value;
 }
 
+
+struct compute_plan_buffer {
+    size_t    size;
+    uint8_t * data;
+};
+
+static uint8_t * ensure_plan_work_data(struct compute_plan_buffer *buf, size_t size) {
+    if (size == 0) {
+        return NULL;
+    }
+
+    if (buf->size == 0) {
+        buf->data = malloc(size);
+        buf->size = size;
+    } else if (buf->size < size) {
+        buf->data = realloc(buf->data, size);
+        buf->size = size;
+    } else {
+        // skip shrinking.
+    }
+
+    GGML_ASSERT(buf->data);
+    return buf->data;
+}
+
 int main(int argc, const char ** argv) {
     struct ggml_init_params params = {
         .mem_size = 1024*1024*1024,
@@ -141,16 +166,11 @@ int main(int argc, const char ** argv) {
     struct ggml_cgraph ge = ggml_build_forward(e);
     ggml_graph_reset (&ge);
 
+    struct compute_plan_buffer plan_buf = { /*.size = */ 0, /*.data =*/ NULL };
     {
         struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&ge, /*n_threads*/ 1);
-        if (plan.work_size > 0) {
-            plan.work_data = malloc(plan.work_size);
-            GGML_ASSERT(plan.work_data);
-        }
+        plan.work_data = ensure_plan_work_data(&plan_buf, plan.work_size);
         ggml_graph_compute(&plan, &ge);
-        if (plan.work_data) {
-            free(plan.work_data);
-        }
     }
 
     const float fe = ggml_get_f32_1d(e, 0);
@@ -164,14 +184,12 @@ int main(int argc, const char ** argv) {
 
     {
         struct ggml_graph_compute_plan plan = ggml_graph_compute_make_plan(&ge, /*n_threads*/ 1);
-        if (plan.work_size > 0) {
-            plan.work_data = malloc(plan.work_size);
-            GGML_ASSERT(plan.work_data);
-        }
+        plan.work_data = ensure_plan_work_data(&plan_buf, plan.work_size);
         ggml_graph_compute(&plan, &ge);
-        if (plan.work_data) {
-            free(plan.work_data);
-        }
     }
+
+    if (plan_buf.data) {
+        free(plan_buf.data);
+    }
 
     const float fe_opt = ggml_get_f32_1d(e, 0);