now it might even run

mike dupont 2023-11-21 14:48:47 -05:00
parent ee9b0bceeb
commit 22359f7afe
5 changed files with 138 additions and 150 deletions
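
The recurring change in this commit is mechanical: with ggml now compiled as C++ (ggml.cpp), the C99 compound literals with designated initializers are replaced by per-member assignments, which C++ accepts. A minimal sketch of the pattern, using a made-up struct rather than code from this commit:

    // Hypothetical struct standing in for ggml_context, ggml_tensor, ggml_cgraph, etc.
    struct point { int x; int y; };

    void init_point(struct point * p) {
        // C99 form used by the original ggml.c; C++ rejects this form
        // (or only accepts it as a compiler extension with restrictions):
        //     *p = (struct point) { .x = 1, .y = 2 };
        // C++-friendly form used throughout the hunks below:
        (*p).x = 1;
        (*p).y = 2;
    }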


@@ -598,7 +598,9 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs(
         const bool enable_flash_attn,
         const bool enable_checkpointing) {
-    ggml_set_scratch(ctx, { 0, 0, nullptr, });
+    //FIXME
+    assert(0);
+    //ggml_set_scratch(ctx, { 0, 0, nullptr, });
     const int n_past = 0;
     const int N = n_tokens;
     const auto & hparams = model->hparams;


@@ -311,7 +311,8 @@ static struct ggml_tensor * llama_build_train_graphs(
         const bool enable_flash_attn,
         const bool enable_checkpointing) {
-    ggml_set_scratch(ctx, { 0, 0, nullptr, });
+    assert(0);
+    //ggml_set_scratch(ctx, { 0, 0, nullptr, });
     const int n_past = 0;
     const int N = n_tokens;
     const auto & hparams = model->hparams;

ggml.cpp

@@ -2244,12 +2244,8 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
        {
            const uint64_t t_start = ggml_time_us(); UNUSED(t_start);
-           // TODOFIXME
-           // g_state = (struct ggml_state) {
-           //     struct ggml_context_container contexts[64];
-           ///g_state.contexts[0][0] = 0 ;
+           g_state = ggml_state();
            g_state.numa.n_nodes = 0;
            g_state.numa.total_cpus = 0;
@@ -2302,18 +2298,18 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
    const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
-   // FIXME
-   // *ctx = (struct ggml_context) {
-   //     /*.mem_size         =*/ mem_size,
-   //     /*.mem_buffer       =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size),
-   //     /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
-   //     /*.no_alloc         =*/ params.no_alloc,
-   //     /*.no_alloc_save    =*/ params.no_alloc,
-   //     /*.n_objects        =*/ 0,
-   //     /*.objects_begin    =*/ NULL,
-   //     /*.objects_end      =*/ NULL,
-   //     /*.scratch          =*/ { 0, 0, NULL, },
-   //     /*.scratch_save     =*/ { 0, 0, NULL, },
+   (*ctx).mem_size = mem_size;
+   (*ctx).mem_buffer = params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size);
+   (*ctx).mem_buffer_owned = params.mem_buffer ? false : true;
+   (*ctx).no_alloc = params.no_alloc;
+   (*ctx).no_alloc_save = params.no_alloc;
+   (*ctx).n_objects = 0;
+   (*ctx).objects_begin = NULL;
+   (*ctx).objects_end = NULL;
+   ggml_scratch a;
+   (*ctx).scratch = a;
+   (*ctx).scratch_save = a;
    // };
    GGML_ASSERT(ctx->mem_buffer != NULL);
@@ -2449,14 +2445,11 @@ static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml
        assert(false);
        return NULL;
    }
-   // FIXME
-   // *obj_new = (struct ggml_object) {
-   //     .offs = cur_end + GGML_OBJECT_SIZE,
-   //     .size = size_needed,
-   //     .next = NULL,
-   //     .type = type,
-   // };
+   //*obj_new = //(struct ggml_object) {
+   (*obj_new).offs = cur_end + GGML_OBJECT_SIZE;
+   (*obj_new).size = size_needed;
+   (*obj_new).next = NULL;
+   (*obj_new).type = type;
    ggml_assert_aligned(mem_buffer + obj_new->offs);
@@ -2528,29 +2521,30 @@ static struct ggml_tensor * ggml_new_tensor_impl(
    // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
    struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
-   // FIXME
    // *result = (struct ggml_tensor) {
-   //     /*.type         =*/ type,
-   //     /*.backend      =*/ GGML_BACKEND_CPU,
-   //     /*.buffer       =*/ NULL,
-   //     /*.n_dims       =*/ n_dims,
-   //     /*.ne           =*/ { 1, 1, 1, 1 },
-   //     /*.nb           =*/ { 0, 0, 0, 0 },
-   //     /*.op           =*/ GGML_OP_NONE,
-   //     /*.op_params    =*/ { 0 },
-   //     /*.is_param     =*/ false,
-   //     /*.grad         =*/ NULL,
-   //     /*.src          =*/ { NULL },
-   //     /*.perf_runs    =*/ 0,
-   //     /*.perf_cycles  =*/ 0,
-   //     /*.perf_time_us =*/ 0,
-   //     /*.view_src     =*/ view_src,
-   //     /*.view_offs    =*/ view_offs,
-   //     /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
-   //     /*.name         =*/ { 0 },
-   //     /*.extra        =*/ NULL,
-   //     /*.padding      =*/ { 0 },
-   // };
+   (*result).type = type;
+   (*result).backend = GGML_BACKEND_CPU;
+   (*result).buffer = NULL;
+   (*result).n_dims = n_dims;
+   for (int i = 0; i < 4; i++) {
+       (*result).ne[i] = 1;
+       (*result).nb[i] = 0;
+   }
+   (*result).op = GGML_OP_NONE;
+   (*result).op_params[0] = 0;
+   (*result).is_param = false;
+   (*result).grad = NULL;
+   (*result).src[0] = NULL;
+   (*result).perf_runs = 0;
+   (*result).perf_cycles = 0;
+   (*result).perf_time_us = 0;
+   (*result).view_src = view_src;
+   (*result).view_offs = view_offs;
+   (*result).data = obj_alloc_size > 0 ? (void *)(result + 1) : data;
+   (*result).name[0] = 0;
+   (*result).extra = NULL;
+   (*result).padding[0] = 0;
    // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
    //ggml_assert_aligned(result->data);
@@ -15591,20 +15585,19 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz
            (grads ? (char *)(grads_ptr + size) : (char *)(hash_keys_ptr + hash_size)) - (char *)cgraph));
    memset(hash_keys_ptr, 0, hash_size * sizeof(struct ggml_tensor *));
-   // FIXME
-   // *cgraph = (struct ggml_cgraph) {
-   //     /*.size         =*/ size,
-   //     /*.n_nodes      =*/ 0,
-   //     /*.n_leafs      =*/ 0,
-   //     /*.nodes        =*/ nodes_ptr,
-   //     /*.grads        =*/ grads_ptr,
-   //     /*.leafs        =*/ leafs_ptr,
-   //     /*.hash_table   =*/ { hash_size, hash_keys_ptr },
-   //     /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
-   //     /*.perf_runs    =*/ 0,
-   //     /*.perf_cycles  =*/ 0,
-   //     /*.perf_time_us =*/ 0,
-   // };
+   (*cgraph).size = size;
+   (*cgraph).n_nodes = 0;
+   (*cgraph).n_leafs = 0;
+   (*cgraph).nodes = nodes_ptr;
+   (*cgraph).grads = grads_ptr;
+   (*cgraph).leafs = leafs_ptr;
+   //(*cgraph).hash_table = { hash_size, hash_keys_ptr };
+   (*cgraph).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
+   (*cgraph).perf_runs = 0;
+   (*cgraph).perf_cycles = 0;
+   (*cgraph).perf_time_us = 0;
    return cgraph;
 }
@@ -15619,17 +15612,17 @@ struct ggml_cgraph * ggml_graph_view(struct ggml_context * ctx, struct ggml_cgra
    struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
    // *cgraph = (struct ggml_cgraph) {
-   //     /*.size         =*/ 0,
-   //     /*.n_nodes      =*/ i1 - i0,
-   //     /*.n_leafs      =*/ 0,
-   //     /*.nodes        =*/ cgraph0->nodes + i0,
-   //     /*.grads        =*/ cgraph0->grads ? cgraph0->grads + i0 : NULL,
-   //     /*.leafs        =*/ NULL,
-   //     /*.hash_table   =*/ { 0, NULL },
-   //     /*.order        =*/ cgraph0->order,
-   //     /*.perf_runs    =*/ 0,
-   //     /*.perf_cycles  =*/ 0,
-   //     /*.perf_time_us =*/ 0,
+   (*cgraph).size = 0;
+   (*cgraph).n_nodes = i1 - i0;
+   (*cgraph).n_leafs = 0;
+   (*cgraph).nodes = cgraph0->nodes + i0;
+   (*cgraph).grads = cgraph0->grads ? cgraph0->grads + i0 : NULL;
+   (*cgraph).leafs = NULL;
+   //(*cgraph).hash_table = { 0, NULL };
+   (*cgraph).order = cgraph0->order;
+   (*cgraph).perf_runs = 0;
+   (*cgraph).perf_cycles = 0;
+   (*cgraph).perf_time_us = 0;
    // };
    return cgraph;
@@ -16395,12 +16388,10 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
    // create thread pool
    if (n_threads > 1) {
        for (int j = 1; j < n_threads; ++j) {
-           // FIXME
            // workers[j] = (struct ggml_compute_state) {
-           //     .thrd   = 0,
-           //     .ith    = j,
-           //     .shared = &state_shared,
-           // };
+           workers[j].thrd = 0;
+           workers[j].ith = j;
+           workers[j].shared = &state_shared;
            const int rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_thread, &workers[j]);
            GGML_ASSERT(rc == 0);
@@ -16719,12 +16710,12 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
    // create the data context
    {
        const size_t overhead = 1*ggml_tensor_overhead();
+       GGML_ASSERT(0);
        // FIXME
        struct ggml_init_params params;// = {
-       //     .mem_size   = fsize + overhead,
-       //     .mem_buffer = NULL,
-       //     .no_alloc   = false,
+       params.mem_size = fsize + overhead,
+       params.mem_buffer = NULL,
+       params.no_alloc = false,
        // };
        *ctx_data = ggml_init(params);
@@ -16777,12 +16768,10 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
    {
        const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead() + ggml_graph_overhead_custom(graph_size, false);
-       // FIXME
        struct ggml_init_params params;// = {
-       //     .mem_size   = size_eval + overhead,
-       //     .mem_buffer = NULL,
-       //     .no_alloc   = true,
-       // };
+       params.mem_size = size_eval + overhead,
+       params.mem_buffer = NULL,
+       params.no_alloc = true,
        *ctx_eval = ggml_init(params);
@@ -17545,7 +17534,7 @@ static enum ggml_opt_result linesearch_backtracking(
        } else {
            if(params->lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE) {
                // regular Wolfe conditions
-               return count;
+               return (ggml_opt_result)count;
            }
            if(dg > -params->lbfgs.wolfe*dginit) {
@@ -17623,7 +17612,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
    float * gp = (float*)opt->lbfgs.gp->data; // previous gradient
    float * d  = (float*)opt->lbfgs.d->data;  // search direction
-   float * pf = params.past > 0 ? opt->lbfgs.pf->data : NULL; // past function values
+   float * pf = params.past > 0 ? (float*)opt->lbfgs.pf->data : NULL; // past function values
    const int n_accum = MAX(1, params.n_gradient_accumulation);
    const float accum_norm = 1.0f / (float) n_accum;
@@ -17847,66 +17836,61 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
    switch (type) {
        case GGML_OPT_ADAM:
            {
-               // FIXME
                // result = (struct ggml_opt_params) {
-               //     .type       = GGML_OPT_ADAM,
-               //     .graph_size = GGML_DEFAULT_GRAPH_SIZE,
-               //     .n_threads  = 1, // FIXME: GGML_DEFAULT_N_THREADS ?
-               //     .past       = 0,
-               //     .delta      = 1e-5f,
-               //     .max_no_improvement = 100,
-               //     .print_forward_graph  = true,
-               //     .print_backward_graph = true,
-               //     .n_gradient_accumulation = 1,
-               //     .adam = {
-               //         .n_iter = 10000,
-               //         .sched  = 1.000f,
-               //         .decay  = 0.0f,
-               //         .decay_min_ndim = 2,
-               //         .alpha  = 0.001f,
-               //         .beta1  = 0.9f,
-               //         .beta2  = 0.999f,
-               //         .eps    = 1e-8f,
-               //         .eps_f  = 1e-5f,
-               //         .eps_g  = 1e-3f,
-               //         .gclip  = 0.0f,
+               result.type = GGML_OPT_ADAM;
+               result.graph_size = GGML_DEFAULT_GRAPH_SIZE;
+               result.n_threads = 1; // FIXME: GGML_DEFAULT_N_THREADS ?
+               result.past = 0;
+               result.delta = 1e-5f;
+               result.max_no_improvement = 100;
+               result.print_forward_graph = true;
+               result.print_backward_graph = true;
+               result.n_gradient_accumulation = 1;
+               // result.adam = {
+               result.adam.n_iter = 10000;
+               result.adam.sched = 1.000f;
+               result.adam.decay = 0.0f;
+               result.adam.decay_min_ndim = 2;
+               result.adam.alpha = 0.001f;
+               result.adam.beta1 = 0.9f;
+               result.adam.beta2 = 0.999f;
+               result.adam.eps = 1e-8f;
+               result.adam.eps_f = 1e-5f;
+               result.adam.eps_g = 1e-3f;
+               result.adam.gclip = 0.0f;
                // },
                // };
            } break;
        case GGML_OPT_LBFGS:
            break;
            //{
                // TODO FIXME
                // result = (struct ggml_opt_params) {
-               //     .type       = GGML_OPT_LBFGS,
-               //     .graph_size = GGML_DEFAULT_GRAPH_SIZE,
-               //     .n_threads  = 1,
-               //     .past       = 0,
-               //     .delta      = 1e-5f,
-               //     .max_no_improvement = 0,
-               //     .print_forward_graph  = true,
-               //     .print_backward_graph = true,
-               //     .n_gradient_accumulation = 1,
-               //     .lbfgs = {
-               //         .m              = 6,
-               //         .n_iter         = 100,
-               //         .max_linesearch = 20,
-               //         .eps            = 1e-5f,
-               //         .ftol           = 1e-4f,
-               //         .wolfe          = 0.9f,
-               //         .min_step       = 1e-20f,
-               //         .max_step       = 1e+20f,
-               //         .linesearch     = GGML_LINESEARCH_DEFAULT,
+               result.type = GGML_OPT_LBFGS;
+               result.graph_size = GGML_DEFAULT_GRAPH_SIZE;
+               result.n_threads = 1;
+               result.past = 0;
+               result.delta = 1e-5f;
+               result.max_no_improvement = 0;
+               result.print_forward_graph = true;
+               result.print_backward_graph = true;
+               result.n_gradient_accumulation = 1;
+               result.lbfgs.m = 6;
+               result.lbfgs.n_iter = 100;
+               result.lbfgs.max_linesearch = 20;
+               result.lbfgs.eps = 1e-5f;
+               result.lbfgs.ftol = 1e-4f;
+               result.lbfgs.wolfe = 0.9f;
+               result.lbfgs.min_step = 1e-20f;
+               result.lbfgs.max_step = 1e+20f;
+               result.lbfgs.linesearch = GGML_LINESEARCH_DEFAULT;
                // }
            //};
@@ -18648,11 +18632,9 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
        // FIXME
        struct ggml_init_params pdata;
-       // = {
-       //     .mem_size   = mem_size,
-       //     .mem_buffer = NULL,
-       //     .no_alloc   = params.no_alloc,
-       // };
+       pdata.mem_size = mem_size,
+       pdata.mem_buffer = NULL,
+       pdata.no_alloc = params.no_alloc,
        *params.ctx = ggml_init(pdata);
@@ -18684,7 +18666,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
        // create the tensors
        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
            const int64_t ne[GGML_MAX_DIMS] = {
-               (int64_t)ctx->infos[i].ne[0],// FIXME narrowing
+               (int64_t)ctx->infos[i].ne[0],
                (int64_t)ctx->infos[i].ne[1],
                (int64_t)ctx->infos[i].ne[2],
                (int64_t)ctx->infos[i].ne[3],
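
Several of the hunks above (ggml_graph_import, gguf_init_from_file) build a struct ggml_init_params by assigning its three members and then pass it to ggml_init. A small self-contained sketch of that calling pattern, assuming only the public ggml.h API; the buffer size is illustrative, not a value from this commit:

    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params;
        params.mem_size   = 16*1024*1024;  // illustrative 16 MiB arena for tensors
        params.mem_buffer = NULL;          // NULL: let ggml allocate the arena itself
        params.no_alloc   = false;         // false: tensor data is carved out of the arena

        struct ggml_context * ctx = ggml_init(params);
        if (ctx == NULL) {
            return 1;                      // initialization failed
        }
        ggml_free(ctx);                    // release the context and its arena
        return 0;
    }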


@@ -46,6 +46,6 @@ llama_build_and_test_executable(test-grad0.cpp) # SLOW
 llama_build_and_test_executable(test-rope.cpp)
 
 # dummy executable - not installed
-get_filename_component(TEST_TARGET test-c.c NAME_WE)
-add_executable(${TEST_TARGET} test-c.c)
+get_filename_component(TEST_TARGET test-c.cpp NAME_WE)
+add_executable(${TEST_TARGET} test-c.cpp)
 target_link_libraries(${TEST_TARGET} PRIVATE llama)

tests/test-c.cpp (new file)

@@ -0,0 +1,3 @@
+#include "llama.h"
+int main(void) {}