diff --git a/Makefile b/Makefile index a10fada09..98bf8845c 100644 --- a/Makefile +++ b/Makefile @@ -174,9 +174,13 @@ MK_CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wmissing-prototypes -Werror=implicit-int -Wno-unused-function MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wmissing-declarations -Wno-unused-function -Wno-multichar +# TODO(cebtenzzre): remove this once PR #2632 gets merged +TTFS_CXXFLAGS = $(CXXFLAGS) -Wno-missing-declarations + ifneq '' '$(findstring clang,$(shell $(CXX) --version))' # clang++ only - MK_CXXFLAGS += -Wmissing-prototypes + MK_CXXFLAGS += -Wmissing-prototypes + TTFS_CXXFLAGS += -Wno-missing-prototypes else # g++ only MK_CXXFLAGS += -Wno-format-truncation -Wno-array-bounds @@ -527,7 +531,7 @@ gguf: examples/gguf/gguf.cpp ggml.o llama.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o common.o $(OBJS) - $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + $(CXX) $(TTFS_CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp index 1f1cd6178..947aa7ed3 100644 --- a/examples/train-text-from-scratch/train-text-from-scratch.cpp +++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp @@ -30,37 +30,35 @@ struct random_uniform_distribution { std::uniform_real_distribution rd; }; -static void init_random_normal_distribution( - struct random_normal_distribution * rnd, int seed, float mean, float std, float min, float max -) { +void init_random_normal_distribution(struct random_normal_distribution * rnd, int seed, float mean, float std, float min, float max) { rnd->gen = std::mt19937(seed); rnd->rd = std::normal_distribution{mean, std}; rnd->min = min; rnd->max = max; } -static void init_random_uniform_distribution(struct random_uniform_distribution * rnd, int seed, float min, float max) { +void init_random_uniform_distribution(struct random_uniform_distribution * rnd, int seed, float min, float max) { rnd->gen = std::mt19937(seed); rnd->rd = std::uniform_real_distribution{min, max}; } -static int clamp(const int v, const int min, const int max) { +int clamp(const int v, const int min, const int max) { return ((v < min) ? (min) : (v > max) ? (max) : v); } -static float fclamp(const float v, const float min, const float max) { +float fclamp(const float v, const float min, const float max) { return ((v < min) ? (min) : (v > max) ? (max) : v); } -static float frand() { +float frand() { return (float)rand()/(float)RAND_MAX; } -static float frand_normal(struct random_normal_distribution * rnd) { +float frand_normal(struct random_normal_distribution * rnd) { return fclamp(rnd->rd(rnd->gen), rnd->min, rnd->max); } -static float frand_uniform(struct random_uniform_distribution * rnd) { +float frand_uniform(struct random_uniform_distribution * rnd) { return rnd->rd(rnd->gen); } @@ -210,85 +208,85 @@ struct my_llama_model { }; // gguf constants -static const char * LLM_KV_OPTIMIZER_TYPE = "optimizer.type"; -static const char * LLM_KV_OPTIMIZER_TYPE_ADAM = "adam"; -static const char * LLM_KV_OPTIMIZER_TYPE_LBFGS = "lbfgs"; -static const char * LLM_KV_OPTIMIZER_FILE_VERSION = "optimizer.file_version"; -static const char * LLM_KV_OPTIMIZER_CONVERGENCE_PAST_COUNT = "optimizer.convergence_past_count"; -static const char * LLM_KV_OPTIMIZER_PARAMETER_COUNT = "optimizer.parameter_count"; -static const char * LLM_KV_OPTIMIZER_ITERATION_COUNT = "optimizer.iteration_count"; -static const char * LLM_KV_OPTIMIZER_JUST_INITIALIZED = "optimizer.just_initialized"; -static const char * LLM_KV_OPTIMIZER_ADAM_BEST_LOSS = "optimizer.adam.best_loss"; -static const char * LLM_KV_OPTIMIZER_ADAM_PREVIOUS_LOSS = "optimizer.adam.previous_loss"; -static const char * LLM_KV_OPTIMIZER_ADAM_NO_IMPROVEMENT_COUNT = "optimizer.adam.no_improvement_count"; -static const char * LLM_KV_OPTIMIZER_LBFGS_APPROX_HESSIAN_COUNT = "optimizer.lbfgs.approx_hessian_count"; -static const char * LLM_KV_OPTIMIZER_LBFGS_BEST_LOSS = "optimizer.lbfgs.best_loss"; -static const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_STEP = "optimizer.lbfgs.line_search_step"; -static const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_J = "optimizer.lbfgs.line_search_j"; -static const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_K = "optimizer.lbfgs.line_search_k"; -static const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_END = "optimizer.lbfgs.line_search_end"; -static const char * LLM_KV_OPTIMIZER_LBFGS_NO_IMPROVEMENT_COUNT = "optimizer.lbfgs.no_improvement_count"; +const char * LLM_KV_OPTIMIZER_TYPE = "optimizer.type"; +const char * LLM_KV_OPTIMIZER_TYPE_ADAM = "adam"; +const char * LLM_KV_OPTIMIZER_TYPE_LBFGS = "lbfgs"; +const char * LLM_KV_OPTIMIZER_FILE_VERSION = "optimizer.file_version"; +const char * LLM_KV_OPTIMIZER_CONVERGENCE_PAST_COUNT = "optimizer.convergence_past_count"; +const char * LLM_KV_OPTIMIZER_PARAMETER_COUNT = "optimizer.parameter_count"; +const char * LLM_KV_OPTIMIZER_ITERATION_COUNT = "optimizer.iteration_count"; +const char * LLM_KV_OPTIMIZER_JUST_INITIALIZED = "optimizer.just_initialized"; +const char * LLM_KV_OPTIMIZER_ADAM_BEST_LOSS = "optimizer.adam.best_loss"; +const char * LLM_KV_OPTIMIZER_ADAM_PREVIOUS_LOSS = "optimizer.adam.previous_loss"; +const char * LLM_KV_OPTIMIZER_ADAM_NO_IMPROVEMENT_COUNT = "optimizer.adam.no_improvement_count"; +const char * LLM_KV_OPTIMIZER_LBFGS_APPROX_HESSIAN_COUNT = "optimizer.lbfgs.approx_hessian_count"; +const char * LLM_KV_OPTIMIZER_LBFGS_BEST_LOSS = "optimizer.lbfgs.best_loss"; +const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_STEP = "optimizer.lbfgs.line_search_step"; +const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_J = "optimizer.lbfgs.line_search_j"; +const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_K = "optimizer.lbfgs.line_search_k"; +const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_END = "optimizer.lbfgs.line_search_end"; +const char * LLM_KV_OPTIMIZER_LBFGS_NO_IMPROVEMENT_COUNT = "optimizer.lbfgs.no_improvement_count"; -static const char * LLM_TENSOR_OPTIMIZER_ADAM_FIRST_MOMENTS = "optimizer.adam.first_moments"; -static const char * LLM_TENSOR_OPTIMIZER_ADAM_SECOND_MOMENTS = "optimizer.adam.second_moments"; -static const char * LLM_TENSOR_OPTIMIZER_ADAM_PAST_LOSS_VALUES = "optimizer.adam.past_loss_values"; +const char * LLM_TENSOR_OPTIMIZER_ADAM_FIRST_MOMENTS = "optimizer.adam.first_moments"; +const char * LLM_TENSOR_OPTIMIZER_ADAM_SECOND_MOMENTS = "optimizer.adam.second_moments"; +const char * LLM_TENSOR_OPTIMIZER_ADAM_PAST_LOSS_VALUES = "optimizer.adam.past_loss_values"; -static const char * LLM_TENSOR_OPTIMIZER_LBFGS_CURRENT_PARAMETERS = "optimizer.lbfgs.current_parameters"; -static const char * LLM_TENSOR_OPTIMIZER_LBFGS_PREVIOUS_PARAMETERS = "optimizer.lbfgs.previous_parameters"; -static const char * LLM_TENSOR_OPTIMIZER_LBFGS_CURRENT_GRADIENTS = "optimizer.lbfgs.current_gradients"; -static const char * LLM_TENSOR_OPTIMIZER_LBFGS_PREVIOUS_GRADIENTS = "optimizer.lbfgs.previous_gradients"; -static const char * LLM_TENSOR_OPTIMIZER_LBFGS_SEARCH_DIRECTION = "optimizer.lbfgs.search_direction"; -static const char * LLM_TENSOR_OPTIMIZER_LBFGS_PAST_LOSS_VALUES = "optimizer.lbfgs.past_loss_values"; -static const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_ALPHA = "optimizer.lbfgs.memory_alpha"; -static const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_YS = "optimizer.lbfgs.memory_ys"; -static const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_S = "optimizer.lbfgs.memory_s"; -static const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_Y = "optimizer.lbfgs.memory_y"; +const char * LLM_TENSOR_OPTIMIZER_LBFGS_CURRENT_PARAMETERS = "optimizer.lbfgs.current_parameters"; +const char * LLM_TENSOR_OPTIMIZER_LBFGS_PREVIOUS_PARAMETERS = "optimizer.lbfgs.previous_parameters"; +const char * LLM_TENSOR_OPTIMIZER_LBFGS_CURRENT_GRADIENTS = "optimizer.lbfgs.current_gradients"; +const char * LLM_TENSOR_OPTIMIZER_LBFGS_PREVIOUS_GRADIENTS = "optimizer.lbfgs.previous_gradients"; +const char * LLM_TENSOR_OPTIMIZER_LBFGS_SEARCH_DIRECTION = "optimizer.lbfgs.search_direction"; +const char * LLM_TENSOR_OPTIMIZER_LBFGS_PAST_LOSS_VALUES = "optimizer.lbfgs.past_loss_values"; +const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_ALPHA = "optimizer.lbfgs.memory_alpha"; +const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_YS = "optimizer.lbfgs.memory_ys"; +const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_S = "optimizer.lbfgs.memory_s"; +const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_Y = "optimizer.lbfgs.memory_y"; -static const char * LLM_KV_TRAINING_FILE_VERSION = "training.file_version"; -static const char * LLM_KV_TRAINING_ITERATION_COUNT = "training.iteration_count"; -static const char * LLM_KV_TRAINING_SAMPLE_COUNT = "training.sample_count"; -static const char * LLM_KV_TRAINING_TOKEN_COUNT = "training.token_count"; +const char * LLM_KV_TRAINING_FILE_VERSION = "training.file_version"; +const char * LLM_KV_TRAINING_ITERATION_COUNT = "training.iteration_count"; +const char * LLM_KV_TRAINING_SAMPLE_COUNT = "training.sample_count"; +const char * LLM_KV_TRAINING_TOKEN_COUNT = "training.token_count"; // gguf constants (sync with gguf.py) -static const char * LLM_KV_GENERAL_ARCHITECTURE = "general.architecture"; -static const char * LLM_KV_GENERAL_FILE_TYPE = "general.file_type"; +const char * LLM_KV_GENERAL_ARCHITECTURE = "general.architecture"; +const char * LLM_KV_GENERAL_FILE_TYPE = "general.file_type"; -static const char * LLM_KV_CONTEXT_LENGTH = "%s.context_length"; -static const char * LLM_KV_EMBEDDING_LENGTH = "%s.embedding_length"; -static const char * LLM_KV_BLOCK_COUNT = "%s.block_count"; -static const char * LLM_KV_FEED_FORWARD_LENGTH = "%s.feed_forward_length"; -static const char * LLM_KV_ATTENTION_HEAD_COUNT = "%s.attention.head_count"; -static const char * LLM_KV_ATTENTION_LAYERNORM_RMS_EPS = "%s.attention.layer_norm_rms_epsilon"; -static const char * LLM_KV_ROPE_DIMENSION_COUNT = "%s.rope.dimension_count"; -static const char * LLM_KV_ROPE_FREQ_BASE = "%s.rope.freq_base"; // TODO load in llama.cpp -static const char * LLM_KV_ROPE_SCALE_LINEAR = "%s.rope.scale_linear"; +const char * LLM_KV_CONTEXT_LENGTH = "%s.context_length"; +const char * LLM_KV_EMBEDDING_LENGTH = "%s.embedding_length"; +const char * LLM_KV_BLOCK_COUNT = "%s.block_count"; +const char * LLM_KV_FEED_FORWARD_LENGTH = "%s.feed_forward_length"; +const char * LLM_KV_ATTENTION_HEAD_COUNT = "%s.attention.head_count"; +const char * LLM_KV_ATTENTION_LAYERNORM_RMS_EPS = "%s.attention.layer_norm_rms_epsilon"; +const char * LLM_KV_ROPE_DIMENSION_COUNT = "%s.rope.dimension_count"; +const char * LLM_KV_ROPE_FREQ_BASE = "%s.rope.freq_base"; // TODO load in llama.cpp +const char * LLM_KV_ROPE_SCALE_LINEAR = "%s.rope.scale_linear"; -static const char * LLM_KV_TOKENIZER_MODEL = "tokenizer.ggml.model"; -static const char * LLM_KV_TOKENIZER_LIST = "tokenizer.ggml.tokens"; -static const char * LLM_KV_TOKENIZER_TOKEN_TYPE = "tokenizer.ggml.token_type"; -static const char * LLM_KV_TOKENIZER_SCORES = "tokenizer.ggml.scores"; -static const char * LLM_KV_TOKENIZER_MERGES = "tokenizer.ggml.merges"; -static const char * LLM_KV_TOKENIZER_BOS_ID = "tokenizer.ggml.bos_token_id"; -static const char * LLM_KV_TOKENIZER_EOS_ID = "tokenizer.ggml.eos_token_id"; -static const char * LLM_KV_TOKENIZER_UNK_ID = "tokenizer.ggml.unknown_token_id"; -static const char * LLM_KV_TOKENIZER_SEP_ID = "tokenizer.ggml.seperator_token_id"; -static const char * LLM_KV_TOKENIZER_PAD_ID = "tokenizer.ggml.padding_token_id"; +const char * LLM_KV_TOKENIZER_MODEL = "tokenizer.ggml.model"; +const char * LLM_KV_TOKENIZER_LIST = "tokenizer.ggml.tokens"; +const char * LLM_KV_TOKENIZER_TOKEN_TYPE = "tokenizer.ggml.token_type"; +const char * LLM_KV_TOKENIZER_SCORES = "tokenizer.ggml.scores"; +const char * LLM_KV_TOKENIZER_MERGES = "tokenizer.ggml.merges"; +const char * LLM_KV_TOKENIZER_BOS_ID = "tokenizer.ggml.bos_token_id"; +const char * LLM_KV_TOKENIZER_EOS_ID = "tokenizer.ggml.eos_token_id"; +const char * LLM_KV_TOKENIZER_UNK_ID = "tokenizer.ggml.unknown_token_id"; +const char * LLM_KV_TOKENIZER_SEP_ID = "tokenizer.ggml.seperator_token_id"; +const char * LLM_KV_TOKENIZER_PAD_ID = "tokenizer.ggml.padding_token_id"; -static const char * LLM_TENSOR_TOKEN_EMBD = "token_embd"; -static const char * LLM_TENSOR_OUTPUT_NORM = "output_norm"; -static const char * LLM_TENSOR_OUTPUT = "output"; -static const char * LLM_TENSOR_ATTN_NORM = "blk.%d.attn_norm"; -static const char * LLM_TENSOR_ATTN_Q = "blk.%d.attn_q"; -static const char * LLM_TENSOR_ATTN_K = "blk.%d.attn_k"; -static const char * LLM_TENSOR_ATTN_V = "blk.%d.attn_v"; -static const char * LLM_TENSOR_ATTN_OUT = "blk.%d.attn_output"; -static const char * LLM_TENSOR_FFN_NORM = "blk.%d.ffn_norm"; -static const char * LLM_TENSOR_FFN_GATE = "blk.%d.ffn_gate"; -static const char * LLM_TENSOR_FFN_DOWN = "blk.%d.ffn_down"; -static const char * LLM_TENSOR_FFN_UP = "blk.%d.ffn_up"; +const char * LLM_TENSOR_TOKEN_EMBD = "token_embd"; +const char * LLM_TENSOR_OUTPUT_NORM = "output_norm"; +const char * LLM_TENSOR_OUTPUT = "output"; +const char * LLM_TENSOR_ATTN_NORM = "blk.%d.attn_norm"; +const char * LLM_TENSOR_ATTN_Q = "blk.%d.attn_q"; +const char * LLM_TENSOR_ATTN_K = "blk.%d.attn_k"; +const char * LLM_TENSOR_ATTN_V = "blk.%d.attn_v"; +const char * LLM_TENSOR_ATTN_OUT = "blk.%d.attn_output"; +const char * LLM_TENSOR_FFN_NORM = "blk.%d.ffn_norm"; +const char * LLM_TENSOR_FFN_GATE = "blk.%d.ffn_gate"; +const char * LLM_TENSOR_FFN_DOWN = "blk.%d.ffn_down"; +const char * LLM_TENSOR_FFN_UP = "blk.%d.ffn_up"; -static void print_params(struct my_llama_hparams * params) { +void print_params(struct my_llama_hparams * params) { printf("%s: n_vocab: %d\n", __func__, params->n_vocab); printf("%s: n_ctx: %d\n", __func__, params->n_ctx); printf("%s: n_embd: %d\n", __func__, params->n_embd); @@ -298,7 +296,7 @@ static void print_params(struct my_llama_hparams * params) { printf("%s: n_rot: %d\n", __func__, params->n_rot); } -static void init_model(struct my_llama_model * model) { +void init_model(struct my_llama_model * model) { const auto & hparams = model->hparams; const uint32_t n_embd = hparams.n_embd; @@ -365,7 +363,7 @@ static void init_model(struct my_llama_model * model) { } } -static void set_param_model(struct my_llama_model * model) { +void set_param_model(struct my_llama_model * model) { const auto& hparams = model->hparams; const uint32_t n_layer = hparams.n_layer; @@ -391,7 +389,7 @@ static void set_param_model(struct my_llama_model * model) { } } -static void randomize_model(struct my_llama_model * model, int seed, float mean, float std, float min, float max) { +void randomize_model(struct my_llama_model * model, int seed, float mean, float std, float min, float max) { const auto & hparams = model->hparams; const uint32_t n_layer = hparams.n_layer; @@ -420,25 +418,25 @@ static void randomize_model(struct my_llama_model * model, int seed, float mean, } } -static void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0) { +void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0) { GGML_ASSERT(tensor->n_dims == 1); GGML_ASSERT(tensor->ne[0] == ne0); } -static void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1) { +void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1) { GGML_ASSERT(tensor->n_dims == 2); GGML_ASSERT(tensor->ne[0] == ne0); GGML_ASSERT(tensor->ne[1] == ne1); } -static void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2) { +void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2) { GGML_ASSERT(tensor->n_dims == 3); GGML_ASSERT(tensor->ne[0] == ne0); GGML_ASSERT(tensor->ne[1] == ne1); GGML_ASSERT(tensor->ne[2] == ne2); } -static void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) { +void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) { GGML_ASSERT(tensor->n_dims == 4); GGML_ASSERT(tensor->ne[0] == ne0); GGML_ASSERT(tensor->ne[1] == ne1); @@ -446,11 +444,11 @@ static void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne GGML_ASSERT(tensor->ne[3] == ne3); } -size_t hash(void * p) { +static size_t hash(void * p) { return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE; } -size_t hash_find(void * hash_table[], void * p) { +static size_t hash_find(void * hash_table[], void * p) { size_t h = hash(p); // linear probing @@ -490,6 +488,7 @@ struct hash_map { void * keys[GGML_GRAPH_HASHTABLE_SIZE]; void * vals[GGML_GRAPH_HASHTABLE_SIZE]; }; +//static const size_t HASH_MAP_SIZE = sizeof(struct hash_map); struct hash_map * new_hash_map() { struct hash_map * result = new struct hash_map; @@ -500,7 +499,7 @@ struct hash_map * new_hash_map() { return result; }; -static void free_hash_map(struct hash_map * map) { +void free_hash_map(struct hash_map * map) { delete map; } @@ -509,7 +508,7 @@ static bool ggml_is_view(struct ggml_tensor * t) { t->op == GGML_OP_PERMUTE || t->op == GGML_OP_CPY; } -struct ggml_tensor * get_view_parent(struct ggml_tensor * t) { +static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) { switch (t->op) { case GGML_OP_PERMUTE: case GGML_OP_RESHAPE: @@ -523,7 +522,7 @@ struct ggml_tensor * get_view_parent(struct ggml_tensor * t) { } } -struct ggml_tensor * get_view_source(struct ggml_tensor * t) { +static struct ggml_tensor * get_view_source(struct ggml_tensor * t) { struct ggml_tensor * parent = t; do { parent = get_view_parent(parent); @@ -597,14 +596,13 @@ struct ggml_tensor * ggml_recompute_graph_node( return clone; }; -static void ggml_build_backward_gradient_checkpointing( - struct ggml_context * ctx, - struct ggml_cgraph * gf, - struct ggml_cgraph * gb, - struct ggml_cgraph * gb_tmp, - struct ggml_tensor ** checkpoints, - int n_checkpoints -) { +void ggml_build_backward_gradient_checkpointing( + struct ggml_context * ctx, + struct ggml_cgraph * gf, + struct ggml_cgraph * gb, + struct ggml_cgraph * gb_tmp, + struct ggml_tensor * * checkpoints, + int n_checkpoints) { *gb_tmp = *gf; ggml_build_backward_expand(ctx, gf, gb_tmp, true); @@ -828,22 +826,22 @@ struct ggml_tensor * llama_build_train_graphs( return t36; } -static void set_f32_3d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, int64_t i2, float value) { +void set_f32_3d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, int64_t i2, float value) { float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2]); *ptr = value; } -static void set_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, float value) { +void set_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, float value) { float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]); *ptr = value; } -static void set_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, int32_t value) { +void set_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, int32_t value) { int32_t * ptr = (int32_t *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]); *ptr = value; } -static float get_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) { +float get_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) { float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]); return *ptr; } @@ -853,7 +851,7 @@ int32_t get_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) { return *ptr; } -static void print_row(struct ggml_tensor * probs, int i) { +void print_row(struct ggml_tensor * probs, int i) { for (int k = 0; k < probs->ne[0]; ++k) { float p = get_f32_2d(probs, k, i); printf(" %.2f", p); @@ -861,7 +859,7 @@ static void print_row(struct ggml_tensor * probs, int i) { printf("\n"); } -static void print_matrix(struct ggml_tensor * probs) { +void print_matrix(struct ggml_tensor * probs) { assert(probs->n_dims == 2); for (int i = 0; i < probs->ne[1]; ++i) { for (int k = 0; k < probs->ne[0]; ++k) { @@ -872,11 +870,7 @@ static void print_matrix(struct ggml_tensor * probs) { } } -static void get_example_targets( - struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, - size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, - struct ggml_tensor * target_probs -) { +void get_example_targets(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) { int n_tokens = tokens_input->ne[0]; int n_vocab = target_logits->ne[0]; @@ -896,11 +890,7 @@ static void get_example_targets( } } -static void get_example_targets_batch( - struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, - size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, - struct ggml_tensor * target_probs -) { +void get_example_targets_batch(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) { GGML_ASSERT(tokens_input->n_dims == 2); GGML_ASSERT(target_logits->n_dims == 3); GGML_ASSERT(target_probs->n_dims == 3); @@ -935,7 +925,7 @@ static void get_example_targets_batch( } } -static int tokenize_file(struct llama_context * lctx, const char * filename, std::vector & out) { +int tokenize_file(struct llama_context * lctx, const char * filename, std::vector& out) { FILE * fp = std::fopen(filename, "rb"); if (fp == NULL) { return 0; @@ -1006,7 +996,7 @@ static int tokenize_file(struct llama_context * lctx, const char * filename, std return n_tokens; } -static void shuffle_ints(int * begin, int * end) { +void shuffle_ints(int * begin, int * end) { if (end <= begin) return; int max=begin[0]; for (int i=1; itype == b->type); @@ -1050,7 +1040,7 @@ static bool are_same_layout(struct ggml_tensor * a, struct ggml_tensor * b) { return true; } -static void read_tensor_by_name(struct ggml_tensor * dst, struct ggml_context * ctx, const char * name) { +void read_tensor_by_name(struct ggml_tensor * dst, struct ggml_context * ctx, const char * name) { if (dst == NULL) { return; } @@ -1063,9 +1053,7 @@ static void read_tensor_by_name(struct ggml_tensor * dst, struct ggml_context * } } -static void load_opt_context_gguf( - struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct ggml_opt_context * opt -) { +void load_opt_context_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct ggml_opt_context * opt) { // NOTE: gguf_context must be initialized with f_ggml_ctx and no_alloc=false, otherwise tensor data can not be read uint32_t file_version; @@ -1126,7 +1114,7 @@ static void load_opt_context_gguf( } } -static void save_opt_context_gguf(struct gguf_context * fctx, struct ggml_opt_context * opt) { +void save_opt_context_gguf(struct gguf_context * fctx, struct ggml_opt_context * opt) { gguf_set_val_u32(fctx, LLM_KV_OPTIMIZER_FILE_VERSION, 0); gguf_set_val_u32(fctx, LLM_KV_OPTIMIZER_CONVERGENCE_PAST_COUNT, opt->params.past); gguf_set_val_u64(fctx, LLM_KV_OPTIMIZER_PARAMETER_COUNT, (uint64_t) opt->nx); @@ -1193,9 +1181,7 @@ static void save_opt_context_gguf(struct gguf_context * fctx, struct ggml_opt_co } } -static void load_llama_model_gguf( - struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model -) { +void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model) { // NOTE: gguf_context must be initialized with f_ggml_ctx and no_alloc=false, otherwise tensor data can not be read std::string arch; @@ -1266,9 +1252,7 @@ static void load_llama_model_gguf( } } -static void save_llama_model_gguf( - struct gguf_context * fctx, const char * fn_vocab_model, struct my_llama_model * model -) { +void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vocab_model, struct my_llama_model * model) { const char * arch = "llama"; enum llama_ftype ftype = LLAMA_FTYPE_ALL_F32; @@ -1411,7 +1395,7 @@ static void save_llama_model_gguf( } } -static void save_llama_model_file(const char * filename, const char * fn_vocab_model, struct my_llama_model * model) { +void save_llama_model_file(const char * filename, const char * fn_vocab_model, struct my_llama_model * model) { struct gguf_context * fctx = gguf_init_empty(); save_llama_model_gguf(fctx, fn_vocab_model, model); @@ -1422,10 +1406,7 @@ static void save_llama_model_file(const char * filename, const char * fn_vocab_m gguf_free(fctx); } -static void load_checkpoint_gguf( - struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model, - struct ggml_opt_context * opt -) { +void load_checkpoint_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model, struct ggml_opt_context * opt) { load_llama_model_gguf(fctx, f_ggml_ctx, model); uint32_t file_version; @@ -1439,10 +1420,7 @@ static void load_checkpoint_gguf( load_opt_context_gguf(fctx, f_ggml_ctx, opt); } -static void save_checkpoint_gguf( - struct gguf_context * fctx, const char * fn_vocab_model, struct my_llama_model * model, - struct ggml_opt_context * opt -) { +void save_checkpoint_gguf(struct gguf_context * fctx, const char * fn_vocab_model, struct my_llama_model * model, struct ggml_opt_context * opt) { save_llama_model_gguf(fctx, fn_vocab_model, model); gguf_set_val_u32(fctx, LLM_KV_TRAINING_FILE_VERSION, 0); @@ -1453,7 +1431,7 @@ static void save_checkpoint_gguf( save_opt_context_gguf(fctx, opt); } -static bool load_checkpoint_file(const char * filename, struct my_llama_model * model, struct ggml_opt_context * opt) { +bool load_checkpoint_file(const char * filename, struct my_llama_model * model, struct ggml_opt_context * opt) { struct ggml_context * f_ggml_ctx; struct gguf_init_params params; params.no_alloc = false; @@ -1468,9 +1446,7 @@ static bool load_checkpoint_file(const char * filename, struct my_llama_model * return true; } -static void save_checkpoint_file( - const char * filename, const char * fn_vocab_model, struct my_llama_model * model, struct ggml_opt_context * opt -) { +void save_checkpoint_file(const char * filename, const char * fn_vocab_model, struct my_llama_model * model, struct ggml_opt_context * opt) { struct gguf_context * fctx = gguf_init_empty(); save_checkpoint_gguf(fctx, fn_vocab_model, model, opt); @@ -1481,7 +1457,7 @@ static void save_checkpoint_file( gguf_free(fctx); } -static float cosine_decay(const int decay_steps, const float minimum, int step) { +float cosine_decay(const int decay_steps, const float minimum, int step) { if (step > decay_steps) { step = decay_steps; } @@ -1490,9 +1466,7 @@ static float cosine_decay(const int decay_steps, const float minimum, int step) return decay; } -static float cosine_decay_restart( - int decay_steps, const float minimum, int step, float restart_step_mult, bool enable_restart -) { +float cosine_decay_restart(int decay_steps, const float minimum, int step, float restart_step_mult, bool enable_restart) { if (enable_restart) { while (step > decay_steps) { step -= decay_steps; @@ -1620,7 +1594,7 @@ struct train_params get_default_train_params() { return params; } -static void train_print_usage(int /*argc*/, char ** argv, const struct train_params * params) { +void train_print_usage(int /*argc*/, char ** argv, const struct train_params * params) { fprintf(stderr, "usage: %s [options]\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "options:\n"); @@ -1677,7 +1651,7 @@ static void train_print_usage(int /*argc*/, char ** argv, const struct train_par fprintf(stderr, "\n"); } -static bool train_params_parse(int argc, char ** argv, struct train_params * params) { +bool train_params_parse(int argc, char ** argv, struct train_params * params) { bool invalid_param = false; std::string arg; struct train_params default_params = get_default_train_params(); @@ -1971,7 +1945,7 @@ struct opt_callback_data { struct ggml_tensor * target_probs; }; -static void opt_callback(void * vdata, float * sched) { +void opt_callback(void * vdata, float * sched) { struct opt_callback_data * data = (struct opt_callback_data *) vdata; struct train_params * params = data->params; struct ggml_opt_context * opt = data->opt;