diff --git a/Makefile b/Makefile
index a10fada09..98bf8845c 100644
--- a/Makefile
+++ b/Makefile
@@ -174,9 +174,13 @@ MK_CFLAGS    += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow
 				-Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
 MK_CXXFLAGS  += -Wall -Wextra -Wpedantic -Wcast-qual -Wmissing-declarations -Wno-unused-function -Wno-multichar
 
+# TODO(cebtenzzre): remove TTFS_CXXFLAGS and its -Wno-missing-* overrides once PR #2632 gets merged
+TTFS_CXXFLAGS = $(CXXFLAGS) -Wno-missing-declarations
+
 ifneq '' '$(findstring clang,$(shell $(CXX) --version))'
 	# clang++ only
-	MK_CXXFLAGS += -Wmissing-prototypes
+	MK_CXXFLAGS   += -Wmissing-prototypes
+	TTFS_CXXFLAGS += -Wno-missing-prototypes
 else
 	# g++ only
 	MK_CXXFLAGS += -Wno-format-truncation -Wno-array-bounds
@@ -527,7 +531,7 @@ gguf: examples/gguf/gguf.cpp ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o common.o $(OBJS)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+	$(CXX) $(TTFS_CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 1f1cd6178..947aa7ed3 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -30,37 +30,35 @@ struct random_uniform_distribution {
     std::uniform_real_distribution<float> rd;
 };
 
-static void init_random_normal_distribution(
-    struct random_normal_distribution * rnd, int seed, float mean, float std, float min, float max
-) {
+void init_random_normal_distribution(struct random_normal_distribution * rnd, int seed, float mean, float std, float min, float max) {
     rnd->gen = std::mt19937(seed);
     rnd->rd = std::normal_distribution<float>{mean, std};
     rnd->min = min;
     rnd->max = max;
 }
 
-static void init_random_uniform_distribution(struct random_uniform_distribution * rnd, int seed, float min, float max) {
+void init_random_uniform_distribution(struct random_uniform_distribution * rnd, int seed, float min, float max) {
     rnd->gen = std::mt19937(seed);
     rnd->rd = std::uniform_real_distribution<float>{min, max};
 }
 
-static int clamp(const int v, const int min, const int max) {
+int clamp(const int v, const int min, const int max) {
     return ((v < min) ? (min) : (v > max) ? (max) : v);
 }
 
-static float fclamp(const float v, const float min, const float max) {
+float fclamp(const float v, const float min, const float max) {
     return ((v < min) ? (min) : (v > max) ? (max) : v);
 }
 
-static float frand() {
+float frand() {
     return (float)rand()/(float)RAND_MAX;
 }
 
-static float frand_normal(struct random_normal_distribution * rnd) {
+float frand_normal(struct random_normal_distribution * rnd) {
     return fclamp(rnd->rd(rnd->gen), rnd->min, rnd->max);
 }
 
-static float frand_uniform(struct random_uniform_distribution * rnd) {
+float frand_uniform(struct random_uniform_distribution * rnd) {
     return rnd->rd(rnd->gen);
 }
 
@@ -210,85 +208,85 @@ struct my_llama_model {
 };
 
 // gguf constants
-static const char * LLM_KV_OPTIMIZER_TYPE = "optimizer.type";
-static const char * LLM_KV_OPTIMIZER_TYPE_ADAM  = "adam";
-static const char * LLM_KV_OPTIMIZER_TYPE_LBFGS = "lbfgs";
-static const char * LLM_KV_OPTIMIZER_FILE_VERSION               = "optimizer.file_version";
-static const char * LLM_KV_OPTIMIZER_CONVERGENCE_PAST_COUNT     = "optimizer.convergence_past_count";
-static const char * LLM_KV_OPTIMIZER_PARAMETER_COUNT            = "optimizer.parameter_count";
-static const char * LLM_KV_OPTIMIZER_ITERATION_COUNT            = "optimizer.iteration_count";
-static const char * LLM_KV_OPTIMIZER_JUST_INITIALIZED           = "optimizer.just_initialized";
-static const char * LLM_KV_OPTIMIZER_ADAM_BEST_LOSS             = "optimizer.adam.best_loss";
-static const char * LLM_KV_OPTIMIZER_ADAM_PREVIOUS_LOSS         = "optimizer.adam.previous_loss";
-static const char * LLM_KV_OPTIMIZER_ADAM_NO_IMPROVEMENT_COUNT  = "optimizer.adam.no_improvement_count";
-static const char * LLM_KV_OPTIMIZER_LBFGS_APPROX_HESSIAN_COUNT = "optimizer.lbfgs.approx_hessian_count";
-static const char * LLM_KV_OPTIMIZER_LBFGS_BEST_LOSS            = "optimizer.lbfgs.best_loss";
-static const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_STEP     = "optimizer.lbfgs.line_search_step";
-static const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_J        = "optimizer.lbfgs.line_search_j";
-static const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_K        = "optimizer.lbfgs.line_search_k";
-static const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_END      = "optimizer.lbfgs.line_search_end";
-static const char * LLM_KV_OPTIMIZER_LBFGS_NO_IMPROVEMENT_COUNT = "optimizer.lbfgs.no_improvement_count";
+const char * LLM_KV_OPTIMIZER_TYPE = "optimizer.type";
+const char * LLM_KV_OPTIMIZER_TYPE_ADAM  = "adam";
+const char * LLM_KV_OPTIMIZER_TYPE_LBFGS = "lbfgs";
+const char * LLM_KV_OPTIMIZER_FILE_VERSION               = "optimizer.file_version";
+const char * LLM_KV_OPTIMIZER_CONVERGENCE_PAST_COUNT     = "optimizer.convergence_past_count";
+const char * LLM_KV_OPTIMIZER_PARAMETER_COUNT            = "optimizer.parameter_count";
+const char * LLM_KV_OPTIMIZER_ITERATION_COUNT            = "optimizer.iteration_count";
+const char * LLM_KV_OPTIMIZER_JUST_INITIALIZED           = "optimizer.just_initialized";
+const char * LLM_KV_OPTIMIZER_ADAM_BEST_LOSS             = "optimizer.adam.best_loss";
+const char * LLM_KV_OPTIMIZER_ADAM_PREVIOUS_LOSS         = "optimizer.adam.previous_loss";
+const char * LLM_KV_OPTIMIZER_ADAM_NO_IMPROVEMENT_COUNT  = "optimizer.adam.no_improvement_count";
+const char * LLM_KV_OPTIMIZER_LBFGS_APPROX_HESSIAN_COUNT = "optimizer.lbfgs.approx_hessian_count";
+const char * LLM_KV_OPTIMIZER_LBFGS_BEST_LOSS            = "optimizer.lbfgs.best_loss";
+const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_STEP     = "optimizer.lbfgs.line_search_step";
+const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_J        = "optimizer.lbfgs.line_search_j";
+const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_K        = "optimizer.lbfgs.line_search_k";
+const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_END      = "optimizer.lbfgs.line_search_end";
+const char * LLM_KV_OPTIMIZER_LBFGS_NO_IMPROVEMENT_COUNT = "optimizer.lbfgs.no_improvement_count";
 
-static const char * LLM_TENSOR_OPTIMIZER_ADAM_FIRST_MOMENTS    = "optimizer.adam.first_moments";
-static const char * LLM_TENSOR_OPTIMIZER_ADAM_SECOND_MOMENTS   = "optimizer.adam.second_moments";
-static const char * LLM_TENSOR_OPTIMIZER_ADAM_PAST_LOSS_VALUES = "optimizer.adam.past_loss_values";
+const char * LLM_TENSOR_OPTIMIZER_ADAM_FIRST_MOMENTS    = "optimizer.adam.first_moments";
+const char * LLM_TENSOR_OPTIMIZER_ADAM_SECOND_MOMENTS   = "optimizer.adam.second_moments";
+const char * LLM_TENSOR_OPTIMIZER_ADAM_PAST_LOSS_VALUES = "optimizer.adam.past_loss_values";
 
-static const char * LLM_TENSOR_OPTIMIZER_LBFGS_CURRENT_PARAMETERS  = "optimizer.lbfgs.current_parameters";
-static const char * LLM_TENSOR_OPTIMIZER_LBFGS_PREVIOUS_PARAMETERS = "optimizer.lbfgs.previous_parameters";
-static const char * LLM_TENSOR_OPTIMIZER_LBFGS_CURRENT_GRADIENTS   = "optimizer.lbfgs.current_gradients";
-static const char * LLM_TENSOR_OPTIMIZER_LBFGS_PREVIOUS_GRADIENTS  = "optimizer.lbfgs.previous_gradients";
-static const char * LLM_TENSOR_OPTIMIZER_LBFGS_SEARCH_DIRECTION    = "optimizer.lbfgs.search_direction";
-static const char * LLM_TENSOR_OPTIMIZER_LBFGS_PAST_LOSS_VALUES    = "optimizer.lbfgs.past_loss_values";
-static const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_ALPHA        = "optimizer.lbfgs.memory_alpha";
-static const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_YS           = "optimizer.lbfgs.memory_ys";
-static const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_S            = "optimizer.lbfgs.memory_s";
-static const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_Y            = "optimizer.lbfgs.memory_y";
+const char * LLM_TENSOR_OPTIMIZER_LBFGS_CURRENT_PARAMETERS  = "optimizer.lbfgs.current_parameters";
+const char * LLM_TENSOR_OPTIMIZER_LBFGS_PREVIOUS_PARAMETERS = "optimizer.lbfgs.previous_parameters";
+const char * LLM_TENSOR_OPTIMIZER_LBFGS_CURRENT_GRADIENTS   = "optimizer.lbfgs.current_gradients";
+const char * LLM_TENSOR_OPTIMIZER_LBFGS_PREVIOUS_GRADIENTS  = "optimizer.lbfgs.previous_gradients";
+const char * LLM_TENSOR_OPTIMIZER_LBFGS_SEARCH_DIRECTION    = "optimizer.lbfgs.search_direction";
+const char * LLM_TENSOR_OPTIMIZER_LBFGS_PAST_LOSS_VALUES    = "optimizer.lbfgs.past_loss_values";
+const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_ALPHA        = "optimizer.lbfgs.memory_alpha";
+const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_YS           = "optimizer.lbfgs.memory_ys";
+const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_S            = "optimizer.lbfgs.memory_s";
+const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_Y            = "optimizer.lbfgs.memory_y";
 
-static const char * LLM_KV_TRAINING_FILE_VERSION    = "training.file_version";
-static const char * LLM_KV_TRAINING_ITERATION_COUNT = "training.iteration_count";
-static const char * LLM_KV_TRAINING_SAMPLE_COUNT    = "training.sample_count";
-static const char * LLM_KV_TRAINING_TOKEN_COUNT     = "training.token_count";
+const char * LLM_KV_TRAINING_FILE_VERSION    = "training.file_version";
+const char * LLM_KV_TRAINING_ITERATION_COUNT = "training.iteration_count";
+const char * LLM_KV_TRAINING_SAMPLE_COUNT    = "training.sample_count";
+const char * LLM_KV_TRAINING_TOKEN_COUNT     = "training.token_count";
 
 // gguf constants (sync with gguf.py)
 
-static const char * LLM_KV_GENERAL_ARCHITECTURE        = "general.architecture";
-static const char * LLM_KV_GENERAL_FILE_TYPE           = "general.file_type";
+const char * LLM_KV_GENERAL_ARCHITECTURE        = "general.architecture";
+const char * LLM_KV_GENERAL_FILE_TYPE           = "general.file_type";
 
-static const char * LLM_KV_CONTEXT_LENGTH              = "%s.context_length";
-static const char * LLM_KV_EMBEDDING_LENGTH            = "%s.embedding_length";
-static const char * LLM_KV_BLOCK_COUNT                 = "%s.block_count";
-static const char * LLM_KV_FEED_FORWARD_LENGTH         = "%s.feed_forward_length";
-static const char * LLM_KV_ATTENTION_HEAD_COUNT        = "%s.attention.head_count";
-static const char * LLM_KV_ATTENTION_LAYERNORM_RMS_EPS = "%s.attention.layer_norm_rms_epsilon";
-static const char * LLM_KV_ROPE_DIMENSION_COUNT        = "%s.rope.dimension_count";
-static const char * LLM_KV_ROPE_FREQ_BASE              = "%s.rope.freq_base"; // TODO load in llama.cpp
-static const char * LLM_KV_ROPE_SCALE_LINEAR           = "%s.rope.scale_linear";
+const char * LLM_KV_CONTEXT_LENGTH              = "%s.context_length";
+const char * LLM_KV_EMBEDDING_LENGTH            = "%s.embedding_length";
+const char * LLM_KV_BLOCK_COUNT                 = "%s.block_count";
+const char * LLM_KV_FEED_FORWARD_LENGTH         = "%s.feed_forward_length";
+const char * LLM_KV_ATTENTION_HEAD_COUNT        = "%s.attention.head_count";
+const char * LLM_KV_ATTENTION_LAYERNORM_RMS_EPS = "%s.attention.layer_norm_rms_epsilon";
+const char * LLM_KV_ROPE_DIMENSION_COUNT        = "%s.rope.dimension_count";
+const char * LLM_KV_ROPE_FREQ_BASE              = "%s.rope.freq_base"; // TODO load in llama.cpp
+const char * LLM_KV_ROPE_SCALE_LINEAR           = "%s.rope.scale_linear";
 
-static const char * LLM_KV_TOKENIZER_MODEL             = "tokenizer.ggml.model";
-static const char * LLM_KV_TOKENIZER_LIST              = "tokenizer.ggml.tokens";
-static const char * LLM_KV_TOKENIZER_TOKEN_TYPE        = "tokenizer.ggml.token_type";
-static const char * LLM_KV_TOKENIZER_SCORES            = "tokenizer.ggml.scores";
-static const char * LLM_KV_TOKENIZER_MERGES            = "tokenizer.ggml.merges";
-static const char * LLM_KV_TOKENIZER_BOS_ID            = "tokenizer.ggml.bos_token_id";
-static const char * LLM_KV_TOKENIZER_EOS_ID            = "tokenizer.ggml.eos_token_id";
-static const char * LLM_KV_TOKENIZER_UNK_ID            = "tokenizer.ggml.unknown_token_id";
-static const char * LLM_KV_TOKENIZER_SEP_ID            = "tokenizer.ggml.seperator_token_id";
-static const char * LLM_KV_TOKENIZER_PAD_ID            = "tokenizer.ggml.padding_token_id";
+const char * LLM_KV_TOKENIZER_MODEL             = "tokenizer.ggml.model";
+const char * LLM_KV_TOKENIZER_LIST              = "tokenizer.ggml.tokens";
+const char * LLM_KV_TOKENIZER_TOKEN_TYPE        = "tokenizer.ggml.token_type";
+const char * LLM_KV_TOKENIZER_SCORES            = "tokenizer.ggml.scores";
+const char * LLM_KV_TOKENIZER_MERGES            = "tokenizer.ggml.merges";
+const char * LLM_KV_TOKENIZER_BOS_ID            = "tokenizer.ggml.bos_token_id";
+const char * LLM_KV_TOKENIZER_EOS_ID            = "tokenizer.ggml.eos_token_id";
+const char * LLM_KV_TOKENIZER_UNK_ID            = "tokenizer.ggml.unknown_token_id";
+const char * LLM_KV_TOKENIZER_SEP_ID            = "tokenizer.ggml.seperator_token_id";
+const char * LLM_KV_TOKENIZER_PAD_ID            = "tokenizer.ggml.padding_token_id";
 
-static const char * LLM_TENSOR_TOKEN_EMBD    = "token_embd";
-static const char * LLM_TENSOR_OUTPUT_NORM   = "output_norm";
-static const char * LLM_TENSOR_OUTPUT        = "output";
-static const char * LLM_TENSOR_ATTN_NORM     = "blk.%d.attn_norm";
-static const char * LLM_TENSOR_ATTN_Q        = "blk.%d.attn_q";
-static const char * LLM_TENSOR_ATTN_K        = "blk.%d.attn_k";
-static const char * LLM_TENSOR_ATTN_V        = "blk.%d.attn_v";
-static const char * LLM_TENSOR_ATTN_OUT      = "blk.%d.attn_output";
-static const char * LLM_TENSOR_FFN_NORM      = "blk.%d.ffn_norm";
-static const char * LLM_TENSOR_FFN_GATE      = "blk.%d.ffn_gate";
-static const char * LLM_TENSOR_FFN_DOWN      = "blk.%d.ffn_down";
-static const char * LLM_TENSOR_FFN_UP        = "blk.%d.ffn_up";
+const char * LLM_TENSOR_TOKEN_EMBD    = "token_embd";
+const char * LLM_TENSOR_OUTPUT_NORM   = "output_norm";
+const char * LLM_TENSOR_OUTPUT        = "output";
+const char * LLM_TENSOR_ATTN_NORM     = "blk.%d.attn_norm";
+const char * LLM_TENSOR_ATTN_Q        = "blk.%d.attn_q";
+const char * LLM_TENSOR_ATTN_K        = "blk.%d.attn_k";
+const char * LLM_TENSOR_ATTN_V        = "blk.%d.attn_v";
+const char * LLM_TENSOR_ATTN_OUT      = "blk.%d.attn_output";
+const char * LLM_TENSOR_FFN_NORM      = "blk.%d.ffn_norm";
+const char * LLM_TENSOR_FFN_GATE      = "blk.%d.ffn_gate";
+const char * LLM_TENSOR_FFN_DOWN      = "blk.%d.ffn_down";
+const char * LLM_TENSOR_FFN_UP        = "blk.%d.ffn_up";
 
-static void print_params(struct my_llama_hparams * params) {
+void print_params(struct my_llama_hparams * params) {
     printf("%s: n_vocab: %d\n", __func__, params->n_vocab);
     printf("%s: n_ctx:   %d\n", __func__, params->n_ctx);
     printf("%s: n_embd:  %d\n", __func__, params->n_embd);
@@ -298,7 +296,7 @@ static void print_params(struct my_llama_hparams * params) {
     printf("%s: n_rot:   %d\n", __func__, params->n_rot);
 }
 
-static void init_model(struct my_llama_model * model) {
+void init_model(struct my_llama_model * model) {
     const auto & hparams = model->hparams;
 
     const uint32_t n_embd  = hparams.n_embd;
@@ -365,7 +363,7 @@ static void init_model(struct my_llama_model * model) {
     }
 }
 
-static void set_param_model(struct my_llama_model * model) {
+void set_param_model(struct my_llama_model * model) {
     const auto& hparams = model->hparams;
 
     const uint32_t n_layer = hparams.n_layer;
@@ -391,7 +389,7 @@ static void set_param_model(struct my_llama_model * model) {
     }
 }
 
-static void randomize_model(struct my_llama_model * model, int seed, float mean, float std, float min, float max) {
+void randomize_model(struct my_llama_model * model, int seed, float mean, float std, float min, float max) {
     const auto & hparams = model->hparams;
 
     const uint32_t n_layer = hparams.n_layer;
@@ -420,25 +418,25 @@ static void randomize_model(struct my_llama_model * model, int seed, float mean,
     }
 }
 
-static void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0) {
+void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0) {
     GGML_ASSERT(tensor->n_dims == 1);
     GGML_ASSERT(tensor->ne[0] == ne0);
 }
 
-static void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1) {
+void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1) {
     GGML_ASSERT(tensor->n_dims == 2);
     GGML_ASSERT(tensor->ne[0] == ne0);
     GGML_ASSERT(tensor->ne[1] == ne1);
 }
 
-static void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2) {
+void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2) {
     GGML_ASSERT(tensor->n_dims == 3);
     GGML_ASSERT(tensor->ne[0] == ne0);
     GGML_ASSERT(tensor->ne[1] == ne1);
     GGML_ASSERT(tensor->ne[2] == ne2);
 }
 
-static void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
+void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
     GGML_ASSERT(tensor->n_dims == 4);
     GGML_ASSERT(tensor->ne[0] == ne0);
     GGML_ASSERT(tensor->ne[1] == ne1);
@@ -446,11 +444,11 @@ static void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne
     GGML_ASSERT(tensor->ne[3] == ne3);
 }
 
-size_t hash(void * p) {
+static size_t hash(void * p) {
     return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
 }
 
-size_t hash_find(void * hash_table[], void * p) {
+static size_t hash_find(void * hash_table[], void * p) {
     size_t h = hash(p);
 
     // linear probing
@@ -490,6 +488,7 @@ struct hash_map {
     void * keys[GGML_GRAPH_HASHTABLE_SIZE];
     void * vals[GGML_GRAPH_HASHTABLE_SIZE];
 };
+//static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);
 
 struct hash_map * new_hash_map() {
     struct hash_map * result = new struct hash_map;
@@ -500,7 +499,7 @@ struct hash_map * new_hash_map() {
     return result;
 };
 
-static void free_hash_map(struct hash_map * map) {
+void free_hash_map(struct hash_map * map) {
     delete map;
 }
 
@@ -509,7 +508,7 @@ static bool ggml_is_view(struct ggml_tensor * t) {
            t->op == GGML_OP_PERMUTE || t->op == GGML_OP_CPY;
 }
 
-struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
+static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
     switch (t->op) {
         case GGML_OP_PERMUTE:
         case GGML_OP_RESHAPE:
@@ -523,7 +522,7 @@ struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
     }
 }
 
-struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
+static struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
     struct ggml_tensor * parent = t;
     do {
         parent = get_view_parent(parent);
@@ -597,14 +596,13 @@ struct ggml_tensor * ggml_recompute_graph_node(
     return clone;
 };
 
-static void ggml_build_backward_gradient_checkpointing(
-    struct ggml_context * ctx,
-    struct ggml_cgraph *  gf,
-    struct ggml_cgraph *  gb,
-    struct ggml_cgraph *  gb_tmp,
-    struct ggml_tensor ** checkpoints,
-    int                   n_checkpoints
-) {
+void ggml_build_backward_gradient_checkpointing(
+        struct ggml_context   * ctx,
+        struct ggml_cgraph    * gf,
+        struct ggml_cgraph    * gb,
+        struct ggml_cgraph    * gb_tmp,
+        struct ggml_tensor  * * checkpoints,
+        int                     n_checkpoints) {
     *gb_tmp = *gf;
     ggml_build_backward_expand(ctx, gf, gb_tmp, true);
 
@@ -828,22 +826,22 @@ struct ggml_tensor * llama_build_train_graphs(
     return t36;
 }
 
-static void set_f32_3d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, int64_t i2, float value) {
+void set_f32_3d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, int64_t i2, float value) {
     float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2]);
     *ptr = value;
 }
 
-static void set_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, float value) {
+void set_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, float value) {
     float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]);
     *ptr = value;
 }
 
-static void set_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, int32_t value) {
+void set_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, int32_t value) {
     int32_t * ptr = (int32_t *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]);
     *ptr = value;
 }
 
-static float get_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
+float get_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
     float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]);
     return *ptr;
 }
@@ -853,7 +851,7 @@ int32_t get_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
     return *ptr;
 }
 
-static void print_row(struct ggml_tensor * probs, int i) {
+void print_row(struct ggml_tensor * probs, int i) {
     for (int k = 0; k < probs->ne[0]; ++k) {
         float p = get_f32_2d(probs, k, i);
         printf(" %.2f", p);
@@ -861,7 +859,7 @@ static void print_row(struct ggml_tensor * probs, int i) {
     printf("\n");
 }
 
-static void print_matrix(struct ggml_tensor * probs) {
+void print_matrix(struct ggml_tensor * probs) {
     assert(probs->n_dims == 2);
     for (int i = 0; i < probs->ne[1]; ++i) {
         for (int k = 0; k < probs->ne[0]; ++k) {
@@ -872,11 +870,7 @@ static void print_matrix(struct ggml_tensor * probs) {
     }
 }
 
-static void get_example_targets(
-    struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data,
-    size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits,
-    struct ggml_tensor * target_probs
-) {
+void get_example_targets(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
     int n_tokens = tokens_input->ne[0];
     int n_vocab  = target_logits->ne[0];
 
@@ -896,11 +890,7 @@ static void get_example_targets(
     }
 }
 
-static void get_example_targets_batch(
-    struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data,
-    size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits,
-    struct ggml_tensor * target_probs
-) {
+void get_example_targets_batch(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
     GGML_ASSERT(tokens_input->n_dims  == 2);
     GGML_ASSERT(target_logits->n_dims == 3);
     GGML_ASSERT(target_probs->n_dims  == 3);
@@ -935,7 +925,7 @@ static void get_example_targets_batch(
     }
 }
 
-static int tokenize_file(struct llama_context * lctx, const char * filename, std::vector<llama_token> & out) {
+int tokenize_file(struct llama_context * lctx, const char * filename, std::vector<llama_token>& out) {
     FILE * fp = std::fopen(filename, "rb");
     if (fp == NULL) {
         return 0;
@@ -1006,7 +996,7 @@ static int tokenize_file(struct llama_context * lctx, const char * filename, std
     return n_tokens;
 }
 
-static void shuffle_ints(int * begin, int * end) {
+void shuffle_ints(int * begin, int * end) {
     if (end <= begin) return;
     int max=begin[0];
     for (int i=1; i<end-begin; ++i) {
@@ -1040,7 +1030,7 @@ static void shuffle_ints(int * begin, int * end) {
 }
 
 
-static bool are_same_layout(struct ggml_tensor * a, struct ggml_tensor * b) {
+bool are_same_layout(struct ggml_tensor * a, struct ggml_tensor * b) {
     GGML_ASSERT(a != NULL);
     GGML_ASSERT(b != NULL);
     GGML_ASSERT(a->type == b->type);
@@ -1050,7 +1040,7 @@ static bool are_same_layout(struct ggml_tensor * a, struct ggml_tensor * b) {
     return true;
 }
 
-static void read_tensor_by_name(struct ggml_tensor * dst, struct ggml_context * ctx, const char * name) {
+void read_tensor_by_name(struct ggml_tensor * dst, struct ggml_context * ctx, const char * name) {
     if (dst == NULL) {
         return;
     }
@@ -1063,9 +1053,7 @@ static void read_tensor_by_name(struct ggml_tensor * dst, struct ggml_context *
     }
 }
 
-static void load_opt_context_gguf(
-    struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct ggml_opt_context * opt
-) {
+void load_opt_context_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct ggml_opt_context * opt) {
     // NOTE: gguf_context must be initialized with f_ggml_ctx and no_alloc=false, otherwise tensor data can not be read
 
     uint32_t file_version;
@@ -1126,7 +1114,7 @@ static void load_opt_context_gguf(
     }
 }
 
-static void save_opt_context_gguf(struct gguf_context * fctx, struct ggml_opt_context * opt) {
+void save_opt_context_gguf(struct gguf_context * fctx, struct ggml_opt_context * opt) {
     gguf_set_val_u32(fctx, LLM_KV_OPTIMIZER_FILE_VERSION, 0);
     gguf_set_val_u32(fctx, LLM_KV_OPTIMIZER_CONVERGENCE_PAST_COUNT, opt->params.past);
     gguf_set_val_u64(fctx, LLM_KV_OPTIMIZER_PARAMETER_COUNT, (uint64_t) opt->nx);
@@ -1193,9 +1181,7 @@ static void save_opt_context_gguf(struct gguf_context * fctx, struct ggml_opt_co
     }
 }
 
-static void load_llama_model_gguf(
-    struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model
-) {
+void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model) {
     // NOTE: gguf_context must be initialized with f_ggml_ctx and no_alloc=false, otherwise tensor data can not be read
     std::string arch;
 
@@ -1266,9 +1252,7 @@ static void load_llama_model_gguf(
     }
 }
 
-static void save_llama_model_gguf(
-    struct gguf_context * fctx, const char * fn_vocab_model, struct my_llama_model * model
-) {
+void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vocab_model, struct my_llama_model * model) {
     const char * arch = "llama";
     enum llama_ftype ftype = LLAMA_FTYPE_ALL_F32;
 
@@ -1411,7 +1395,7 @@ static void save_llama_model_gguf(
     }
 }
 
-static void save_llama_model_file(const char * filename, const char * fn_vocab_model, struct my_llama_model * model) {
+void save_llama_model_file(const char * filename, const char * fn_vocab_model, struct my_llama_model * model) {
     struct gguf_context * fctx = gguf_init_empty();
 
     save_llama_model_gguf(fctx, fn_vocab_model, model);
@@ -1422,10 +1406,7 @@ static void save_llama_model_file(const char * filename, const char * fn_vocab_m
     gguf_free(fctx);
 }
 
-static void load_checkpoint_gguf(
-    struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model,
-    struct ggml_opt_context * opt
-) {
+void load_checkpoint_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model, struct ggml_opt_context * opt) {
     load_llama_model_gguf(fctx, f_ggml_ctx, model);
 
     uint32_t file_version;
@@ -1439,10 +1420,7 @@ static void load_checkpoint_gguf(
     load_opt_context_gguf(fctx, f_ggml_ctx, opt);
 }
 
-static void save_checkpoint_gguf(
-    struct gguf_context * fctx, const char * fn_vocab_model, struct my_llama_model * model,
-    struct ggml_opt_context * opt
-) {
+void save_checkpoint_gguf(struct gguf_context * fctx, const char * fn_vocab_model, struct my_llama_model * model, struct ggml_opt_context * opt) {
     save_llama_model_gguf(fctx, fn_vocab_model, model);
 
     gguf_set_val_u32(fctx, LLM_KV_TRAINING_FILE_VERSION,    0);
@@ -1453,7 +1431,7 @@ static void save_checkpoint_gguf(
     save_opt_context_gguf(fctx, opt);
 }
 
-static bool load_checkpoint_file(const char * filename, struct my_llama_model * model, struct ggml_opt_context * opt) {
+bool load_checkpoint_file(const char * filename, struct my_llama_model * model, struct ggml_opt_context * opt) {
     struct ggml_context * f_ggml_ctx;
     struct gguf_init_params params;
     params.no_alloc = false;
@@ -1468,9 +1446,7 @@ static bool load_checkpoint_file(const char * filename, struct my_llama_model *
     return true;
 }
 
-static void save_checkpoint_file(
-    const char * filename, const char * fn_vocab_model, struct my_llama_model * model, struct ggml_opt_context * opt
-) {
+void save_checkpoint_file(const char * filename, const char * fn_vocab_model, struct my_llama_model * model, struct ggml_opt_context * opt) {
     struct gguf_context * fctx = gguf_init_empty();
 
     save_checkpoint_gguf(fctx, fn_vocab_model, model, opt);
@@ -1481,7 +1457,7 @@ static void save_checkpoint_file(
     gguf_free(fctx);
 }
 
-static float cosine_decay(const int decay_steps, const float minimum, int step) {
+float cosine_decay(const int decay_steps, const float minimum, int step) {
     if (step > decay_steps) {
         step = decay_steps;
     }
@@ -1490,9 +1466,7 @@ static float cosine_decay(const int decay_steps, const float minimum, int step)
     return decay;
 }
 
-static float cosine_decay_restart(
-    int decay_steps, const float minimum, int step, float restart_step_mult, bool enable_restart
-) {
+float cosine_decay_restart(int decay_steps, const float minimum, int step, float restart_step_mult, bool enable_restart) {
     if (enable_restart) {
         while (step > decay_steps) {
             step -= decay_steps;
@@ -1620,7 +1594,7 @@ struct train_params get_default_train_params() {
     return params;
 }
 
-static void train_print_usage(int /*argc*/, char ** argv, const struct train_params * params) {
+void train_print_usage(int /*argc*/, char ** argv, const struct train_params * params) {
     fprintf(stderr, "usage: %s [options]\n", argv[0]);
     fprintf(stderr, "\n");
     fprintf(stderr, "options:\n");
@@ -1677,7 +1651,7 @@ static void train_print_usage(int /*argc*/, char ** argv, const struct train_par
     fprintf(stderr, "\n");
 }
 
-static bool train_params_parse(int argc, char ** argv, struct train_params * params) {
+bool train_params_parse(int argc, char ** argv, struct train_params * params) {
     bool invalid_param = false;
     std::string arg;
     struct train_params default_params = get_default_train_params();
@@ -1971,7 +1945,7 @@ struct opt_callback_data {
     struct ggml_tensor *      target_probs;
 };
 
-static void opt_callback(void * vdata, float * sched) {
+void opt_callback(void * vdata, float * sched) {
     struct opt_callback_data * data = (struct opt_callback_data *) vdata;
     struct train_params * params    = data->params;
     struct ggml_opt_context * opt   = data->opt;