From 45d0c8089a1e5b2caea7c7e958ce67a8222196fe Mon Sep 17 00:00:00 2001 From: Cebtenzzre Date: Fri, 15 Sep 2023 13:47:09 -0400 Subject: [PATCH] do not use anonymous namespaces --- examples/baby-llama/baby-llama.cpp | 148 +++++----- examples/beam-search/beam-search.cpp | 11 +- .../convert-llama2c-to-ggml.cpp | 38 ++- examples/gguf/gguf.cpp | 14 +- examples/main/main.cpp | 21 +- examples/perplexity/perplexity.cpp | 29 +- examples/quantize-stats/quantize-stats.cpp | 48 +--- examples/quantize/quantize.cpp | 10 +- examples/server/server.cpp | 27 +- .../train-text-from-scratch.cpp | 271 ++++++++++-------- pocs/vdot/vdot.cpp | 9 +- tests/test-opt.cpp | 16 +- tests/test-quantize-fns.cpp | 20 +- tests/test-quantize-perf.cpp | 14 +- tests/test-sampling.cpp | 41 +-- 15 files changed, 337 insertions(+), 380 deletions(-) diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp index 30a06338a..ed61125ea 100644 --- a/examples/baby-llama/baby-llama.cpp +++ b/examples/baby-llama/baby-llama.cpp @@ -14,9 +14,7 @@ constexpr float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS; constexpr float rms_norm_eps = 5e-6f; #endif -namespace { - -float frand() { +static float frand() { return (float)rand()/(float)RAND_MAX; } @@ -27,19 +25,21 @@ struct random_normal_distribution { float max; }; -void init_random_normal_distribution(struct random_normal_distribution * rnd, int seed, float mean, float std, float min, float max) { +static void init_random_normal_distribution( + struct random_normal_distribution * rnd, int seed, float mean, float std, float min, float max +) { rnd->gen = std::mt19937(seed); rnd->nd = std::normal_distribution{mean, std}; rnd->min = min; rnd->max = max; } -float frand_normal(struct random_normal_distribution * rnd) { +static float frand_normal(struct random_normal_distribution * rnd) { const float r = rnd->nd(rnd->gen); return ((r < rnd->min) ? (rnd->min) : (r > rnd->max) ? (rnd->max) : r); } -void ggml_graph_compute_helper(std::vector & buf, ggml_cgraph * graph, int n_threads) { +static void ggml_graph_compute_helper(std::vector & buf, ggml_cgraph * graph, int n_threads) { struct ggml_cplan plan = ggml_graph_plan(graph, n_threads); if (plan.work_size > 0) { @@ -50,13 +50,9 @@ void ggml_graph_compute_helper(std::vector & buf, ggml_cgraph * graph, ggml_graph_compute(graph, &plan); } -struct ggml_tensor * randomize_tensor( - struct ggml_tensor * tensor, - int ndims, - const int64_t ne[], - float fmin, - float fmax) { - +static struct ggml_tensor * randomize_tensor( + struct ggml_tensor * tensor, int ndims, const int64_t ne[], float fmin, float fmax +) { switch (ndims) { case 1: for (int i0 = 0; i0 < ne[0]; i0++) { @@ -97,11 +93,9 @@ struct ggml_tensor * randomize_tensor( return tensor; } -struct ggml_tensor * randomize_tensor_normal( - struct ggml_tensor * tensor, - int ndims, - const int64_t ne[], - struct random_normal_distribution * rnd) { +static struct ggml_tensor * randomize_tensor_normal( + struct ggml_tensor * tensor, int ndims, const int64_t ne[], struct random_normal_distribution * rnd +) { float scale = 1.0; // xavier switch (ndims) { case 1: @@ -161,7 +155,7 @@ struct llama_hparams { } }; -uint32_t get_n_ff(const struct llama_hparams* hparams) { +static uint32_t get_n_ff(const struct llama_hparams* hparams) { const uint32_t n_ff = ((2*(4*hparams->n_embd)/3 + hparams->n_mult - 1)/hparams->n_mult)*hparams->n_mult; return n_ff; } @@ -262,7 +256,7 @@ struct llama_model_lora { std::vector layers; }; -void init_model(struct llama_model * model) { +static void init_model(struct llama_model * model) { const auto & hparams = model->hparams; const uint32_t n_embd = hparams.n_embd; @@ -299,7 +293,7 @@ void init_model(struct llama_model * model) { } -void init_model_lora(struct llama_model_lora * model) { +static void init_model_lora(struct llama_model_lora * model) { const auto & hparams = model->hparams; const uint32_t n_embd = hparams.n_embd; @@ -342,7 +336,7 @@ void init_model_lora(struct llama_model_lora * model) { } } -void set_param_model(struct llama_model * model) { +static void set_param_model(struct llama_model * model) { const auto& hparams = model->hparams; const uint32_t n_layer = hparams.n_layer; @@ -368,7 +362,7 @@ void set_param_model(struct llama_model * model) { } } -void set_param_model_lora(struct llama_model_lora * model) { +static void set_param_model_lora(struct llama_model_lora * model) { const auto& hparams = model->hparams; const uint32_t n_layer = hparams.n_layer; @@ -399,7 +393,7 @@ void set_param_model_lora(struct llama_model_lora * model) { } } -void randomize_model(struct llama_model * model, int seed, float mean, float std, float min, float max) { +static void randomize_model(struct llama_model * model, int seed, float mean, float std, float min, float max) { const auto & hparams = model->hparams; const uint32_t n_layer = hparams.n_layer; @@ -428,7 +422,9 @@ void randomize_model(struct llama_model * model, int seed, float mean, float std } -void randomize_model_lora(struct llama_model_lora * model, int seed, float mean, float std, float min, float max) { +static void randomize_model_lora( + struct llama_model_lora * model, int seed, float mean, float std, float min, float max +) { const auto & hparams = model->hparams; const uint32_t n_layer = hparams.n_layer; @@ -461,7 +457,7 @@ void randomize_model_lora(struct llama_model_lora * model, int seed, float mean, } } -bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) { +static bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) { const auto & hparams = model->hparams; const uint32_t n_ctx = hparams.n_ctx; @@ -497,7 +493,7 @@ bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int return true; } -bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora * model, int n_batch) { +static bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora * model, int n_batch) { const auto & hparams = model->hparams; const uint32_t n_ctx = hparams.n_ctx; @@ -533,15 +529,15 @@ bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora * return true; } -struct ggml_tensor * forward( - struct llama_model * model, - struct llama_kv_cache * cache, - struct ggml_context * ctx0, - struct ggml_cgraph * gf, - struct ggml_tensor * tokens_input, - const int n_tokens, - const int n_past) { - +static struct ggml_tensor * forward( + struct llama_model * model, + struct llama_kv_cache * cache, + struct ggml_context * ctx0, + struct ggml_cgraph * gf, + struct ggml_tensor * tokens_input, + const int n_tokens, + const int n_past +) { const int N = n_tokens; struct llama_kv_cache& kv_self = *cache; @@ -758,25 +754,25 @@ struct ggml_tensor * forward( return inpL; } -void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0) { +static void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0) { GGML_ASSERT(tensor->n_dims == 1); GGML_ASSERT(tensor->ne[0] == ne0); } -void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1) { +static void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1) { GGML_ASSERT(tensor->n_dims == 2); GGML_ASSERT(tensor->ne[0] == ne0); GGML_ASSERT(tensor->ne[1] == ne1); } -void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2) { +static void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2) { GGML_ASSERT(tensor->n_dims == 3); GGML_ASSERT(tensor->ne[0] == ne0); GGML_ASSERT(tensor->ne[1] == ne1); GGML_ASSERT(tensor->ne[2] == ne2); } -void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) { +static void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) { GGML_ASSERT(tensor->n_dims == 4); GGML_ASSERT(tensor->ne[0] == ne0); GGML_ASSERT(tensor->ne[1] == ne1); @@ -784,16 +780,16 @@ void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int6 GGML_ASSERT(tensor->ne[3] == ne3); } -struct ggml_tensor * forward_batch( - struct llama_model * model, - struct llama_kv_cache * cache, - struct ggml_context * ctx0, - struct ggml_cgraph * gf, - struct ggml_tensor * tokens_input, - const int n_tokens, - const int n_past, - const int n_batch) { - +static struct ggml_tensor * forward_batch( + struct llama_model * model, + struct llama_kv_cache * cache, + struct ggml_context * ctx0, + struct ggml_cgraph * gf, + struct ggml_tensor * tokens_input, + const int n_tokens, + const int n_past, + const int n_batch +) { const int N = n_tokens; struct llama_kv_cache& kv_self = *cache; @@ -1075,16 +1071,15 @@ struct ggml_tensor * forward_batch( return inpL; } - -struct ggml_tensor * forward_lora( - struct llama_model_lora * model, - struct llama_kv_cache * cache, - struct ggml_context * ctx0, - struct ggml_cgraph * gf, - struct ggml_tensor * tokens_input, - const int n_tokens, - const int n_past) { - +static struct ggml_tensor * forward_lora( + struct llama_model_lora * model, + struct llama_kv_cache * cache, + struct ggml_context * ctx0, + struct ggml_cgraph * gf, + struct ggml_tensor * tokens_input, + const int n_tokens, + const int n_past +) { const int N = n_tokens; struct llama_kv_cache& kv_self = *cache; @@ -1330,7 +1325,7 @@ struct ggml_tensor * forward_lora( return inpL; } -void sample_softmax(struct ggml_tensor * logits, struct ggml_tensor * probs, struct ggml_tensor * best_samples) { +static void sample_softmax(struct ggml_tensor * logits, struct ggml_tensor * probs, struct ggml_tensor * best_samples) { assert(logits->n_dims == 2); assert(probs->n_dims == 2); assert(best_samples->n_dims == 1); @@ -1361,7 +1356,10 @@ void sample_softmax(struct ggml_tensor * logits, struct ggml_tensor * probs, str } } -void sample_softmax_batch(struct ggml_context * ctx, struct ggml_tensor * logits, struct ggml_tensor * probs, struct ggml_tensor * best_samples) { +static void sample_softmax_batch( + struct ggml_context * ctx, struct ggml_tensor * logits, struct ggml_tensor * probs, + struct ggml_tensor * best_samples +) { GGML_ASSERT(best_samples->n_dims == 2); GGML_ASSERT(logits->n_dims == 3); GGML_ASSERT(probs->n_dims == 3); @@ -1395,7 +1393,7 @@ void sample_softmax_batch(struct ggml_context * ctx, struct ggml_tensor * logits } } -void print_row(struct ggml_tensor * probs, int i) { +static void print_row(struct ggml_tensor * probs, int i) { for (int k = 0; k < probs->ne[0]; ++k) { float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k); printf(" %.2f", p); @@ -1403,7 +1401,7 @@ void print_row(struct ggml_tensor * probs, int i) { printf("\n"); } -void print_matrix(struct ggml_tensor * probs) { +static void print_matrix(struct ggml_tensor * probs) { assert(probs->n_dims == 2); for (int i = 0; i < probs->ne[1]; ++i) { for (int k = 0; k < probs->ne[0]; ++k) { @@ -1414,7 +1412,7 @@ void print_matrix(struct ggml_tensor * probs) { } } -void print_token(int token, int n_vocab) { +static void print_token(int token, int n_vocab) { for (int k = 0; k < token; ++k) { printf(" "); } @@ -1425,14 +1423,14 @@ void print_token(int token, int n_vocab) { printf("\n"); } -void print_tokens(struct ggml_tensor * tokens, int n_vocab) { +static void print_tokens(struct ggml_tensor * tokens, int n_vocab) { for (int i=0; ine[0]; ++i) { int token = ggml_get_i32_1d(tokens, i); print_token(token, n_vocab); } } -void get_example_targets(int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets) { +static void get_example_targets(int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets) { int n_tokens = tokens_input->ne[0]; int n_vocab = targets->ne[0]; float randomness = 0.0f; @@ -1453,7 +1451,9 @@ void get_example_targets(int example_id, struct ggml_tensor * tokens_input, stru } } -void get_example_targets_batch(struct ggml_context * ctx, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets) { +static void get_example_targets_batch( + struct ggml_context * ctx, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets +) { GGML_ASSERT(tokens_input->n_dims == 2); GGML_ASSERT( targets->n_dims == 3); int n_tokens = tokens_input->ne[0]; @@ -1476,7 +1476,7 @@ void get_example_targets_batch(struct ggml_context * ctx, int example_id, struct } } -void lshift_examples(struct ggml_tensor * tokens_input, struct ggml_tensor * targets, int n_shift) { +static void lshift_examples(struct ggml_tensor * tokens_input, struct ggml_tensor * targets, int n_shift) { int n_tokens = tokens_input->ne[0]; int n_vocab = targets->ne[0]; for (int i=0; i #endif -namespace { - // Used for debugging to print out beam tokens. struct ostream_beam_view { llama_context * ctx; llama_beam_view beam_view; }; -std::ostream& operator<<(std::ostream& os, const ostream_beam_view & obv) { + +static std::ostream & operator<<(std::ostream & os, const ostream_beam_view & obv) { os << "p(" << obv.beam_view.p << ") eob(" << std::boolalpha << obv.beam_view.eob << ") tokens("; for (size_t i = 0 ; i < obv.beam_view.n_tokens ; ++i) { os << llama_token_to_piece(obv.ctx, obv.beam_view.tokens[i]); @@ -48,7 +47,7 @@ struct beam_search_callback_data { // In this case, end-of-beam (eob) is equivalent to end-of-sentence (eos) but this need not always be the same. // For example, eob can be flagged due to maximum token length, stop words, etc. -bool is_at_eob(const beam_search_callback_data & callback_data, const llama_token * tokens, const size_t n_tokens) { +static bool is_at_eob(const beam_search_callback_data & callback_data, const llama_token * tokens, size_t n_tokens) { return n_tokens && tokens[n_tokens-1] == llama_token_eos(callback_data.ctx); } @@ -58,7 +57,7 @@ bool is_at_eob(const beam_search_callback_data & callback_data, const llama_toke // * When all beams converge to a common prefix, they are made available in beams_state.beams[0]. // This is also called when the stop condition is met. // Collect tokens into std::vector response which is pointed to by callback_data. -void beam_search_callback(void * callback_data_ptr, llama_beams_state beams_state) { +static void beam_search_callback(void * callback_data_ptr, llama_beams_state beams_state) { auto& callback_data = *static_cast(callback_data_ptr); // Mark beams as EOS as needed. for (size_t i = 0 ; i < beams_state.n_beams ; ++i) { @@ -84,8 +83,6 @@ void beam_search_callback(void * callback_data_ptr, llama_beams_state beams_stat #endif } -} // namespace - int main(int argc, char ** argv) { gpt_params params; diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp index c1f3bbb08..ea0f41ee4 100644 --- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp @@ -115,9 +115,7 @@ struct TransformerWeights { } }; -namespace { - -void malloc_weights(TransformerWeights* w, Config* p, bool shared_weights) { +static void malloc_weights(TransformerWeights* w, Config* p, bool shared_weights) { // we calloc instead of malloc to keep valgrind happy w->token_embedding_table = new float[p->vocab_size * p->dim](); printf("[%s:AK] Allocating [%d] x [%d] = [%d] float space for w->token_embedding_table\n",__func__,p->vocab_size , p->dim, p->vocab_size * p->dim); @@ -160,7 +158,7 @@ void malloc_weights(TransformerWeights* w, Config* p, bool shared_weights) { } } -int checkpoint_init_weights(TransformerWeights *w, Config* p, FILE* f, bool shared_weights) { +static int checkpoint_init_weights(TransformerWeights *w, Config* p, FILE* f, bool shared_weights) { if (fread(w->token_embedding_table, sizeof(float), p->vocab_size * p->dim, f) != static_cast(p->vocab_size * p->dim)) return 1; if (fread(w->rms_att_weight, sizeof(float), p->n_layers * p->dim, f) != static_cast(p->n_layers * p->dim)) return 1; if (fread(w->wq, sizeof(float), p->n_layers * p->dim * p->dim, f) != static_cast(p->n_layers * p->dim * p->dim)) return 1; @@ -191,7 +189,7 @@ int checkpoint_init_weights(TransformerWeights *w, Config* p, FILE* f, bool shar return 0; } -void print_sample_weights(TransformerWeights *w){ +static void print_sample_weights(TransformerWeights *w){ printf("----- Quick print of first of the weight vales of all the variables\n"); printf("%f\n", w->token_embedding_table[0]); printf("%f\n", w->rms_att_weight[0]); @@ -326,7 +324,7 @@ struct train_params { int mem_compute1_gb; }; -void print_params(struct my_llama_hparams * params) { +static void print_params(struct my_llama_hparams * params) { printf("%s: n_vocab: %d\n", __func__, params->n_vocab); printf("%s: n_ctx: %d\n", __func__, params->n_ctx); printf("%s: n_embd: %d\n", __func__, params->n_embd); @@ -337,7 +335,7 @@ void print_params(struct my_llama_hparams * params) { printf("%s: n_rot: %d\n", __func__, params->n_rot); } -void init_model(struct my_llama_model * model) { +static void init_model(struct my_llama_model * model) { const auto & hparams = model->hparams; const uint32_t n_embd = hparams.n_embd; @@ -410,17 +408,17 @@ void init_model(struct my_llama_model * model) { } } -float get_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) { +static float get_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) { float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]); return *ptr; } -int32_t get_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) { +static int32_t get_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) { int32_t * ptr = (int32_t *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]); return *ptr; } -void print_row(struct ggml_tensor * probs, int i) { +static void print_row(struct ggml_tensor * probs, int i) { for (int k = 0; k < probs->ne[0]; ++k) { float p = get_f32_2d(probs, k, i); printf(" %f", p); @@ -428,7 +426,7 @@ void print_row(struct ggml_tensor * probs, int i) { printf("\n"); } -void print_matrix(struct ggml_tensor * probs) { +static void print_matrix(struct ggml_tensor * probs) { assert(probs->n_dims == 2); for (int i = 0; i < probs->ne[1]; ++i) { for (int k = 0; k < probs->ne[0]; ++k) { @@ -551,7 +549,7 @@ std::string llama_escape_whitespaces(const std::string& text) { return out.str(); } -void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab) { +static void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab) { if (is_ggml_file(filename)) { struct ggml_context * ctx_data = NULL; @@ -639,7 +637,7 @@ void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab) } } -void convert_weights_ak_to_gg(struct ggml_tensor * gg_weights, const float * karpathy_weights) { +static void convert_weights_ak_to_gg(struct ggml_tensor * gg_weights, const float * karpathy_weights) { int ct; switch (gg_weights->n_dims){ case 1: @@ -675,7 +673,9 @@ void convert_weights_ak_to_gg(struct ggml_tensor * gg_weights, const float * kar } } -void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * model, TransformerWeights* w, const char * filename) { +static void save_as_llama_model( + struct llama_vocab * vocab, struct my_llama_model * model, TransformerWeights* w, const char * filename +) { // convert AK weights into GG weights one by one. // w->token_embedding_table -> model->tok_embeddings // float* -> struct ggml_tensor @@ -787,7 +787,7 @@ void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * mod gguf_free(ctx); } -struct train_params get_default_train_params() { +static struct train_params get_default_train_params() { struct train_params params; params.fn_vocab_model = "models/7B/ggml-model-f16.gguf"; params.fn_llama2c_output_model = "ak_llama_model.bin"; @@ -837,7 +837,7 @@ struct train_params get_default_train_params() { return params; } -void print_usage(int /*argc*/, char ** argv, const struct train_params * params) { +static void print_usage(int /*argc*/, char ** argv, const struct train_params * params) { fprintf(stderr, "usage: %s [options]\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "options:\n"); @@ -848,7 +848,7 @@ void print_usage(int /*argc*/, char ** argv, const struct train_params * params) fprintf(stderr, "\n"); } -bool params_parse(int argc, char ** argv, struct train_params * params) { +static bool params_parse(int argc, char ** argv, struct train_params * params) { bool invalid_param = false; bool reqd_param_found = false; std::string arg; @@ -903,7 +903,7 @@ bool params_parse(int argc, char ** argv, struct train_params * params) { return true; } -std::string basename(const std::string &path) { +static std::string basename(const std::string &path) { size_t pos = path.find_last_of("/\\"); if (pos == std::string::npos) { return path; @@ -911,8 +911,6 @@ std::string basename(const std::string &path) { return path.substr(pos + 1); } -} // namespace - int main(int argc, char ** argv) { struct train_params params = get_default_train_params(); if (!params_parse(argc, argv, ¶ms)) { diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp index 9601e0f6e..9ab63a293 100644 --- a/examples/gguf/gguf.cpp +++ b/examples/gguf/gguf.cpp @@ -13,16 +13,14 @@ #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) -namespace { - -template -std::string to_string(const T & val) { +template +static std::string to_string(const T & val) { std::stringstream ss; ss << val; return ss.str(); } -bool gguf_ex_write(const std::string & fname) { +static bool gguf_ex_write(const std::string & fname) { struct gguf_context * ctx = gguf_init_empty(); gguf_set_val_u8 (ctx, "some.parameter.uint8", 0x12); @@ -87,7 +85,7 @@ bool gguf_ex_write(const std::string & fname) { } // just read tensor info -bool gguf_ex_read_0(const std::string & fname) { +static bool gguf_ex_read_0(const std::string & fname) { struct gguf_init_params params = { /*.no_alloc = */ false, /*.ctx = */ NULL, @@ -145,7 +143,7 @@ bool gguf_ex_read_0(const std::string & fname) { } // read and create ggml_context containing the tensors and their data -bool gguf_ex_read_1(const std::string & fname) { +static bool gguf_ex_read_1(const std::string & fname) { struct ggml_context * ctx_data = NULL; struct gguf_init_params params = { @@ -229,8 +227,6 @@ bool gguf_ex_read_1(const std::string & fname) { return true; } -} // namespace - int main(int argc, char ** argv) { if (argc < 3) { printf("usage: %s data.gguf r|w\n", argv[0]); diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 5d1e2c2af..7ec74b563 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -33,17 +33,16 @@ #pragma warning(disable: 4244 4267) // possible loss of data #endif -namespace { +static llama_context ** g_ctx; +static llama_model ** g_model; +static gpt_params * g_params; +static std::vector * g_input_tokens; +static std::ostringstream * g_output_ss; +static std::vector * g_output_tokens; +static bool is_interacting = false; -llama_context ** g_ctx; -llama_model ** g_model; -gpt_params * g_params; -std::vector * g_input_tokens; -std::ostringstream * g_output_ss; -std::vector * g_output_tokens; -bool is_interacting = false; -void write_logfile( +static void write_logfile( const llama_context * ctx, const gpt_params & params, const llama_model * model, const std::vector & input_tokens, const std::string & output, const std::vector & output_tokens @@ -88,7 +87,7 @@ void write_logfile( } #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) -void sigint_handler(int signo) { +static void sigint_handler(int signo) { if (signo == SIGINT) { if (!is_interacting) { is_interacting = true; @@ -103,8 +102,6 @@ void sigint_handler(int signo) { } #endif -} // namespace - int main(int argc, char ** argv) { gpt_params params; g_params = ¶ms; diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index 474ce3158..85505e459 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -28,11 +28,10 @@ struct results_log_softmax { float prob; }; -namespace { - -void write_logfile(const llama_context * ctx, const gpt_params & params, - const llama_model * model, const struct results_perplexity & results) { - +static void write_logfile( + const llama_context * ctx, const gpt_params & params, const llama_model * model, + const struct results_perplexity & results +) { if (params.logdir.empty()) { return; } @@ -78,7 +77,7 @@ void write_logfile(const llama_context * ctx, const gpt_params & params, fclose(logfile); } -std::vector softmax(const std::vector& logits) { +static std::vector softmax(const std::vector& logits) { std::vector probs(logits.size()); float max_logit = logits[0]; for (float v : logits) max_logit = std::max(max_logit, v); @@ -94,7 +93,7 @@ std::vector softmax(const std::vector& logits) { return probs; } -results_log_softmax log_softmax(int n_vocab, const float * logits, int tok) { +static results_log_softmax log_softmax(int n_vocab, const float * logits, int tok) { float max_logit = logits[0]; for (int i = 1; i < n_vocab; ++i) max_logit = std::max(max_logit, logits[i]); double sum_exp = 0.0; @@ -102,9 +101,10 @@ results_log_softmax log_softmax(int n_vocab, const float * logits, int tok) { return {logits[tok] - max_logit - log(sum_exp), logits[tok], expf(logits[tok] - max_logit) / (float) sum_exp}; } -void process_logits(int n_vocab, const float * logits, const int * tokens, int n_token, std::vector & workers, - double & nll, double & nll2, float * logit_history, float * prob_history) { - +static void process_logits( + int n_vocab, const float * logits, const int * tokens, int n_token, std::vector & workers, + double & nll, double & nll2, float * logit_history, float * prob_history +) { std::mutex mutex; int counter = 0; auto compute = [&mutex, &counter, &nll, &nll2, logit_history, prob_history, n_vocab, logits, tokens, n_token] () { @@ -402,8 +402,9 @@ results_perplexity perplexity(llama_context * ctx, const gpt_params & params) { return {tokens, ppl, logit_history, prob_history}; } -std::vector hellaswag_evaluate_tokens(llama_context * ctx, const std::vector& tokens, int n_past, int n_batch, - int n_vocab, int n_thread) { +static std::vector hellaswag_evaluate_tokens( + llama_context * ctx, const std::vector& tokens, int n_past, int n_batch, int n_vocab, int n_thread +) { std::vector result; result.reserve(tokens.size() * n_vocab); size_t n_chunk = (tokens.size() + n_batch - 1)/n_batch; @@ -423,7 +424,7 @@ std::vector hellaswag_evaluate_tokens(llama_context * ctx, const std::vec return result; } -void hellaswag_score(llama_context * ctx, const gpt_params & params) { +static void hellaswag_score(llama_context * ctx, const gpt_params & params) { // Calculates hellaswag score (acc_norm) from prompt // // Data extracted from the HellaSwag validation dataset (MIT license) https://github.com/rowanz/hellaswag/blob/master/data/hellaswag_val.jsonl @@ -653,8 +654,6 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) { printf("\n"); } -} // namespace - int main(int argc, char ** argv) { gpt_params params; diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp index 3d194f7fc..b2887ff81 100644 --- a/examples/quantize-stats/quantize-stats.cpp +++ b/examples/quantize-stats/quantize-stats.cpp @@ -44,9 +44,7 @@ struct error_stats { uint64_t error_histogram[HISTOGRAM_BUCKETS]; }; -namespace { - -void quantize_stats_print_usage(int /*argc*/, char ** argv) { +static void quantize_stats_print_usage(int /*argc*/, char ** argv) { quantize_stats_params params; fprintf(stderr, "usage: %s [options]\n", argv[0]); fprintf(stderr, "\n"); @@ -72,7 +70,7 @@ void quantize_stats_print_usage(int /*argc*/, char ** argv) { } // Check if a layer is included/excluded by command line -bool layer_included(const quantize_stats_params & params, const std::string & layer) { +static bool layer_included(const quantize_stats_params & params, const std::string & layer) { for (const auto& excluded : params.exclude_layers) { if (std::regex_search(layer, std::regex(excluded))) { return false; @@ -87,7 +85,7 @@ bool layer_included(const quantize_stats_params & params, const std::string & la } // Update error statistics given vectors with the before/after result of quantization -void update_error_stats(int64_t nelements, const float * input, const float * output, error_stats & stats) { +static void update_error_stats(int64_t nelements, const float * input, const float * output, error_stats & stats) { for (int64_t i = 0; i < nelements; i++) { double diff = input[i] - output[i]; stats.total_error += diff * diff; @@ -97,7 +95,7 @@ void update_error_stats(int64_t nelements, const float * input, const float * ou stats.num_samples += nelements; } -void combine_error_stats(error_stats & into, const error_stats & from) { +static void combine_error_stats(error_stats & into, const error_stats & from) { into.num_samples += from.num_samples; into.total_error += from.total_error; if (from.max_error > into.max_error) into.max_error = from.max_error; @@ -117,7 +115,7 @@ double find_quantile(const error_stats & stats, double quantile) { return INFINITY; } -void print_error_stats(const std::string & name, const error_stats & stats, bool print_histogram) { +static void print_error_stats(const std::string & name, const error_stats & stats, bool print_histogram) { double rmse = sqrt(stats.total_error / (double) stats.num_samples); double median = find_quantile(stats, .5); double pct95 = find_quantile(stats, .95); @@ -134,7 +132,7 @@ void print_error_stats(const std::string & name, const error_stats & stats, bool } // copied from ggml.h - verify that we can access this as a flat array -bool tensor_is_contiguous(const struct ggml_tensor * tensor) { +static bool tensor_is_contiguous(const struct ggml_tensor * tensor) { static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); return @@ -144,17 +142,10 @@ bool tensor_is_contiguous(const struct ggml_tensor * tensor) { tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; } -void test_roundtrip_on_chunk( - const ggml_tensor * layer, - int64_t offset, - int64_t chunk_size, - const ggml_type_traits_t & qfns, - bool use_reference, - float * input_scratch, - char * quantized_scratch, - float * output_scratch, - error_stats & stats) { - +static void test_roundtrip_on_chunk( + const ggml_tensor * layer, int64_t offset, int64_t chunk_size, const ggml_type_traits_t & qfns, bool use_reference, + float * input_scratch, char * quantized_scratch, float * output_scratch, error_stats & stats +) { if (layer->type == GGML_TYPE_F16) { for (int i = 0; i < chunk_size; i++) { input_scratch[i] = ggml_get_f32_1d(layer, i + offset); @@ -175,18 +166,11 @@ void test_roundtrip_on_chunk( // Run quantization function for a single layer and update error stats -void test_roundtrip_on_layer( - std::string & name, - bool print_layer_stats, - const ggml_type_traits_t & qfns, - bool use_reference, - const ggml_tensor * layer, - std::vector & input_scratch, - std::vector & quantized_scratch, - std::vector & output_scratch, - error_stats & total_error, - int max_thread = 0) { - +static void test_roundtrip_on_layer( + std::string & name, bool print_layer_stats, const ggml_type_traits_t & qfns, bool use_reference, + const ggml_tensor * layer, std::vector & input_scratch, std::vector & quantized_scratch, + std::vector & output_scratch, error_stats & total_error, int max_thread = 0 +) { assert(tensor_is_contiguous(layer)); error_stats layer_error {}; uint64_t nelements = ggml_nelements(layer); @@ -239,8 +223,6 @@ void test_roundtrip_on_layer( } } -} // namespace - int main(int argc, char ** argv) { ggml_time_init(); diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index 85a004946..300788c91 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -7,15 +7,13 @@ #include #include -namespace { - struct quant_option { std::string name; llama_ftype ftype; std::string desc; }; -const std::vector QUANT_OPTIONS = { +static const std::vector QUANT_OPTIONS = { { "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0, " 3.56G, +0.2166 ppl @ LLaMA-v1-7B", }, { "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1, " 3.90G, +0.1585 ppl @ LLaMA-v1-7B", }, { "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0, " 4.33G, +0.0683 ppl @ LLaMA-v1-7B", }, @@ -42,7 +40,7 @@ const std::vector QUANT_OPTIONS = { }; -bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) { +static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) { std::string ftype_str; for (auto ch : ftype_str_in) { @@ -74,7 +72,7 @@ bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std: // usage: // ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.gguf [models/llama/ggml-model-quant.gguf] type [nthreads] // -void usage(const char * executable) { +static void usage(const char * executable) { printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n", executable); printf(" --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n"); printf(" --leave-output-tensor: Will leave output.weight un(re)quantized. Increases model size but may also increase quality, especially when requantizing\n"); @@ -90,8 +88,6 @@ void usage(const char * executable) { exit(1); } -} // namespace - int main(int argc, char ** argv) { if (argc < 3) { usage(argv[0]); diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 23ce5fcb7..9d9c69d08 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -26,8 +26,6 @@ using namespace httplib; using json = nlohmann::json; -namespace { - struct server_params { std::string hostname = "127.0.0.1"; @@ -65,7 +63,7 @@ enum stop_type STOP_PARTIAL, }; -bool ends_with(const std::string & str, const std::string & suffix) +static bool ends_with(const std::string & str, const std::string & suffix) { return str.size() >= suffix.size() && 0 == str.compare(str.size() - suffix.size(), suffix.size(), suffix); @@ -102,7 +100,7 @@ std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end) return ret; } -void server_log( +static void server_log( const char * level, const char * function, int line, const char * message, const nlohmann::ordered_json & extra ) { nlohmann::ordered_json log{ @@ -163,7 +161,7 @@ json probs_vector_to_json(const llama_context * ctx, const std::vector response which is pointed to by callback_data. -void beam_search_callback(void * callback_data, llama_beams_state beams_state) { +static void beam_search_callback(void * callback_data, llama_beams_state beams_state) { auto & llama = *static_cast(callback_data); // Mark beams as EOS as needed. for (size_t i = 0 ; i < beams_state.n_beams ; ++i) { @@ -1259,7 +1257,7 @@ struct token_translator { std::string operator()(const completion_token_output & cto) const { return (*this)(cto.tok); } }; -void append_to_generated_text_from_generated_token_probs(llama_server_context & llama) { +static void append_to_generated_text_from_generated_token_probs(llama_server_context & llama) { auto & gtps = llama.generated_token_probs; auto translator = token_translator{llama.ctx}; auto add_strlen = [=](size_t sum, const completion_token_output & cto) { return sum + translator(cto).size(); }; @@ -1272,10 +1270,7 @@ void append_to_generated_text_from_generated_token_probs(llama_server_context & } } -} // namespace - -int main(int argc, char **argv) -{ +int main(int argc, char **argv) { // own arguments required by this example gpt_params params; server_params sparams; diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp index 785f7be62..1f1cd6178 100644 --- a/examples/train-text-from-scratch/train-text-from-scratch.cpp +++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp @@ -18,8 +18,6 @@ #pragma warning(disable: 4244 4267) // possible loss of data #endif -namespace { - struct random_normal_distribution { std::mt19937 gen; std::normal_distribution rd; @@ -32,35 +30,37 @@ struct random_uniform_distribution { std::uniform_real_distribution rd; }; -void init_random_normal_distribution(struct random_normal_distribution * rnd, int seed, float mean, float std, float min, float max) { +static void init_random_normal_distribution( + struct random_normal_distribution * rnd, int seed, float mean, float std, float min, float max +) { rnd->gen = std::mt19937(seed); rnd->rd = std::normal_distribution{mean, std}; rnd->min = min; rnd->max = max; } -void init_random_uniform_distribution(struct random_uniform_distribution * rnd, int seed, float min, float max) { +static void init_random_uniform_distribution(struct random_uniform_distribution * rnd, int seed, float min, float max) { rnd->gen = std::mt19937(seed); rnd->rd = std::uniform_real_distribution{min, max}; } -int clamp(const int v, const int min, const int max) { +static int clamp(const int v, const int min, const int max) { return ((v < min) ? (min) : (v > max) ? (max) : v); } -float fclamp(const float v, const float min, const float max) { +static float fclamp(const float v, const float min, const float max) { return ((v < min) ? (min) : (v > max) ? (max) : v); } -float frand() { +static float frand() { return (float)rand()/(float)RAND_MAX; } -float frand_normal(struct random_normal_distribution * rnd) { +static float frand_normal(struct random_normal_distribution * rnd) { return fclamp(rnd->rd(rnd->gen), rnd->min, rnd->max); } -float frand_uniform(struct random_uniform_distribution * rnd) { +static float frand_uniform(struct random_uniform_distribution * rnd) { return rnd->rd(rnd->gen); } @@ -210,85 +210,85 @@ struct my_llama_model { }; // gguf constants -const char * LLM_KV_OPTIMIZER_TYPE = "optimizer.type"; -const char * LLM_KV_OPTIMIZER_TYPE_ADAM = "adam"; -const char * LLM_KV_OPTIMIZER_TYPE_LBFGS = "lbfgs"; -const char * LLM_KV_OPTIMIZER_FILE_VERSION = "optimizer.file_version"; -const char * LLM_KV_OPTIMIZER_CONVERGENCE_PAST_COUNT = "optimizer.convergence_past_count"; -const char * LLM_KV_OPTIMIZER_PARAMETER_COUNT = "optimizer.parameter_count"; -const char * LLM_KV_OPTIMIZER_ITERATION_COUNT = "optimizer.iteration_count"; -const char * LLM_KV_OPTIMIZER_JUST_INITIALIZED = "optimizer.just_initialized"; -const char * LLM_KV_OPTIMIZER_ADAM_BEST_LOSS = "optimizer.adam.best_loss"; -const char * LLM_KV_OPTIMIZER_ADAM_PREVIOUS_LOSS = "optimizer.adam.previous_loss"; -const char * LLM_KV_OPTIMIZER_ADAM_NO_IMPROVEMENT_COUNT = "optimizer.adam.no_improvement_count"; -const char * LLM_KV_OPTIMIZER_LBFGS_APPROX_HESSIAN_COUNT = "optimizer.lbfgs.approx_hessian_count"; -const char * LLM_KV_OPTIMIZER_LBFGS_BEST_LOSS = "optimizer.lbfgs.best_loss"; -const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_STEP = "optimizer.lbfgs.line_search_step"; -const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_J = "optimizer.lbfgs.line_search_j"; -const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_K = "optimizer.lbfgs.line_search_k"; -const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_END = "optimizer.lbfgs.line_search_end"; -const char * LLM_KV_OPTIMIZER_LBFGS_NO_IMPROVEMENT_COUNT = "optimizer.lbfgs.no_improvement_count"; +static const char * LLM_KV_OPTIMIZER_TYPE = "optimizer.type"; +static const char * LLM_KV_OPTIMIZER_TYPE_ADAM = "adam"; +static const char * LLM_KV_OPTIMIZER_TYPE_LBFGS = "lbfgs"; +static const char * LLM_KV_OPTIMIZER_FILE_VERSION = "optimizer.file_version"; +static const char * LLM_KV_OPTIMIZER_CONVERGENCE_PAST_COUNT = "optimizer.convergence_past_count"; +static const char * LLM_KV_OPTIMIZER_PARAMETER_COUNT = "optimizer.parameter_count"; +static const char * LLM_KV_OPTIMIZER_ITERATION_COUNT = "optimizer.iteration_count"; +static const char * LLM_KV_OPTIMIZER_JUST_INITIALIZED = "optimizer.just_initialized"; +static const char * LLM_KV_OPTIMIZER_ADAM_BEST_LOSS = "optimizer.adam.best_loss"; +static const char * LLM_KV_OPTIMIZER_ADAM_PREVIOUS_LOSS = "optimizer.adam.previous_loss"; +static const char * LLM_KV_OPTIMIZER_ADAM_NO_IMPROVEMENT_COUNT = "optimizer.adam.no_improvement_count"; +static const char * LLM_KV_OPTIMIZER_LBFGS_APPROX_HESSIAN_COUNT = "optimizer.lbfgs.approx_hessian_count"; +static const char * LLM_KV_OPTIMIZER_LBFGS_BEST_LOSS = "optimizer.lbfgs.best_loss"; +static const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_STEP = "optimizer.lbfgs.line_search_step"; +static const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_J = "optimizer.lbfgs.line_search_j"; +static const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_K = "optimizer.lbfgs.line_search_k"; +static const char * LLM_KV_OPTIMIZER_LBFGS_LINE_SEARCH_END = "optimizer.lbfgs.line_search_end"; +static const char * LLM_KV_OPTIMIZER_LBFGS_NO_IMPROVEMENT_COUNT = "optimizer.lbfgs.no_improvement_count"; -const char * LLM_TENSOR_OPTIMIZER_ADAM_FIRST_MOMENTS = "optimizer.adam.first_moments"; -const char * LLM_TENSOR_OPTIMIZER_ADAM_SECOND_MOMENTS = "optimizer.adam.second_moments"; -const char * LLM_TENSOR_OPTIMIZER_ADAM_PAST_LOSS_VALUES = "optimizer.adam.past_loss_values"; +static const char * LLM_TENSOR_OPTIMIZER_ADAM_FIRST_MOMENTS = "optimizer.adam.first_moments"; +static const char * LLM_TENSOR_OPTIMIZER_ADAM_SECOND_MOMENTS = "optimizer.adam.second_moments"; +static const char * LLM_TENSOR_OPTIMIZER_ADAM_PAST_LOSS_VALUES = "optimizer.adam.past_loss_values"; -const char * LLM_TENSOR_OPTIMIZER_LBFGS_CURRENT_PARAMETERS = "optimizer.lbfgs.current_parameters"; -const char * LLM_TENSOR_OPTIMIZER_LBFGS_PREVIOUS_PARAMETERS = "optimizer.lbfgs.previous_parameters"; -const char * LLM_TENSOR_OPTIMIZER_LBFGS_CURRENT_GRADIENTS = "optimizer.lbfgs.current_gradients"; -const char * LLM_TENSOR_OPTIMIZER_LBFGS_PREVIOUS_GRADIENTS = "optimizer.lbfgs.previous_gradients"; -const char * LLM_TENSOR_OPTIMIZER_LBFGS_SEARCH_DIRECTION = "optimizer.lbfgs.search_direction"; -const char * LLM_TENSOR_OPTIMIZER_LBFGS_PAST_LOSS_VALUES = "optimizer.lbfgs.past_loss_values"; -const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_ALPHA = "optimizer.lbfgs.memory_alpha"; -const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_YS = "optimizer.lbfgs.memory_ys"; -const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_S = "optimizer.lbfgs.memory_s"; -const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_Y = "optimizer.lbfgs.memory_y"; +static const char * LLM_TENSOR_OPTIMIZER_LBFGS_CURRENT_PARAMETERS = "optimizer.lbfgs.current_parameters"; +static const char * LLM_TENSOR_OPTIMIZER_LBFGS_PREVIOUS_PARAMETERS = "optimizer.lbfgs.previous_parameters"; +static const char * LLM_TENSOR_OPTIMIZER_LBFGS_CURRENT_GRADIENTS = "optimizer.lbfgs.current_gradients"; +static const char * LLM_TENSOR_OPTIMIZER_LBFGS_PREVIOUS_GRADIENTS = "optimizer.lbfgs.previous_gradients"; +static const char * LLM_TENSOR_OPTIMIZER_LBFGS_SEARCH_DIRECTION = "optimizer.lbfgs.search_direction"; +static const char * LLM_TENSOR_OPTIMIZER_LBFGS_PAST_LOSS_VALUES = "optimizer.lbfgs.past_loss_values"; +static const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_ALPHA = "optimizer.lbfgs.memory_alpha"; +static const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_YS = "optimizer.lbfgs.memory_ys"; +static const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_S = "optimizer.lbfgs.memory_s"; +static const char * LLM_TENSOR_OPTIMIZER_LBFGS_MEMORY_Y = "optimizer.lbfgs.memory_y"; -const char * LLM_KV_TRAINING_FILE_VERSION = "training.file_version"; -const char * LLM_KV_TRAINING_ITERATION_COUNT = "training.iteration_count"; -const char * LLM_KV_TRAINING_SAMPLE_COUNT = "training.sample_count"; -const char * LLM_KV_TRAINING_TOKEN_COUNT = "training.token_count"; +static const char * LLM_KV_TRAINING_FILE_VERSION = "training.file_version"; +static const char * LLM_KV_TRAINING_ITERATION_COUNT = "training.iteration_count"; +static const char * LLM_KV_TRAINING_SAMPLE_COUNT = "training.sample_count"; +static const char * LLM_KV_TRAINING_TOKEN_COUNT = "training.token_count"; // gguf constants (sync with gguf.py) -const char * LLM_KV_GENERAL_ARCHITECTURE = "general.architecture"; -const char * LLM_KV_GENERAL_FILE_TYPE = "general.file_type"; +static const char * LLM_KV_GENERAL_ARCHITECTURE = "general.architecture"; +static const char * LLM_KV_GENERAL_FILE_TYPE = "general.file_type"; -const char * LLM_KV_CONTEXT_LENGTH = "%s.context_length"; -const char * LLM_KV_EMBEDDING_LENGTH = "%s.embedding_length"; -const char * LLM_KV_BLOCK_COUNT = "%s.block_count"; -const char * LLM_KV_FEED_FORWARD_LENGTH = "%s.feed_forward_length"; -const char * LLM_KV_ATTENTION_HEAD_COUNT = "%s.attention.head_count"; -const char * LLM_KV_ATTENTION_LAYERNORM_RMS_EPS = "%s.attention.layer_norm_rms_epsilon"; -const char * LLM_KV_ROPE_DIMENSION_COUNT = "%s.rope.dimension_count"; -const char * LLM_KV_ROPE_FREQ_BASE = "%s.rope.freq_base"; // TODO load in llama.cpp -const char * LLM_KV_ROPE_SCALE_LINEAR = "%s.rope.scale_linear"; +static const char * LLM_KV_CONTEXT_LENGTH = "%s.context_length"; +static const char * LLM_KV_EMBEDDING_LENGTH = "%s.embedding_length"; +static const char * LLM_KV_BLOCK_COUNT = "%s.block_count"; +static const char * LLM_KV_FEED_FORWARD_LENGTH = "%s.feed_forward_length"; +static const char * LLM_KV_ATTENTION_HEAD_COUNT = "%s.attention.head_count"; +static const char * LLM_KV_ATTENTION_LAYERNORM_RMS_EPS = "%s.attention.layer_norm_rms_epsilon"; +static const char * LLM_KV_ROPE_DIMENSION_COUNT = "%s.rope.dimension_count"; +static const char * LLM_KV_ROPE_FREQ_BASE = "%s.rope.freq_base"; // TODO load in llama.cpp +static const char * LLM_KV_ROPE_SCALE_LINEAR = "%s.rope.scale_linear"; -const char * LLM_KV_TOKENIZER_MODEL = "tokenizer.ggml.model"; -const char * LLM_KV_TOKENIZER_LIST = "tokenizer.ggml.tokens"; -const char * LLM_KV_TOKENIZER_TOKEN_TYPE = "tokenizer.ggml.token_type"; -const char * LLM_KV_TOKENIZER_SCORES = "tokenizer.ggml.scores"; -const char * LLM_KV_TOKENIZER_MERGES = "tokenizer.ggml.merges"; -const char * LLM_KV_TOKENIZER_BOS_ID = "tokenizer.ggml.bos_token_id"; -const char * LLM_KV_TOKENIZER_EOS_ID = "tokenizer.ggml.eos_token_id"; -const char * LLM_KV_TOKENIZER_UNK_ID = "tokenizer.ggml.unknown_token_id"; -const char * LLM_KV_TOKENIZER_SEP_ID = "tokenizer.ggml.seperator_token_id"; -const char * LLM_KV_TOKENIZER_PAD_ID = "tokenizer.ggml.padding_token_id"; +static const char * LLM_KV_TOKENIZER_MODEL = "tokenizer.ggml.model"; +static const char * LLM_KV_TOKENIZER_LIST = "tokenizer.ggml.tokens"; +static const char * LLM_KV_TOKENIZER_TOKEN_TYPE = "tokenizer.ggml.token_type"; +static const char * LLM_KV_TOKENIZER_SCORES = "tokenizer.ggml.scores"; +static const char * LLM_KV_TOKENIZER_MERGES = "tokenizer.ggml.merges"; +static const char * LLM_KV_TOKENIZER_BOS_ID = "tokenizer.ggml.bos_token_id"; +static const char * LLM_KV_TOKENIZER_EOS_ID = "tokenizer.ggml.eos_token_id"; +static const char * LLM_KV_TOKENIZER_UNK_ID = "tokenizer.ggml.unknown_token_id"; +static const char * LLM_KV_TOKENIZER_SEP_ID = "tokenizer.ggml.seperator_token_id"; +static const char * LLM_KV_TOKENIZER_PAD_ID = "tokenizer.ggml.padding_token_id"; -const char * LLM_TENSOR_TOKEN_EMBD = "token_embd"; -const char * LLM_TENSOR_OUTPUT_NORM = "output_norm"; -const char * LLM_TENSOR_OUTPUT = "output"; -const char * LLM_TENSOR_ATTN_NORM = "blk.%d.attn_norm"; -const char * LLM_TENSOR_ATTN_Q = "blk.%d.attn_q"; -const char * LLM_TENSOR_ATTN_K = "blk.%d.attn_k"; -const char * LLM_TENSOR_ATTN_V = "blk.%d.attn_v"; -const char * LLM_TENSOR_ATTN_OUT = "blk.%d.attn_output"; -const char * LLM_TENSOR_FFN_NORM = "blk.%d.ffn_norm"; -const char * LLM_TENSOR_FFN_GATE = "blk.%d.ffn_gate"; -const char * LLM_TENSOR_FFN_DOWN = "blk.%d.ffn_down"; -const char * LLM_TENSOR_FFN_UP = "blk.%d.ffn_up"; +static const char * LLM_TENSOR_TOKEN_EMBD = "token_embd"; +static const char * LLM_TENSOR_OUTPUT_NORM = "output_norm"; +static const char * LLM_TENSOR_OUTPUT = "output"; +static const char * LLM_TENSOR_ATTN_NORM = "blk.%d.attn_norm"; +static const char * LLM_TENSOR_ATTN_Q = "blk.%d.attn_q"; +static const char * LLM_TENSOR_ATTN_K = "blk.%d.attn_k"; +static const char * LLM_TENSOR_ATTN_V = "blk.%d.attn_v"; +static const char * LLM_TENSOR_ATTN_OUT = "blk.%d.attn_output"; +static const char * LLM_TENSOR_FFN_NORM = "blk.%d.ffn_norm"; +static const char * LLM_TENSOR_FFN_GATE = "blk.%d.ffn_gate"; +static const char * LLM_TENSOR_FFN_DOWN = "blk.%d.ffn_down"; +static const char * LLM_TENSOR_FFN_UP = "blk.%d.ffn_up"; -void print_params(struct my_llama_hparams * params) { +static void print_params(struct my_llama_hparams * params) { printf("%s: n_vocab: %d\n", __func__, params->n_vocab); printf("%s: n_ctx: %d\n", __func__, params->n_ctx); printf("%s: n_embd: %d\n", __func__, params->n_embd); @@ -298,7 +298,7 @@ void print_params(struct my_llama_hparams * params) { printf("%s: n_rot: %d\n", __func__, params->n_rot); } -void init_model(struct my_llama_model * model) { +static void init_model(struct my_llama_model * model) { const auto & hparams = model->hparams; const uint32_t n_embd = hparams.n_embd; @@ -365,7 +365,7 @@ void init_model(struct my_llama_model * model) { } } -void set_param_model(struct my_llama_model * model) { +static void set_param_model(struct my_llama_model * model) { const auto& hparams = model->hparams; const uint32_t n_layer = hparams.n_layer; @@ -391,7 +391,7 @@ void set_param_model(struct my_llama_model * model) { } } -void randomize_model(struct my_llama_model * model, int seed, float mean, float std, float min, float max) { +static void randomize_model(struct my_llama_model * model, int seed, float mean, float std, float min, float max) { const auto & hparams = model->hparams; const uint32_t n_layer = hparams.n_layer; @@ -420,25 +420,25 @@ void randomize_model(struct my_llama_model * model, int seed, float mean, float } } -void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0) { +static void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0) { GGML_ASSERT(tensor->n_dims == 1); GGML_ASSERT(tensor->ne[0] == ne0); } -void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1) { +static void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1) { GGML_ASSERT(tensor->n_dims == 2); GGML_ASSERT(tensor->ne[0] == ne0); GGML_ASSERT(tensor->ne[1] == ne1); } -void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2) { +static void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2) { GGML_ASSERT(tensor->n_dims == 3); GGML_ASSERT(tensor->ne[0] == ne0); GGML_ASSERT(tensor->ne[1] == ne1); GGML_ASSERT(tensor->ne[2] == ne2); } -void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) { +static void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) { GGML_ASSERT(tensor->n_dims == 4); GGML_ASSERT(tensor->ne[0] == ne0); GGML_ASSERT(tensor->ne[1] == ne1); @@ -465,7 +465,7 @@ size_t hash_find(void * hash_table[], void * p) { return i; } -bool hash_insert(void * hash_table[], void * p) { +static bool hash_insert(void * hash_table[], void * p) { //size_t h = hash(p); size_t i = hash_find(hash_table, p); @@ -481,7 +481,7 @@ bool hash_insert(void * hash_table[], void * p) { return false; } -bool hash_contains(void * hash_table[], void * p) { +static bool hash_contains(void * hash_table[], void * p) { size_t i = hash_find(hash_table, p); return (i < GGML_GRAPH_HASHTABLE_SIZE) && (hash_table[i] == p); } @@ -500,11 +500,11 @@ struct hash_map * new_hash_map() { return result; }; -void free_hash_map(struct hash_map * map) { +static void free_hash_map(struct hash_map * map) { delete map; } -bool ggml_is_view(struct ggml_tensor * t) { +static bool ggml_is_view(struct ggml_tensor * t) { return t->op == GGML_OP_RESHAPE || t->op == GGML_OP_VIEW || t->op == GGML_OP_TRANSPOSE || t->op == GGML_OP_PERMUTE || t->op == GGML_OP_CPY; } @@ -597,13 +597,14 @@ struct ggml_tensor * ggml_recompute_graph_node( return clone; }; -void ggml_build_backward_gradient_checkpointing( - struct ggml_context * ctx, - struct ggml_cgraph * gf, - struct ggml_cgraph * gb, - struct ggml_cgraph * gb_tmp, - struct ggml_tensor * * checkpoints, - int n_checkpoints) { +static void ggml_build_backward_gradient_checkpointing( + struct ggml_context * ctx, + struct ggml_cgraph * gf, + struct ggml_cgraph * gb, + struct ggml_cgraph * gb_tmp, + struct ggml_tensor ** checkpoints, + int n_checkpoints +) { *gb_tmp = *gf; ggml_build_backward_expand(ctx, gf, gb_tmp, true); @@ -827,22 +828,22 @@ struct ggml_tensor * llama_build_train_graphs( return t36; } -void set_f32_3d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, int64_t i2, float value) { +static void set_f32_3d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, int64_t i2, float value) { float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2]); *ptr = value; } -void set_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, float value) { +static void set_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, float value) { float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]); *ptr = value; } -void set_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, int32_t value) { +static void set_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, int32_t value) { int32_t * ptr = (int32_t *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]); *ptr = value; } -float get_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) { +static float get_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) { float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]); return *ptr; } @@ -852,7 +853,7 @@ int32_t get_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) { return *ptr; } -void print_row(struct ggml_tensor * probs, int i) { +static void print_row(struct ggml_tensor * probs, int i) { for (int k = 0; k < probs->ne[0]; ++k) { float p = get_f32_2d(probs, k, i); printf(" %.2f", p); @@ -860,7 +861,7 @@ void print_row(struct ggml_tensor * probs, int i) { printf("\n"); } -void print_matrix(struct ggml_tensor * probs) { +static void print_matrix(struct ggml_tensor * probs) { assert(probs->n_dims == 2); for (int i = 0; i < probs->ne[1]; ++i) { for (int k = 0; k < probs->ne[0]; ++k) { @@ -871,7 +872,11 @@ void print_matrix(struct ggml_tensor * probs) { } } -void get_example_targets(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) { +static void get_example_targets( + struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, + size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, + struct ggml_tensor * target_probs +) { int n_tokens = tokens_input->ne[0]; int n_vocab = target_logits->ne[0]; @@ -891,7 +896,11 @@ void get_example_targets(struct llama_context * lctx, const int * train_samples, } } -void get_example_targets_batch(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) { +static void get_example_targets_batch( + struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, + size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, + struct ggml_tensor * target_probs +) { GGML_ASSERT(tokens_input->n_dims == 2); GGML_ASSERT(target_logits->n_dims == 3); GGML_ASSERT(target_probs->n_dims == 3); @@ -926,7 +935,7 @@ void get_example_targets_batch(struct llama_context * lctx, const int * train_sa } } -int tokenize_file(struct llama_context * lctx, const char * filename, std::vector& out) { +static int tokenize_file(struct llama_context * lctx, const char * filename, std::vector & out) { FILE * fp = std::fopen(filename, "rb"); if (fp == NULL) { return 0; @@ -997,7 +1006,7 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto return n_tokens; } -void shuffle_ints(int * begin, int * end) { +static void shuffle_ints(int * begin, int * end) { if (end <= begin) return; int max=begin[0]; for (int i=1; itype == b->type); @@ -1041,7 +1050,7 @@ bool are_same_layout(struct ggml_tensor * a, struct ggml_tensor * b) { return true; } -void read_tensor_by_name(struct ggml_tensor * dst, struct ggml_context * ctx, const char * name) { +static void read_tensor_by_name(struct ggml_tensor * dst, struct ggml_context * ctx, const char * name) { if (dst == NULL) { return; } @@ -1054,7 +1063,9 @@ void read_tensor_by_name(struct ggml_tensor * dst, struct ggml_context * ctx, co } } -void load_opt_context_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct ggml_opt_context * opt) { +static void load_opt_context_gguf( + struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct ggml_opt_context * opt +) { // NOTE: gguf_context must be initialized with f_ggml_ctx and no_alloc=false, otherwise tensor data can not be read uint32_t file_version; @@ -1115,7 +1126,7 @@ void load_opt_context_gguf(struct gguf_context * fctx, struct ggml_context * f_g } } -void save_opt_context_gguf(struct gguf_context * fctx, struct ggml_opt_context * opt) { +static void save_opt_context_gguf(struct gguf_context * fctx, struct ggml_opt_context * opt) { gguf_set_val_u32(fctx, LLM_KV_OPTIMIZER_FILE_VERSION, 0); gguf_set_val_u32(fctx, LLM_KV_OPTIMIZER_CONVERGENCE_PAST_COUNT, opt->params.past); gguf_set_val_u64(fctx, LLM_KV_OPTIMIZER_PARAMETER_COUNT, (uint64_t) opt->nx); @@ -1182,7 +1193,9 @@ void save_opt_context_gguf(struct gguf_context * fctx, struct ggml_opt_context * } } -void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model) { +static void load_llama_model_gguf( + struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model +) { // NOTE: gguf_context must be initialized with f_ggml_ctx and no_alloc=false, otherwise tensor data can not be read std::string arch; @@ -1253,7 +1266,9 @@ void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_context * f_g } } -void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vocab_model, struct my_llama_model * model) { +static void save_llama_model_gguf( + struct gguf_context * fctx, const char * fn_vocab_model, struct my_llama_model * model +) { const char * arch = "llama"; enum llama_ftype ftype = LLAMA_FTYPE_ALL_F32; @@ -1396,7 +1411,7 @@ void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vocab_mod } } -void save_llama_model_file(const char * filename, const char * fn_vocab_model, struct my_llama_model * model) { +static void save_llama_model_file(const char * filename, const char * fn_vocab_model, struct my_llama_model * model) { struct gguf_context * fctx = gguf_init_empty(); save_llama_model_gguf(fctx, fn_vocab_model, model); @@ -1407,7 +1422,10 @@ void save_llama_model_file(const char * filename, const char * fn_vocab_model, s gguf_free(fctx); } -void load_checkpoint_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model, struct ggml_opt_context * opt) { +static void load_checkpoint_gguf( + struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model, + struct ggml_opt_context * opt +) { load_llama_model_gguf(fctx, f_ggml_ctx, model); uint32_t file_version; @@ -1421,7 +1439,10 @@ void load_checkpoint_gguf(struct gguf_context * fctx, struct ggml_context * f_gg load_opt_context_gguf(fctx, f_ggml_ctx, opt); } -void save_checkpoint_gguf(struct gguf_context * fctx, const char * fn_vocab_model, struct my_llama_model * model, struct ggml_opt_context * opt) { +static void save_checkpoint_gguf( + struct gguf_context * fctx, const char * fn_vocab_model, struct my_llama_model * model, + struct ggml_opt_context * opt +) { save_llama_model_gguf(fctx, fn_vocab_model, model); gguf_set_val_u32(fctx, LLM_KV_TRAINING_FILE_VERSION, 0); @@ -1432,7 +1453,7 @@ void save_checkpoint_gguf(struct gguf_context * fctx, const char * fn_vocab_mode save_opt_context_gguf(fctx, opt); } -bool load_checkpoint_file(const char * filename, struct my_llama_model * model, struct ggml_opt_context * opt) { +static bool load_checkpoint_file(const char * filename, struct my_llama_model * model, struct ggml_opt_context * opt) { struct ggml_context * f_ggml_ctx; struct gguf_init_params params; params.no_alloc = false; @@ -1447,7 +1468,9 @@ bool load_checkpoint_file(const char * filename, struct my_llama_model * model, return true; } -void save_checkpoint_file(const char * filename, const char * fn_vocab_model, struct my_llama_model * model, struct ggml_opt_context * opt) { +static void save_checkpoint_file( + const char * filename, const char * fn_vocab_model, struct my_llama_model * model, struct ggml_opt_context * opt +) { struct gguf_context * fctx = gguf_init_empty(); save_checkpoint_gguf(fctx, fn_vocab_model, model, opt); @@ -1458,7 +1481,7 @@ void save_checkpoint_file(const char * filename, const char * fn_vocab_model, st gguf_free(fctx); } -float cosine_decay(const int decay_steps, const float minimum, int step) { +static float cosine_decay(const int decay_steps, const float minimum, int step) { if (step > decay_steps) { step = decay_steps; } @@ -1467,7 +1490,9 @@ float cosine_decay(const int decay_steps, const float minimum, int step) { return decay; } -float cosine_decay_restart(int decay_steps, const float minimum, int step, float restart_step_mult, bool enable_restart) { +static float cosine_decay_restart( + int decay_steps, const float minimum, int step, float restart_step_mult, bool enable_restart +) { if (enable_restart) { while (step > decay_steps) { step -= decay_steps; @@ -1595,7 +1620,7 @@ struct train_params get_default_train_params() { return params; } -void train_print_usage(int /*argc*/, char ** argv, const struct train_params * params) { +static void train_print_usage(int /*argc*/, char ** argv, const struct train_params * params) { fprintf(stderr, "usage: %s [options]\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "options:\n"); @@ -1652,7 +1677,7 @@ void train_print_usage(int /*argc*/, char ** argv, const struct train_params * p fprintf(stderr, "\n"); } -bool train_params_parse(int argc, char ** argv, struct train_params * params) { +static bool train_params_parse(int argc, char ** argv, struct train_params * params) { bool invalid_param = false; std::string arg; struct train_params default_params = get_default_train_params(); @@ -1946,7 +1971,7 @@ struct opt_callback_data { struct ggml_tensor * target_probs; }; -void opt_callback(void * vdata, float * sched) { +static void opt_callback(void * vdata, float * sched) { struct opt_callback_data * data = (struct opt_callback_data *) vdata; struct train_params * params = data->params; struct ggml_opt_context * opt = data->opt; @@ -1989,8 +2014,6 @@ void opt_callback(void * vdata, float * sched) { data->shuffle_countdown -= n_batch; } -} // namespace - int main(int argc, char ** argv) { struct train_params params = get_default_train_params(); diff --git a/pocs/vdot/vdot.cpp b/pocs/vdot/vdot.cpp index 048a3bde1..e96372c4b 100644 --- a/pocs/vdot/vdot.cpp +++ b/pocs/vdot/vdot.cpp @@ -16,9 +16,7 @@ constexpr int kVecSize = 1 << 18; -namespace { - -float drawFromGaussianPdf(std::mt19937& rndm) { +static float drawFromGaussianPdf(std::mt19937& rndm) { constexpr double kScale = 1./(1. + std::mt19937::max()); constexpr double kTwoPiTimesScale = 6.28318530717958647692*kScale; static float lastX; @@ -30,7 +28,8 @@ float drawFromGaussianPdf(std::mt19937& rndm) { haveX = true; return r*cos(phi); } -void fillRandomGaussianFloats(std::vector& values, std::mt19937& rndm, float mean = 0) { + +static void fillRandomGaussianFloats(std::vector& values, std::mt19937& rndm, float mean = 0) { for (auto& v : values) v = mean + drawFromGaussianPdf(rndm); } @@ -220,8 +219,6 @@ static void dot_q4_q8(const int n, float* s, const void* vx, const void* vy) { *s = sumf; } -} // namespace - int main(int argc, char** argv) { int nloop = argc > 1 ? atoi(argv[1]) : 10; diff --git a/tests/test-opt.cpp b/tests/test-opt.cpp index af4bac233..25f7a532e 100644 --- a/tests/test-opt.cpp +++ b/tests/test-opt.cpp @@ -36,17 +36,15 @@ #define GGML_PRINT(...) printf(__VA_ARGS__) -namespace { - -float frand(void) { +static float frand(void) { return (float)rand()/(float)RAND_MAX; } -int irand(int n) { +static int irand(int n) { return rand()%n; } -void get_random_dims(int64_t * dims, int ndims) { +static void get_random_dims(int64_t * dims, int ndims) { dims[0] = dims[1] = dims[2] = dims[3] = 1; for (int i = 0; i < ndims; i++) { @@ -54,7 +52,7 @@ void get_random_dims(int64_t * dims, int ndims) { } } -void get_random_dims_minmax(int64_t * dims, int ndims, int min, int max) { +static void get_random_dims_minmax(int64_t * dims, int ndims, int min, int max) { dims[0] = dims[1] = dims[2] = dims[3] = 1; for (int i = 0; i < ndims; i++) { @@ -111,16 +109,14 @@ struct ggml_tensor * get_random_tensor( return result; } -float get_element(const struct ggml_tensor * t, int idx) { +static float get_element(const struct ggml_tensor * t, int idx) { return ((float *)t->data)[idx]; } -void set_element(struct ggml_tensor * t, int idx, float value) { +static void set_element(struct ggml_tensor * t, int idx, float value) { ((float *)t->data)[idx] = value; } -} // namespace - int main(void) { struct ggml_init_params params = { /* .mem_size = */ 1024*1024*1024, diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index 6ec719dae..884af4054 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -19,20 +19,18 @@ constexpr float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f; constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f; constexpr float MAX_DOT_PRODUCT_ERROR = 0.02f; +static const char* RESULT_STR[] = {"ok", "FAILED"}; -namespace { - -const char* RESULT_STR[] = {"ok", "FAILED"}; // Generate synthetic data -void generate_data(float offset, size_t n, float * dst) { +static void generate_data(float offset, size_t n, float * dst) { for (size_t i = 0; i < n; i++) { dst[i] = 0.1 + 2*cosf(i + offset); } } // Calculate RMSE between two float arrays -float array_rmse(const float * a1, const float * a2, size_t n) { +static float array_rmse(const float * a1, const float * a2, size_t n) { double sum = 0; for (size_t i = 0; i < n; i++) { double diff = a1[i] - a2[i]; @@ -42,7 +40,7 @@ float array_rmse(const float * a1, const float * a2, size_t n) { } // Total quantization error on test data -float total_quantization_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data) { +static float total_quantization_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data) { std::vector tmp_q(2*test_size); std::vector tmp_out(test_size); @@ -52,7 +50,7 @@ float total_quantization_error(ggml_type_traits_t & qfns, size_t test_size, cons } // Total quantization error on test data -float reference_quantization_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data) { +static float reference_quantization_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data) { std::vector tmp_q(2*test_size); std::vector tmp_out(test_size); std::vector tmp_out_ref(test_size); @@ -66,7 +64,7 @@ float reference_quantization_error(ggml_type_traits_t & qfns, size_t test_size, return array_rmse(tmp_out.data(), tmp_out_ref.data(), test_size); } -float dot_product(const float * a1, const float * a2, size_t test_size) { +static float dot_product(const float * a1, const float * a2, size_t test_size) { double sum = 0; for (size_t i = 0; i < test_size; i++) { sum += a1[i] * a2[i]; @@ -75,7 +73,9 @@ float dot_product(const float * a1, const float * a2, size_t test_size) { } // Total dot product error -float dot_product_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data1, const float *test_data2) { +static float dot_product_error( + ggml_type_traits_t & qfns, size_t test_size, const float * test_data1, const float *test_data2 +) { std::vector tmp_q1(2*test_size); std::vector tmp_q2(2*test_size); @@ -92,8 +92,6 @@ float dot_product_error(ggml_type_traits_t & qfns, size_t test_size, const float return fabsf(result - dot_ref) / test_size; } -} // namespace - int main(int argc, char * argv[]) { bool verbose = false; const size_t test_size = 32 * 128; diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp index b1375ea10..01aa69877 100644 --- a/tests/test-quantize-perf.cpp +++ b/tests/test-quantize-perf.cpp @@ -60,25 +60,23 @@ inline int64_t cpu_cycles() { #endif -namespace { - // Generate synthetic data -void generate_data(float offset, size_t n, float * dst) { +static void generate_data(float offset, size_t n, float * dst) { for (size_t i = 0; i < n; i++) { dst[i] = 0.1 + 2*cosf(i + offset); } } -float gigabytes_per_second(size_t bytes, int64_t usecs) { +static float gigabytes_per_second(size_t bytes, int64_t usecs) { return bytes / (float) usecs * 1000000 / (1024*1024*1024); } -void * align_with_offset(void * ptr, int offset) { +static void * align_with_offset(void * ptr, int offset) { size_t dummy_size = MAX_ALIGNMENT * 4; return (char *) std::align(MAX_ALIGNMENT, MAX_ALIGNMENT, ptr, dummy_size) + offset; } -void benchmark_function(size_t size, size_t q_size, int64_t iterations, const std::function & function) { +static void benchmark_function(size_t size, size_t q_size, int64_t iterations, const std::function & function) { int64_t min_time_us = INT64_MAX; int64_t total_time_us = 0; int64_t min_time_cycles = INT64_MAX; @@ -110,7 +108,7 @@ void benchmark_function(size_t size, size_t q_size, int64_t iterations, const st printf(" quantized throughput : %9.2f GB/s\n", gigabytes_per_second(q_size * iterations, total_time_us)); } -void usage(char * argv[]) { +static void usage(char * argv[]) { printf("Benchmark quantization specific functions on synthetic data\n"); printf("\n"); printf("usage: %s [options]\n", argv[0]); @@ -139,8 +137,6 @@ void usage(char * argv[]) { printf(" set test iteration number (%d)\n", ITERATIONS); } -} // namespace - int main(int argc, char * argv[]) { quantize_perf_params params {}; diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp index a928f53c1..019c0d462 100644 --- a/tests/test-sampling.cpp +++ b/tests/test-sampling.cpp @@ -13,9 +13,7 @@ #include -namespace { - -void dump(const llama_token_data_array * candidates) { +static void dump(const llama_token_data_array * candidates) { for (size_t i = 0; i < candidates->size; i++) { printf("%d: %f (%f)\n", candidates->data[i].id, candidates->data[i].p, candidates->data[i].logit); } @@ -24,9 +22,7 @@ void dump(const llama_token_data_array * candidates) { #define DUMP(__candidates) do { printf("%s:%d (%s)\n", __FILE__, __LINE__, __func__); dump((__candidates)); printf("-\n"); } while(0) -void test_top_k(const std::vector & probs, - const std::vector & expected_probs, - int k) { +static void test_top_k(const std::vector & probs, const std::vector & expected_probs, int k) { size_t n_vocab = probs.size(); std::vector candidates; candidates.reserve(n_vocab); @@ -48,10 +44,7 @@ void test_top_k(const std::vector & probs, } -void test_top_p(const std::vector & probs, - const std::vector & expected_probs, - float p) { - +static void test_top_p(const std::vector & probs, const std::vector & expected_probs, float p) { size_t n_vocab = probs.size(); std::vector candidates; candidates.reserve(n_vocab); @@ -73,9 +66,7 @@ void test_top_p(const std::vector & probs, } -void test_tfs(const std::vector & probs, - const std::vector & expected_probs, - float z) { +static void test_tfs(const std::vector & probs, const std::vector & expected_probs, float z) { size_t n_vocab = probs.size(); std::vector candidates; candidates.reserve(n_vocab); @@ -96,9 +87,7 @@ void test_tfs(const std::vector & probs, } -void test_typical(const std::vector & probs, - const std::vector & expected_probs, - float p) { +static void test_typical(const std::vector & probs, const std::vector & expected_probs, float p) { size_t n_vocab = probs.size(); std::vector candidates; candidates.reserve(n_vocab); @@ -119,11 +108,10 @@ void test_typical(const std::vector & probs, } -void test_repetition_penalty( - const std::vector & probs, - const std::vector & last_tokens, - const std::vector & expected_probs, - float penalty) { +static void test_repetition_penalty( + const std::vector & probs, const std::vector & last_tokens, + const std::vector & expected_probs, float penalty +) { assert(probs.size() == expected_probs.size()); size_t n_vocab = probs.size(); @@ -148,11 +136,10 @@ void test_repetition_penalty( } -void test_frequency_presence_penalty( - const std::vector & probs, - const std::vector & last_tokens, - const std::vector & expected_probs, - float alpha_frequency, float alpha_presence) { +static void test_frequency_presence_penalty( + const std::vector & probs, const std::vector & last_tokens, + const std::vector & expected_probs, float alpha_frequency, float alpha_presence +) { assert(probs.size() == expected_probs.size()); size_t n_vocab = probs.size(); @@ -176,8 +163,6 @@ void test_frequency_presence_penalty( } } -} // namespace - int main(void) { ggml_time_init();