From d14c066f0c34d723f20706dd8eea64934e5c2561 Mon Sep 17 00:00:00 2001
From: Aniket
Date: Tue, 8 Aug 2023 20:40:17 -0400
Subject: [PATCH] cleaning up to remove spaces and satisfy failed checks

---
 .../convert-llama2c-to-ggml.cpp | 43 +++++++++----------
 1 file changed, 21 insertions(+), 22 deletions(-)

diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index f7b144eed..2f2e9a159 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -55,17 +55,17 @@ void malloc_weights(TransformerWeights* w, Config* p) {
     // we calloc instead of malloc to keep valgrind happy
     w->token_embedding_table = new float[p->vocab_size * p->dim]();
     printf("[%s:AK] Allocating [%d] x [%d] = [%d] float space for w->token_embedding_table\n",__func__,p->vocab_size , p->dim, p->vocab_size * p->dim);
-    
+
     w->rms_att_weight = new float[p->n_layers * p->dim]();
     printf("[%s:AK] Allocating [%d] x [%d] = [%d] float space for w->rms_att_weight\n",__func__,p->n_layers, p->dim, p->n_layers * p->dim);

     w->rms_ffn_weight = new float[p->n_layers * p->dim]();
     printf("[%s:AK] Allocating [%d] x [%d] = [%d] float space for w->rms_ffn_weight\n",__func__,p->n_layers , p->dim, p->n_layers * p->dim);

-    w->wq = new float[p->n_layers * p->dim * p->dim](); 
+    w->wq = new float[p->n_layers * p->dim * p->dim]();
     printf("[%s:AK] Allocating [%d] x [%d] x [%d] = [%d] float space for w->wq\n",__func__,p->n_layers, p->dim, p->dim, p->n_layers * p->dim * p->dim);

-    w->wk = new float[p->n_layers * p->dim * p->dim](); 
+    w->wk = new float[p->n_layers * p->dim * p->dim]();
     printf("[%s:AK] Allocating [%d] x [%d] x [%d] = [%d] float space for w->wk\n",__func__,p->n_layers, p->dim, p->dim, p->n_layers * p->dim * p->dim);

     w->wv = new float[p->n_layers * p->dim * p->dim]();
@@ -200,7 +200,7 @@ struct my_llama_model {
 struct train_params {
     const char * fn_vocab_model;
     const char * fn_llama2c_model;
-    const char * fn_llama2c_output_model; 
+    const char * fn_llama2c_output_model;
     const char * fn_train_data;
     const char * fn_checkpoint_in;
     const char * fn_checkpoint_out;
@@ -295,7 +295,6 @@ void init_model(struct my_llama_model * model) {
     printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.w1 for [%d] layers\n",__func__, n_ff, n_embd, n_embd * n_ff, n_layer);
     printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.w2 for [%d] layers\n",__func__, n_embd, n_ff, n_ff * n_embd, n_layer);
     printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.w3 for [%d] layers\n",__func__, n_ff, n_embd, n_embd * n_ff, n_layer);
-    

     ggml_set_name(model->tok_embeddings, "tok_embeddings.weight");
     ggml_set_name(model->norm, "norm.weight");
@@ -506,7 +505,7 @@ void stuff_karpathy_weights_into_gg(struct ggml_tensor * gg_weights, float * kar
         case 2:
             ct = 0;
             for (int i1 = 0; i1 < gg_weights->ne[1]; i1++) {
-                for (int i0 = 0; i0 < gg_weights->ne[0]; i0++) { 
+                for (int i0 = 0; i0 < gg_weights->ne[0]; i0++) {
                     float * ptr = (float *) ((char *) gg_weights->data + i0*gg_weights->nb[0] + i1*gg_weights->nb[1]);
                     *ptr = karpathy_weights[ct];
                     ct++;
@@ -517,14 +516,14 @@
             ct = 0;
             for (int i2 = 0; i2 < gg_weights->ne[2]; i2++) {
                 for (int i1 = 0; i1 < gg_weights->ne[1]; i1++) {
-                    for (int i0 = 0; i0 < gg_weights->ne[0]; i0++) { 
+                    for (int i0 = 0; i0 < gg_weights->ne[0]; i0++) {
                         float * ptr = (float *) ((char *) gg_weights->data + i0*gg_weights->nb[0] + i1*gg_weights->nb[1] + i2*gg_weights->nb[2]);
                         *ptr = karpathy_weights[ct];
                         ct++;
                     }
                 }
             }
-            break; 
+            break;
     }
 }

@@ -559,8 +558,8 @@ void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * mod
     // float* -> struct ggml_tensor
     stuff_karpathy_weights_into_gg(model->tok_embeddings, w->token_embedding_table);
     stuff_karpathy_weights_into_gg(model->output, w->token_embedding_table);
-    
-    stuff_karpathy_weights_into_gg(model->norm, w->rms_final_weight); 
+
+    stuff_karpathy_weights_into_gg(model->norm, w->rms_final_weight);
     //print_row(model->norm, 0);

     // for rms-att-weight
@@ -568,7 +567,7 @@ void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * mod
     const auto & hparams = model->hparams;
     //int n_ff = model->hparams.n_embd;
     int n_ff = get_n_ff(&hparams);
-    
+
     for (uint32_t i = 0; i < model->hparams.n_layer; ++i){
         auto & layer = model->layers[i];
         // 1d
@@ -580,7 +579,7 @@ void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * mod
         stuff_karpathy_weights_into_gg(layer.wk , &w->wk[i*row_length*row_length]);
         stuff_karpathy_weights_into_gg(layer.wv , &w->wv[i*row_length*row_length]);
         stuff_karpathy_weights_into_gg(layer.wo , &w->wo[i*row_length*row_length]);
-        
+
         stuff_karpathy_weights_into_gg(layer.w1 , &w->w1[i*row_length*n_ff]);
         stuff_karpathy_weights_into_gg(layer.w2 , &w->w2[i*n_ff*row_length]);
         stuff_karpathy_weights_into_gg(layer.w3 , &w->w3[i*row_length*n_ff]);
@@ -589,7 +588,7 @@ void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * mod
     write_tensor(&file, model->tok_embeddings);
     write_tensor(&file, model->norm);
     write_tensor(&file, model->output); // ?
-    for (uint32_t i = 0; i < model->hparams.n_layer; ++i) { 
+    for (uint32_t i = 0; i < model->hparams.n_layer; ++i) {
         auto & layer = model->layers[i];

         write_tensor(&file, layer.attention_norm);
@@ -660,8 +659,8 @@ void print_usage(int /*argc*/, char ** argv, const struct train_params * params)
     fprintf(stderr, "options:\n");
     fprintf(stderr, "  -h, --help                       show this help message and exit\n");
     fprintf(stderr, "  --vocab-model FNAME              model path from which to load vocab (default '%s')\n", params->fn_vocab_model);
-    fprintf(stderr, "  --llama2c-model FNAME            model path from which to load Karpathy's llama2.c model\n"); 
-    fprintf(stderr, "  --llama2c-output-model FNAME     model path to save the converted llama2.c model (default %s')\n", params->fn_llama2c_output_model); 
+    fprintf(stderr, "  --llama2c-model FNAME            model path from which to load Karpathy's llama2.c model\n");
+    fprintf(stderr, "  --llama2c-output-model FNAME     model path to save the converted llama2.c model (default %s')\n", params->fn_llama2c_output_model);
     fprintf(stderr, "\n");
 }
@@ -688,13 +687,13 @@ bool params_parse(int argc, char ** argv, struct train_params * params) {
                 invalid_param = true;
                 break;
             }
-            params->fn_llama2c_model = argv[i]; 
+            params->fn_llama2c_model = argv[i];
         } else if (arg == "--llama2c-output-model") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
-            params->fn_llama2c_output_model = argv[i]; 
+            params->fn_llama2c_output_model = argv[i];
         } else if (arg == "-h" || arg == "--help") {
             print_usage(argc, argv, &default_params);
             exit(0);
         }
@@ -720,7 +719,7 @@ int main(int argc, char ** argv) {
     }
     Config config;
     TransformerWeights weights;
-    { 
+    {
         FILE *file = fopen(params.fn_llama2c_model, "rb");
         if (!file) { printf("Unable to open the checkpoint file %s!\n", params.fn_llama2c_model); return 1; }
         // read in the config header
@@ -741,7 +740,7 @@
     {
         std::vector<const char *> strings;
         std::vector<float> scores;
-        int n_vocab = llama_n_vocab(lctx); 
+        int n_vocab = llama_n_vocab(lctx);
         strings.resize(n_vocab, NULL);
         scores.resize(n_vocab, 0);
         n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
@@ -749,7 +748,7 @@
         vocab.id_to_token.resize(n_vocab);
         for (int i=0; i<n_vocab; ++i) {