From d2b95e7e70d259cce689cd329a59bed6f89eb2a4 Mon Sep 17 00:00:00 2001
From: Johannes Rudolph <johannes.rudolph@gmail.com>
Date: Thu, 10 Aug 2023 16:17:26 +0200
Subject: [PATCH] refactor vocab loading into its own method

---
 .../convert-llama2c-to-ggml.cpp               | 53 ++++++++++---------
 1 file changed, 28 insertions(+), 25 deletions(-)
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index 3bd388635..28759ae39 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -491,6 +491,32 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
     file->write_raw(tensor->data, ggml_nbytes(tensor));
 }
 
+void load_vocab(const char *filename, struct llama_vocab *vocab) {
+    struct llama_context_params llama_params = llama_context_default_params();
+    llama_params.vocab_only = true;
+
+    struct llama_model * lmodel = llama_load_model_from_file(filename, llama_params);
+    struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
+
+    std::vector<const char *> strings;
+    std::vector<float> scores;
+    int n_vocab = llama_n_vocab(lctx);
+    strings.resize(n_vocab, NULL);
+    scores.resize(n_vocab, 0);
+    n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
+    GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
+    vocab->id_to_token.resize(n_vocab);
+    for (int i=0; i<n_vocab; ++i) {
+        std::string tok   = std::string(strings[i]);
+        float       score = scores[i];
+        vocab->id_to_token[i].tok   = tok;
+        vocab->id_to_token[i].score = score;
+        vocab->token_to_id.emplace(tok, i);
+    }
+    llama_free(lctx);
+    llama_free_model(lmodel);
+}
+
 void stuff_karpathy_weights_into_gg(struct ggml_tensor * gg_weights, float * karpathy_weights){
     int ct;
     switch (gg_weights->n_dims){
@@ -737,30 +763,9 @@ int main(int argc, char ** argv) {
         fclose(file);
     }
 
-    struct llama_context_params llama_params = llama_context_default_params();
-    llama_params.vocab_only = true;
-
-    struct llama_model * lmodel = llama_load_model_from_file(params.fn_vocab_model, llama_params);
-    struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
-
     struct llama_vocab vocab;
-    {
-        std::vector<const char *> strings;
-        std::vector<float> scores;
-        int n_vocab = llama_n_vocab(lctx);
-        strings.resize(n_vocab, NULL);
-        scores.resize(n_vocab, 0);
-        n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
-        GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
-        vocab.id_to_token.resize(n_vocab);
-        for (int i=0; i<n_vocab; ++i) {
-            std::string tok   = std::string(strings[i]);
-            float       score = scores[i];
-            vocab.id_to_token[i].tok   = tok;
-            vocab.id_to_token[i].score = score;
-            vocab.token_to_id.emplace(tok, i);
-        }
-    }
+    load_vocab(params.fn_vocab_model, &vocab);
+
     struct my_llama_model model;
     model.hparams.n_vocab = config.vocab_size; //llama_n_vocab(lctx);
     model.hparams.n_ctx   = params.n_ctx;
@@ -782,8 +787,6 @@ int main(int argc, char ** argv) {
 
     printf("Saving llama.c model file %s in ggml format at %s\n", params.fn_llama2c_model, params.fn_llama2c_output_model);
 
-    llama_free(lctx);
-    llama_free_model(lmodel);
     ggml_free(model.ctx);
     free_weights(&weights);
     return 0;