From d2b95e7e70d259cce689cd329a59bed6f89eb2a4 Mon Sep 17 00:00:00 2001 From: Johannes Rudolph Date: Thu, 10 Aug 2023 16:17:26 +0200 Subject: [PATCH] refactor vocab loading into its own method --- .../convert-llama2c-to-ggml.cpp | 53 ++++++++++--------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp index 3bd388635..28759ae39 100644 --- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp @@ -491,6 +491,32 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) { file->write_raw(tensor->data, ggml_nbytes(tensor)); } +void load_vocab(const char *filename, struct llama_vocab *vocab) { + struct llama_context_params llama_params = llama_context_default_params(); + llama_params.vocab_only = true; + + struct llama_model * lmodel = llama_load_model_from_file(filename, llama_params); + struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params); + + std::vector<const char *> strings; + std::vector<float> scores; + int n_vocab = llama_n_vocab(lctx); + strings.resize(n_vocab, NULL); + scores.resize(n_vocab, 0); + n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab); + GGML_ASSERT(n_vocab == llama_n_vocab(lctx)); + vocab->id_to_token.resize(n_vocab); + for (int i=0; i<n_vocab; ++i) { + std::string tok = std::string(strings[i]); + float score = scores[i]; + vocab->id_to_token[i].tok = tok; + vocab->id_to_token[i].score = score; + vocab->token_to_id.emplace(tok, i); + } + llama_free(lctx); + llama_free_model(lmodel); +} + void stuff_karpathy_weights_into_gg(struct ggml_tensor * gg_weights, float * karpathy_weights){ int ct; switch (gg_weights->n_dims){ @@ -737,30 +763,9 @@ int main(int argc, char ** argv) { fclose(file); } - struct llama_context_params llama_params = llama_context_default_params(); - llama_params.vocab_only = true; - - struct llama_model * lmodel = llama_load_model_from_file(params.fn_vocab_model, 
llama_params); - struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params); - struct llama_vocab vocab; - { - std::vector<const char *> strings; - std::vector<float> scores; - int n_vocab = llama_n_vocab(lctx); - strings.resize(n_vocab, NULL); - scores.resize(n_vocab, 0); - n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab); - GGML_ASSERT(n_vocab == llama_n_vocab(lctx)); - vocab.id_to_token.resize(n_vocab); - for (int i=0; i