From ca97583f0b5c403ee09f9d34c15ebfb94441945d Mon Sep 17 00:00:00 2001
From: xaedes
Date: Tue, 29 Aug 2023 01:19:45 +0200
Subject: [PATCH] remove vocab related code as it is unnecessary

---
 examples/finetune/finetune.cpp | 32 --------------------------------
 1 file changed, 32 deletions(-)

diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index 0e8a816a1..09cfc6bf3 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -154,19 +154,6 @@ struct ggml_tensor * randomize_tensor_uniform(struct ggml_tensor * tensor, struc
     return tensor;
 }
 
-struct llama_vocab {
-    using id    = int32_t;
-    using token = std::string;
-
-    struct token_score {
-        token tok;
-        float score;
-    };
-
-    std::unordered_map<token, id> token_to_id;
-    std::vector<token_score> id_to_token;
-};
-
 struct my_llama_hparams {
     uint32_t n_vocab = 32000;
     uint32_t n_ctx   = 512;   // this is provided as user input?
@@ -2304,25 +2291,6 @@ int main(int argc, char ** argv) {
     struct llama_model * lmodel = llama_load_model_from_file(params.fn_model_base, llama_params);
     struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
 
-    //struct llama_vocab vocab;
-    //{
-    //    std::vector<const char *> strings;
-    //    std::vector<float> scores;
-    //    int n_vocab = llama_n_vocab(lctx);
-    //    strings.resize(n_vocab, NULL);
-    //    scores.resize(n_vocab, 0);
-    //    n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
-    //    GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
-    //    vocab.id_to_token.resize(n_vocab);
-    //    for (int i=0; i<n_vocab; ++i) {
-    //        std::string tok   = std::string(strings[i]);
-    //        float       score = scores[i];
-    //        vocab.id_to_token[i].tok   = tok;
-    //        vocab.id_to_token[i].score = score;
-    //        vocab.token_to_id.emplace(tok, i);
-    //    }
-    //}
-
     printf("%s: tokenize training data\n", __func__);
     std::vector<llama_token> train_tokens;
     if (tokenize_file(lctx, params.fn_train_data, train_tokens) < 0) {