Improve performance by replacing std::map with std::unordered_map, and by changing `std::map<id, token> id_to_token;` to `std::vector<token> id_to_token;` (token ids are dense integers, so a vector gives O(1) lookup without hashing)
This commit is contained in:
parent
4545539d71
commit
25ef27c6bc
4 changed files with 11 additions and 9 deletions
5
main.cpp
5
main.cpp
|
@ -31,7 +31,7 @@
|
|||
static const int EOS_TOKEN_ID = 2;
|
||||
|
||||
// determine number of model parts based on the dimension
|
||||
static const std::map<int, int> LLAMA_N_PARTS = {
|
||||
static const std::unordered_map<int, int> LLAMA_N_PARTS = {
|
||||
{ 4096, 1 },
|
||||
{ 5120, 2 },
|
||||
{ 6656, 4 },
|
||||
|
@ -85,7 +85,7 @@ struct llama_model {
|
|||
|
||||
//
|
||||
struct ggml_context * ctx;
|
||||
std::map<std::string, struct ggml_tensor *> tensors;
|
||||
std::unordered_map<std::string, struct ggml_tensor *> tensors;
|
||||
};
|
||||
|
||||
// load the model's weights from a file
|
||||
|
@ -147,6 +147,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
|
|||
// load vocab
|
||||
{
|
||||
std::string word;
|
||||
vocab.id_to_token.resize(model.hparams.n_vocab);
|
||||
for (int i = 0; i < model.hparams.n_vocab; i++) {
|
||||
uint32_t len;
|
||||
fin.read((char *) &len, sizeof(len));
|
||||
|
|
|
@ -113,6 +113,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
|
|||
}
|
||||
|
||||
std::string word;
|
||||
vocab.id_to_token.resize(n_vocab);
|
||||
for (int i = 0; i < n_vocab; i++) {
|
||||
uint32_t len;
|
||||
finp.read ((char *) &len, sizeof(len));
|
||||
|
|
|
@ -148,8 +148,8 @@ void replace(std::string & str, const std::string & needle, const std::string &
|
|||
}
|
||||
}
|
||||
|
||||
std::map<std::string, int32_t> json_parse(const std::string & fname) {
|
||||
std::map<std::string, int32_t> result;
|
||||
std::unordered_map<std::string, int32_t> json_parse(const std::string & fname) {
|
||||
std::unordered_map<std::string, int32_t> result;
|
||||
|
||||
// read file into string
|
||||
std::string json;
|
||||
|
@ -334,7 +334,7 @@ std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, const std::st
|
|||
break;
|
||||
}
|
||||
res.push_back(token_id);
|
||||
auto token = (*vocab.id_to_token.find(token_id)).second;
|
||||
const auto &token = vocab.id_to_token.at(token_id);
|
||||
i -= token.length();
|
||||
}
|
||||
|
||||
|
|
8
utils.h
8
utils.h
|
@ -3,7 +3,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <thread>
|
||||
|
@ -56,14 +56,14 @@ struct gpt_vocab {
|
|||
using id = int32_t;
|
||||
using token = std::string;
|
||||
|
||||
std::map<token, id> token_to_id;
|
||||
std::map<id, token> id_to_token;
|
||||
std::unordered_map<token, id> token_to_id;
|
||||
std::vector<token> id_to_token;
|
||||
};
|
||||
|
||||
void replace(std::string & str, const std::string & needle, const std::string & replacement);
|
||||
|
||||
// poor-man's JSON parsing
|
||||
std::map<std::string, int32_t> json_parse(const std::string & fname);
|
||||
std::unordered_map<std::string, int32_t> json_parse(const std::string & fname);
|
||||
|
||||
// split text into tokens
|
||||
//
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue