From 25ef27c6bcd121c34bf4194d876393b1919daf9a Mon Sep 17 00:00:00 2001
From: Fabio Rossini Sluzala
Date: Sun, 19 Mar 2023 18:38:42 -0300
Subject: [PATCH] Improve performance by changing std::map to
 std::unordered_map and std::map<id, token> id_to_token; to
 std::vector<token> id_to_token;

---
 main.cpp     | 5 +++--
 quantize.cpp | 1 +
 utils.cpp    | 6 +++---
 utils.h      | 8 ++++----
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/main.cpp b/main.cpp
index c7186e0df..fd3ba58a3 100644
--- a/main.cpp
+++ b/main.cpp
@@ -31,7 +31,7 @@
 static const int EOS_TOKEN_ID = 2;
 
 // determine number of model parts based on the dimension
-static const std::map<int, int> LLAMA_N_PARTS = {
+static const std::unordered_map<int, int> LLAMA_N_PARTS = {
     { 4096, 1 },
     { 5120, 2 },
     { 6656, 4 },
@@ -85,7 +85,7 @@ struct llama_model {
     //
     struct ggml_context * ctx;
 
-    std::map<std::string, struct ggml_tensor *> tensors;
+    std::unordered_map<std::string, struct ggml_tensor *> tensors;
 };
 
 // load the model's weights from a file
@@ -147,6 +147,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
     // load vocab
     {
         std::string word;
+        vocab.id_to_token.resize(model.hparams.n_vocab);
         for (int i = 0; i < model.hparams.n_vocab; i++) {
             uint32_t len;
             fin.read((char *) &len, sizeof(len));
diff --git a/quantize.cpp b/quantize.cpp
index 14c7b277a..63ff750b3 100644
--- a/quantize.cpp
+++ b/quantize.cpp
@@ -113,6 +113,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
         }
 
         std::string word;
+        vocab.id_to_token.resize(n_vocab);
         for (int i = 0; i < n_vocab; i++) {
             uint32_t len;
             finp.read ((char *) &len, sizeof(len));
diff --git a/utils.cpp b/utils.cpp
index 08d5c6ba6..762edf547 100644
--- a/utils.cpp
+++ b/utils.cpp
@@ -148,8 +148,8 @@ void replace(std::string & str, const std::string & needle, const std::string &
     }
 }
 
-std::map<std::string, int32_t> json_parse(const std::string & fname) {
-    std::map<std::string, int32_t> result;
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname) {
+    std::unordered_map<std::string, int32_t> result;
 
     // read file into string
     std::string json;
@@ -334,7 +334,7 @@ std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, const std::st
             break;
         }
         res.push_back(token_id);
-        auto token = (*vocab.id_to_token.find(token_id)).second;
+        const auto &token = vocab.id_to_token.at(token_id);
         i -= token.length();
     }
 
diff --git a/utils.h b/utils.h
index 49658f7d9..d69462a57 100644
--- a/utils.h
+++ b/utils.h
@@ -3,7 +3,7 @@
 #pragma once
 
 #include <string>
-#include <map>
+#include <unordered_map>
 #include <vector>
 #include <random>
 #include <thread>
@@ -56,14 +56,14 @@ struct gpt_vocab {
     using id    = int32_t;
     using token = std::string;
 
-    std::map<token, id> token_to_id;
-    std::map<id, token> id_to_token;
+    std::unordered_map<token, id> token_to_id;
+    std::vector<token> id_to_token;
 };
 
 void replace(std::string & str, const std::string & needle, const std::string & replacement);
 
 // poor-man's JSON parsing
-std::map<std::string, int32_t> json_parse(const std::string & fname);
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname);
 
 // split text into tokens
 //