Improve performance by replacing std::map with std::unordered_map, and by changing `std::map<id, token> id_to_token;` to `std::vector<token> id_to_token;`

This commit is contained in:
Fabio Rossini Sluzala 2023-03-19 18:38:42 -03:00
parent 4545539d71
commit 25ef27c6bc
No known key found for this signature in database
GPG key ID: F9D569BBF49F437B
4 changed files with 11 additions and 9 deletions

View file

@@ -31,7 +31,7 @@
 static const int EOS_TOKEN_ID = 2;

 // determine number of model parts based on the dimension
-static const std::map<int, int> LLAMA_N_PARTS = {
+static const std::unordered_map<int, int> LLAMA_N_PARTS = {
     { 4096, 1 },
     { 5120, 2 },
     { 6656, 4 },
@@ -85,7 +85,7 @@ struct llama_model {
     //
     struct ggml_context * ctx;
-    std::map<std::string, struct ggml_tensor *> tensors;
+    std::unordered_map<std::string, struct ggml_tensor *> tensors;
 };

 // load the model's weights from a file
@@ -147,6 +147,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
     // load vocab
     {
         std::string word;
+        vocab.id_to_token.resize(model.hparams.n_vocab);
         for (int i = 0; i < model.hparams.n_vocab; i++) {
             uint32_t len;
             fin.read((char *) &len, sizeof(len));

View file

@@ -113,6 +113,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
        }

        std::string word;
+       vocab.id_to_token.resize(n_vocab);
        for (int i = 0; i < n_vocab; i++) {
            uint32_t len;
            finp.read ((char *) &len, sizeof(len));

View file

@@ -148,8 +148,8 @@ void replace(std::string & str, const std::string & needle, const std::string &
     }
 }

-std::map<std::string, int32_t> json_parse(const std::string & fname) {
-    std::map<std::string, int32_t> result;
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname) {
+    std::unordered_map<std::string, int32_t> result;

     // read file into string
     std::string json;
@@ -334,7 +334,7 @@ std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, const std::st
                 break;
             }
             res.push_back(token_id);
-            auto token = (*vocab.id_to_token.find(token_id)).second;
+            const auto &token = vocab.id_to_token.at(token_id);
             i -= token.length();
         }

View file

@@ -3,7 +3,7 @@
 #pragma once

 #include <string>
-#include <map>
+#include <unordered_map>
 #include <vector>
 #include <random>
 #include <thread>
@@ -56,14 +56,14 @@ struct gpt_vocab {
     using id    = int32_t;
     using token = std::string;

-    std::map<token, id> token_to_id;
-    std::map<id, token> id_to_token;
+    std::unordered_map<token, id> token_to_id;
+    std::vector<token> id_to_token;
 };

 void replace(std::string & str, const std::string & needle, const std::string & replacement);

 // poor-man's JSON parsing
-std::map<std::string, int32_t> json_parse(const std::string & fname);
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname);

 // split text into tokens
 //