Improve performance by replacing std::map with std::unordered_map, and by changing `std::map<id, token> id_to_token;` to `std::vector<token> id_to_token;` (token ids are dense integers, so a vector gives O(1) lookup without hashing)
This commit is contained in:
parent
4545539d71
commit
25ef27c6bc
4 changed files with 11 additions and 9 deletions
5
main.cpp
5
main.cpp
|
@ -31,7 +31,7 @@
|
|||
static const int EOS_TOKEN_ID = 2;
|
||||
|
||||
// determine number of model parts based on the dimension
|
||||
static const std::map<int, int> LLAMA_N_PARTS = {
|
||||
static const std::unordered_map<int, int> LLAMA_N_PARTS = {
|
||||
{ 4096, 1 },
|
||||
{ 5120, 2 },
|
||||
{ 6656, 4 },
|
||||
|
@ -85,7 +85,7 @@ struct llama_model {
|
|||
|
||||
//
|
||||
struct ggml_context * ctx;
|
||||
std::map<std::string, struct ggml_tensor *> tensors;
|
||||
std::unordered_map<std::string, struct ggml_tensor *> tensors;
|
||||
};
|
||||
|
||||
// load the model's weights from a file
|
||||
|
@ -147,6 +147,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
|
|||
// load vocab
|
||||
{
|
||||
std::string word;
|
||||
vocab.id_to_token.resize(model.hparams.n_vocab);
|
||||
for (int i = 0; i < model.hparams.n_vocab; i++) {
|
||||
uint32_t len;
|
||||
fin.read((char *) &len, sizeof(len));
|
||||
|
|
|
@ -113,6 +113,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
|
|||
}
|
||||
|
||||
std::string word;
|
||||
vocab.id_to_token.resize(n_vocab);
|
||||
for (int i = 0; i < n_vocab; i++) {
|
||||
uint32_t len;
|
||||
finp.read ((char *) &len, sizeof(len));
|
||||
|
|
|
@ -148,8 +148,8 @@ void replace(std::string & str, const std::string & needle, const std::string &
|
|||
}
|
||||
}
|
||||
|
||||
std::map<std::string, int32_t> json_parse(const std::string & fname) {
|
||||
std::map<std::string, int32_t> result;
|
||||
std::unordered_map<std::string, int32_t> json_parse(const std::string & fname) {
|
||||
std::unordered_map<std::string, int32_t> result;
|
||||
|
||||
// read file into string
|
||||
std::string json;
|
||||
|
@ -334,7 +334,7 @@ std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, const std::st
|
|||
break;
|
||||
}
|
||||
res.push_back(token_id);
|
||||
auto token = (*vocab.id_to_token.find(token_id)).second;
|
||||
const auto &token = vocab.id_to_token.at(token_id);
|
||||
i -= token.length();
|
||||
}
|
||||
|
||||
|
|
8
utils.h
8
utils.h
|
@ -3,7 +3,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <thread>
|
||||
|
@ -56,14 +56,14 @@ struct gpt_vocab {
|
|||
using id = int32_t;
|
||||
using token = std::string;
|
||||
|
||||
std::map<token, id> token_to_id;
|
||||
std::map<id, token> id_to_token;
|
||||
std::unordered_map<token, id> token_to_id;
|
||||
std::vector<token> id_to_token;
|
||||
};
|
||||
|
||||
void replace(std::string & str, const std::string & needle, const std::string & replacement);
|
||||
|
||||
// poor-man's JSON parsing
|
||||
std::map<std::string, int32_t> json_parse(const std::string & fname);
|
||||
std::unordered_map<std::string, int32_t> json_parse(const std::string & fname);
|
||||
|
||||
// split text into tokens
|
||||
//
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue