We could use std::unordered_map over std::map (#305)
* Improve performance by changing std::map to std::unordered_map, and std::map<id, token> id_to_token; to std::vector<token> id_to_token;
* Fix last commit on gpt_vocab_init: add vocab.id_to_token.resize(vocab.token_to_id.size());
* Removed include <map>
* Nest struct token score inside gpt_vocab
* Renamed token to tok
This commit is contained in:
parent
89d5d90f3b
commit
353ec251a4
4 changed files with 36 additions and 24 deletions
14
utils.h
14
utils.h
|
@@ -3,7 +3,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <thread>
|
||||
|
@@ -65,15 +65,19 @@ struct llama_vocab {
|
|||
using id = int32_t;
|
||||
using token = std::string;
|
||||
|
||||
std::map<token, id> token_to_id;
|
||||
std::map<id, token> id_to_token;
|
||||
std::map<id, float> score;
|
||||
struct token_score {
|
||||
token tok;
|
||||
float score;
|
||||
};
|
||||
|
||||
std::unordered_map<token, id> token_to_id;
|
||||
std::vector<token_score> id_to_token;
|
||||
};
|
||||
|
||||
void replace(std::string & str, const std::string & needle, const std::string & replacement);
|
||||
|
||||
// poor-man's JSON parsing
|
||||
std::map<std::string, int32_t> json_parse(const std::string & fname);
|
||||
std::unordered_map<std::string, int32_t> json_parse(const std::string & fname);
|
||||
|
||||
// TODO: temporary until #77 is merged, need this now for some tokenizer tests
|
||||
bool llama_vocab_load(const std::string & fname, llama_vocab & vocab);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue