diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 0782d3a41..6a3a9d48e 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1245,8 +1245,13 @@ struct llama_vocab::impl {
 
     std::vector<llama_token> cache_special_tokens;
     std::vector<std::string> cache_token_to_piece; // llama_token_to_piece(special = true);
-
-    std::map<std::pair<std::string, std::string>, int> bpe_ranks;
+    struct PairHash {
+        size_t operator()(const std::pair<std::string, std::string>& p) const {
+            return std::hash<std::string>{}(p.first) ^ //create some hash for pair
+                   (std::hash<std::string>{}(p.second) << 1);
+        }
+    };
+    std::unordered_map<std::pair<std::string, std::string>, int, PairHash> bpe_ranks;
 
     // set of all tokens that cause "end of generation"
     std::set<llama_token> special_eog_ids;