Add llama_tokens_to_string() to utils.cpp
- Also single token converter
This commit is contained in:
parent
912e6246d6
commit
05224ed472
2 changed files with 15 additions and 0 deletions
10
utils.cpp
10
utils.cpp
|
@ -347,6 +347,16 @@ std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, const std::st
|
|||
|
||||
return res;
|
||||
}
|
||||
std::string llama_tokens_to_string(const gpt_vocab & vocab, const std::vector<gpt_vocab::id> & tokens) {
|
||||
std::string res;
|
||||
for (auto t : tokens) {
|
||||
res += vocab.id_to_token.at(t);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
// Returns the vocabulary entry that vocab.id_to_token.at() yields for one id.
// Despite its plural name, `tokens` is a single token id.
// NOTE(review): id_to_token is declared outside this view; if it is a standard
// map, at() throws std::out_of_range for an id that is not present — confirm.
std::string single_llama_token_to_string(const gpt_vocab & vocab, const gpt_vocab::id & tokens) {
    const auto & entry = vocab.id_to_token.at(tokens);
    return entry;
}
|
||||
|
||||
bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) {
|
||||
printf("%s: loading vocab from '%s'\n", __func__, fname.c_str());
|
||||
|
|
5
utils.h
5
utils.h
|
@ -81,6 +81,11 @@ std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::stri
|
|||
// ref: https://github.com/google/sentencepiece
|
||||
std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, const std::string & text, bool bos);
|
||||
|
||||
// convert tokens to string
|
||||
// opposite llama_tokenize
|
||||
std::string llama_tokens_to_string(const gpt_vocab & vocab, const std::vector<gpt_vocab::id> & tokens);
|
||||
inline std::string single_llama_token_to_string(const gpt_vocab & vocab, const gpt_vocab::id & tokens);
|
||||
|
||||
// load the tokens from encoder.json
|
||||
bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue