Add llama_tokens_to_string() to utils.cpp
- Also single token converter
This commit is contained in:
parent
912e6246d6
commit
05224ed472
2 changed files with 15 additions and 0 deletions
10
utils.cpp
10
utils.cpp
|
@ -347,6 +347,16 @@ std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, const std::st
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
std::string llama_tokens_to_string(const gpt_vocab & vocab, const std::vector<gpt_vocab::id> & tokens) {
|
||||||
|
std::string res;
|
||||||
|
for (auto t : tokens) {
|
||||||
|
res += vocab.id_to_token.at(t);
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
// Return the vocabulary text for one token id.
// Note: despite its plural name, `tokens` is a single id.
// The lookup uses id_to_token.at(), so a missing id is reported however that
// container's at() reports it.
std::string single_llama_token_to_string(const gpt_vocab & vocab, const gpt_vocab::id & tokens) {
    const auto & piece = vocab.id_to_token.at(tokens);
    return piece;
}
|
||||||
|
|
||||||
bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) {
|
bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) {
|
||||||
printf("%s: loading vocab from '%s'\n", __func__, fname.c_str());
|
printf("%s: loading vocab from '%s'\n", __func__, fname.c_str());
|
||||||
|
|
5
utils.h
5
utils.h
|
@ -81,6 +81,11 @@ std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::stri
|
||||||
// ref: https://github.com/google/sentencepiece
|
// ref: https://github.com/google/sentencepiece
|
||||||
std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, const std::string & text, bool bos);
|
std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, const std::string & text, bool bos);
|
||||||
|
|
||||||
|
// convert tokens to string
|
||||||
|
// opposite of llama_tokenize
|
||||||
|
// Concatenates the vocabulary strings of the ids in `tokens`, in order,
// and returns the result; an empty `tokens` yields an empty string.
std::string llama_tokens_to_string(const gpt_vocab & vocab, const std::vector<gpt_vocab::id> & tokens);
|
||||||
|
// Returns the vocabulary string for a single token id (`tokens` is one id,
// despite the plural name).
// Declared without `inline`: the definition lives only in utils.cpp, and an
// inline function must be defined in every translation unit that uses it.
std::string single_llama_token_to_string(const gpt_vocab & vocab, const gpt_vocab::id & tokens);
|
||||||
|
|
||||||
// load the tokens from encoder.json
|
// load the tokens from encoder.json
|
||||||
bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab);
|
bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue