common : simplify
This commit is contained in:
parent
6c081e501c
commit
411137a608
2 changed files with 2 additions and 23 deletions
|
@ -2326,21 +2326,6 @@ std::vector<llama_token> llama_tokenize(
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
|
|
||||||
std::vector<char> result(8, 0);
|
|
||||||
const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
|
|
||||||
if (n_tokens < 0) {
|
|
||||||
result.resize(-n_tokens);
|
|
||||||
int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
|
|
||||||
GGML_ASSERT(check == -n_tokens);
|
|
||||||
} else {
|
|
||||||
result.resize(n_tokens);
|
|
||||||
}
|
|
||||||
|
|
||||||
return std::string(result.data(), result.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
// duplicate with ability to specify whether to use special token
|
|
||||||
std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
|
std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
|
||||||
std::vector<char> result(8, 0);
|
std::vector<char> result(8, 0);
|
||||||
const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
|
const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
|
||||||
|
|
|
@ -237,18 +237,12 @@ std::vector<llama_token> llama_tokenize(
|
||||||
bool add_special,
|
bool add_special,
|
||||||
bool parse_special = false);
|
bool parse_special = false);
|
||||||
|
|
||||||
// tokenizes a token into a piece
|
// tokenizes a token into a piece, optionally renders special/control tokens
|
||||||
// should work similar to Python's `tokenizer.id_to_piece`
|
// should work similar to Python's `tokenizer.id_to_piece`
|
||||||
std::string llama_token_to_piece(
|
|
||||||
const struct llama_context * ctx,
|
|
||||||
llama_token token
|
|
||||||
);
|
|
||||||
|
|
||||||
std::string llama_token_to_piece(
|
std::string llama_token_to_piece(
|
||||||
const struct llama_context * ctx,
|
const struct llama_context * ctx,
|
||||||
llama_token token,
|
llama_token token,
|
||||||
bool special
|
bool special = true);
|
||||||
);
|
|
||||||
|
|
||||||
// TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function
|
// TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function
|
||||||
// that takes into account the tokenizer type and decides how to handle the leading space
|
// that takes into account the tokenizer type and decides how to handle the leading space
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue