Remove previous space

parent 503b7531c7
commit 0cc6593f10

3 changed files with 45 additions and 54 deletions
common/common.cpp (4 changed lines)

```diff
@@ -2908,10 +2908,10 @@ std::vector<llama_token> llama_tokenize(
 std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
     std::string piece;
     piece.resize(piece.capacity());  // using string internal cache, 15 bytes + '\n'
-    const int n_chars = llama_token_to_piece(llama_get_model(ctx), token, &piece[0], piece.size(), special);
+    const int n_chars = llama_token_to_piece(llama_get_model(ctx), token, &piece[0], piece.size(), 0, special);
     if (n_chars < 0) {
         piece.resize(-n_chars);
-        int check = llama_token_to_piece(llama_get_model(ctx), token, &piece[0], piece.size(), special);
+        int check = llama_token_to_piece(llama_get_model(ctx), token, &piece[0], piece.size(), 0, special);
         GGML_ASSERT(check == -n_chars);
     }
     else {
```
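The wrapper keeps the API's grow-and-retry contract: a negative return value is the negated number of bytes the piece actually needs. A minimal caller-side sketch of that contract against the new six-argument signature (the buffer size and helper name here are illustrative, not from the commit):

```cpp
#include <cstdint>
#include <string>
#include <vector>
#include "llama.h"

// Sketch: resolve one token to text. A negative return from
// llama_token_to_piece is the negated required buffer size.
static std::string piece_for(const llama_model * model, llama_token token, bool special) {
    std::vector<char> buf(8);
    // lstrip = 0 keeps any leading space stored in the piece
    int32_t n = llama_token_to_piece(model, token, buf.data(), buf.size(), 0, special);
    if (n < 0) {
        buf.resize(-n); // grow to the exact required size and retry
        n = llama_token_to_piece(model, token, buf.data(), buf.size(), 0, special);
    }
    return std::string(buf.data(), n); // no null terminator is written
}
```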
llama.cpp (92 changed lines)
```diff
@@ -1835,18 +1835,19 @@ using llama_mlocks = std::vector<std::unique_ptr<llama_mlock>>;

 // NOTE: avoid ever using this except for building the token_to_piece caches
 static std::string llama_token_to_piece(const struct llama_model * model, llama_token token, bool special) {
-    std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size(), special);
-    if (n_tokens < 0) {
-        result.resize(-n_tokens);
-        int check = llama_token_to_piece(model, token, result.data(), result.size(), special);
-        GGML_ASSERT(check == -n_tokens);
+    std::string piece;
+    piece.resize(piece.capacity());  // using string internal cache
+    const int n_chars = llama_token_to_piece(model, token, &piece[0], piece.size(), 0, special);
+    if (n_chars < 0) {
+        piece.resize(-n_chars);
+        int check = llama_token_to_piece(model, token, &piece[0], piece.size(), 0, special);
+        GGML_ASSERT(check == -n_chars);
     }
     else {
-        result.resize(n_tokens);
+        piece.resize(n_chars);
     }

-    return std::string(result.data(), result.size());
+    return piece;
 }

 static ggml_backend_buffer_type_t llama_default_buffer_type_cpu(bool host_buffer) {
```
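The `piece.resize(piece.capacity())` idiom in both wrappers leans on the small-string optimization: a default-constructed `std::string` typically reports a capacity of 15 bytes on 64-bit libstdc++ and libc++, so most pieces fit with no heap allocation. The exact value is implementation-defined, which is presumably why the code queries `capacity()` rather than hardcoding it. A quick sketch of that assumption:

```cpp
#include <cstdio>
#include <string>

int main() {
    std::string piece;
    // Commonly prints 15 on 64-bit libstdc++/libc++, but the value is
    // implementation-defined; the wrappers query it instead of guessing.
    std::printf("empty capacity: %zu\n", piece.capacity());
    piece.resize(piece.capacity()); // in-place scratch buffer, no allocation
}
```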
```diff
@@ -18418,23 +18419,33 @@ static std::string llama_decode_text(const std::string & text) {
 }

 // does not write null-terminator to buf
-int32_t llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int32_t length, bool special) {
+int32_t llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int32_t length, int32_t lstrip, bool special) {
     // ref: https://github.com/ggerganov/llama.cpp/pull/7587#discussion_r1620983843
     if (!special && llama_is_control_token(model->vocab, token)) {
         return 0;
     }

+    // copy piece chars to output text buffer
+    // skip up to 'lstrip' leading spaces before copying
+    auto _try_copy = [=] (const char * token, size_t size) -> int32_t {
+        for (int32_t i = 0; i < lstrip && size && *token == ' '; ++i) {
+            token++;
+            size--;
+        }
+        if (length < (int32_t)size) {
+            return (int32_t) -size;
+        }
+        memcpy(buf, token, size);
+        return (int32_t) size;
+    };
+
     // if we have a cache - use it
     {
         const auto & cache = model->vocab.cache_token_to_piece;

         if (!cache.empty()) {
-            const auto & res = cache.at(token);
-            if (length < (int) res.size()) {
-                return -(int) res.size();
-            }
-            memcpy(buf, res.c_str(), res.size());
-            return res.size();
+            const auto & result = cache.at(token);
+            return _try_copy(result.data(), result.size());
         }
     }
```
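The new `_try_copy` lambda centralizes the copy-or-report-size logic that the per-token-type branches below previously duplicated, and adds the `lstrip` behavior: up to `lstrip` leading spaces are dropped from the piece before the length check and the copy. A self-contained sketch of just that helper, with a hypothetical input:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Standalone version of the _try_copy logic: drop up to 'lstrip'
// leading spaces from (token, size), then copy into buf of 'length'
// bytes, returning the negated required size if buf is too small.
static int32_t try_copy(const char * token, size_t size,
                        char * buf, int32_t length, int32_t lstrip) {
    for (int32_t i = 0; i < lstrip && size && *token == ' '; ++i) {
        token++;
        size--;
    }
    if (length < (int32_t) size) {
        return (int32_t) -size;
    }
    std::memcpy(buf, token, size);
    return (int32_t) size;
}

int main() {
    char buf[16];
    int32_t n = try_copy(" Hello", 6, buf, sizeof(buf), 1);
    std::printf("%d '%.*s'\n", n, n, buf); // prints: 5 'Hello'
}
```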
```diff
@@ -18447,32 +18458,17 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token
             if (llama_is_normal_token(model->vocab, token)) {
                 std::string result = model->vocab.id_to_token[token].text;
                 llama_unescape_whitespace(result);
-                if (length < (int) result.length()) {
-                    return -(int) result.length();
-                }
-                memcpy(buf, result.c_str(), result.length());
-                return result.length();
+                return _try_copy(result.data(), result.size());
             } else if (
                     (llama_is_user_defined_token(model->vocab, token)) ||
                     (llama_is_control_token     (model->vocab, token) && special)) {
-                std::string result = model->vocab.id_to_token[token].text;
-                if (length < (int) result.length()) {
-                    return -(int) result.length();
-                }
-                memcpy(buf, result.c_str(), result.length());
-                return result.length();
-            } else if (llama_is_unknown_token(model->vocab, token)) { // NOLINT
-                if (length < 3) {
-                    return -3;
-                }
-                memcpy(buf, "\xe2\x96\x85", 3);
-                return 3;
+                const std::string & result = model->vocab.id_to_token[token].text;
+                return _try_copy(result.data(), result.size());
+            /**/ } else if (llama_is_unknown_token(model->vocab, token)) { // NOLINT
+            /**/     return _try_copy("\xe2\x96\x85", 3);
             } else if (llama_is_byte_token(model->vocab, token)) {
-                if (length < 1) {
-                    return -1;
-                }
-                buf[0] = llama_token_to_byte(model->vocab, token);
-                return 1;
+                char byte = (char) llama_token_to_byte(model->vocab, token);
+                return _try_copy((char*)&byte, 1);
             }
             break;
         }
```
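For reference, the `"\xe2\x96\x85"` literal kept in the unknown-token branch is the three-byte UTF-8 encoding of U+2585 LOWER FIVE EIGHTHS BLOCK (▅), rendered in place of unknown tokens. A trivial check:

```cpp
#include <cstdio>

int main() {
    // 0xE2 0x96 0x85 is the UTF-8 encoding of U+2585 ("▅"), the
    // 3-byte placeholder the unknown-token branch above copies out.
    const char unk[] = "\xe2\x96\x85";
    std::printf("%s (%zu bytes)\n", unk, sizeof(unk) - 1);
}
```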
```diff
@@ -18480,22 +18476,13 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token
             // NOTE: we accept all unsupported token types,
             // suppressing them like CONTROL tokens.
             if (llama_is_normal_token(model->vocab, token)) {
-                std::string result = model->vocab.id_to_token[token].text;
-                result = llama_decode_text(result);
-                if (length < (int) result.length()) {
-                    return -(int) result.length();
-                }
-                memcpy(buf, result.c_str(), result.length());
-                return result.length();
+                std::string result = llama_decode_text(model->vocab.id_to_token[token].text);
+                return _try_copy(result.data(), result.size());
             } else if (
                     (llama_is_user_defined_token(model->vocab, token)) ||
                     (llama_is_control_token     (model->vocab, token) && special)) {
-                std::string result = model->vocab.id_to_token[token].text;
-                if (length < (int) result.length()) {
-                    return -(int) result.length();
-                }
-                memcpy(buf, result.c_str(), result.length());
-                return result.length();
+                const std::string & result = model->vocab.id_to_token[token].text;
+                return _try_copy(result.data(), result.size());
             }
             break;
         }
```
```diff
@@ -18513,12 +18500,15 @@ int32_t llama_detokenize(
                        char * text,
                     int32_t   text_len_max,
                        bool   special) {
+    // remove the leading space of the first non-control token
+    bool remove_space = model->vocab.tokenizer_add_space_prefix;
     int32_t avail = text_len_max;
     int32_t total = 0;

     for (int32_t i = 0; i < n_tokens; ++i) {
         GGML_ASSERT(avail >= 0);
-        int32_t n_chars = llama_token_to_piece(model, tokens[i], text, avail, special);
+        int32_t n_chars = llama_token_to_piece(model, tokens[i], text, avail, remove_space, special);
+        remove_space = remove_space && llama_is_control_token(model->vocab, tokens[i]); // until non-control token
         if (n_chars < 0) {
             avail = 0;
             total -= n_chars;
```
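With `remove_space` passed as the `lstrip` argument, the space injected by `add_space_prefix` is stripped exactly once: the flag survives only while the loop is still consuming control tokens and clears at the first non-control token, the one whose piece actually carries the space. A sketch of the gating over a hypothetical token stream (`is_control` stands in for `llama_is_control_token`):

```cpp
#include <cstdio>
#include <vector>

int main() {
    // Hypothetical stream: a BOS control token followed by two word pieces.
    std::vector<bool> is_control = { true, false, false };
    bool remove_space = true; // stands in for tokenizer_add_space_prefix

    for (size_t i = 0; i < is_control.size(); ++i) {
        std::printf("token %zu: lstrip=%d\n", i, (int) remove_space);
        // same update as the diff: flag survives only across control tokens
        remove_space = remove_space && is_control[i];
    }
    // prints lstrip=1, 1, 0 -> only the first non-control piece
    // has its injected leading space removed
}
```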
llama.h (3 changed lines)
```diff
@@ -888,13 +888,14 @@ extern "C" {
     // Token Id -> Piece.
     // Uses the vocabulary in the provided context.
     // Does not write null terminator to the buffer.
-    // User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
+    // User can skip up to 'lstrip' leading spaces before copying (useful when encoding/decoding multiple tokens with 'add_space_prefix')
     // @param special If true, special tokens are rendered in the output.
     LLAMA_API int32_t llama_token_to_piece(
               const struct llama_model * model,
                            llama_token   token,
                                   char * buf,
                                int32_t   length,
+                               int32_t   lstrip,
                                   bool   special);

     /// @details Convert the provided tokens into text.
```
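Call sites migrating to the new declaration gain one argument; passing `lstrip = 0` preserves the old behavior. A hedged sketch of a migrated call (the wrapper name is illustrative):

```cpp
#include <cstdint>
#include "llama.h"

// Migrated call site: lstrip = 0 reproduces the previous five-argument
// behavior; lstrip = 1 would drop one tokenizer-injected leading space.
static int32_t piece_compat(const llama_model * model, llama_token token,
                            char * buf, int32_t length, bool special) {
    return llama_token_to_piece(model, token, buf, length, /*lstrip=*/0, special);
}
```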