Allow passing precomputed token pieces to reduce calls to llama_token_to_piece
This commit is contained in:
parent
9d3ba0bacd
commit
f29add56d8
3 changed files with 17 additions and 4 deletions
|
@@ -166,7 +166,7 @@ llama_token llama_sampling_sample(
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx_sampling->grammar != NULL) {
|
if (ctx_sampling->grammar != NULL) {
|
||||||
llama_sample_grammar(ctx_main, &cur_p, ctx_sampling->grammar);
|
llama_sample_grammar(ctx_main, &cur_p, ctx_sampling->grammar, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (temp < 0.0) {
|
if (temp < 0.0) {
|
||||||
|
|
15
llama.cpp
15
llama.cpp
|
@@ -7106,7 +7106,11 @@ void llama_sample_repetition_penalties(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * candidates, const struct llama_grammar * grammar) {
|
void llama_sample_grammar(
|
||||||
|
struct llama_context * ctx,
|
||||||
|
llama_token_data_array * candidates,
|
||||||
|
const struct llama_grammar * grammar,
|
||||||
|
char const * const * pieces) {
|
||||||
GGML_ASSERT(ctx);
|
GGML_ASSERT(ctx);
|
||||||
const int64_t t_start_sample_us = ggml_time_us();
|
const int64_t t_start_sample_us = ggml_time_us();
|
||||||
|
|
||||||
|
@@ -7125,7 +7129,14 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
|
||||||
|
|
||||||
for (size_t i = 0; i < candidates->size; ++i) {
|
for (size_t i = 0; i < candidates->size; ++i) {
|
||||||
const llama_token id = candidates->data[i].id;
|
const llama_token id = candidates->data[i].id;
|
||||||
const std::string piece = llama_token_to_piece(ctx, id);
|
std::string piece;
|
||||||
|
|
||||||
|
if (pieces != nullptr && pieces[id] != nullptr) {
|
||||||
|
piece = std::string(pieces[id]);
|
||||||
|
} else {
|
||||||
|
piece = llama_token_to_piece(ctx, id);
|
||||||
|
}
|
||||||
|
|
||||||
if (id == eos) {
|
if (id == eos) {
|
||||||
if (!allow_eos) {
|
if (!allow_eos) {
|
||||||
candidates->data[i].logit = -INFINITY;
|
candidates->data[i].logit = -INFINITY;
|
||||||
|
|
4
llama.h
4
llama.h
|
@@ -722,7 +722,9 @@ extern "C" {
|
||||||
LLAMA_API void llama_sample_grammar(
|
LLAMA_API void llama_sample_grammar(
|
||||||
struct llama_context * ctx,
|
struct llama_context * ctx,
|
||||||
llama_token_data_array * candidates,
|
llama_token_data_array * candidates,
|
||||||
const struct llama_grammar * grammar);
|
const struct llama_grammar * grammar,
|
||||||
|
char const * const * pieces);
|
||||||
|
|
||||||
|
|
||||||
/// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
|
/// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
|
||||||
/// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
|
/// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue