sampling : add XTC sampler (#9742)
* Initial XTC commit Adds XTC sampler, not activated by default, but recommended settings by default. * Cleanup * Simplified chances calculation To be more in line with the original implementation, chance is calculated once at the beginning. * First fixes by comments Still need to look into sorting * Fixed trailing backspaces * Fixed RNG to be reproducible Thanks to @slaren for directions * Fixed forgotten header * Moved `min_keep` Moved from conditions to a simple check at the end. * Fixed broken randomization Thanks to @slaren for explanation * Swapped sorting for a custom algorithm Shifts tokens to remove the penalized ones, then puts the penalized at the back. Should make `min_keep` still viable. * Algorithm rework 1. Scan token from top till the first non-penalizable 2. Remove the last captured token (the least probable above threshold) 3. Shift all tokens to override the remaining penalizable 4. Penalize and put them at the bottom. * Added XTC to `test-sampling` * Simplified algorithm and more tests * Updated info in common and args * Merged back lost commits in common and arg * Update dump info in common * Fixed incorrect min_keep check * Added XTC to README * Renamed parameters, fixed info and defaults * probability is at 0 by default, but XTC is included in sampling queue * threshold higher than 0.5 switches XTC off * Initial server support * Added XTC to server UIs * Fixed labels in old server UI * Made algorithm safer and more readable * Removed xtc_threshold_max * Fixed arg after update * Quick fixes by comments * Simplified algorithm since threshold_max is removed * Renamed random distribution * Fixed tests and outdated README * Small fixes
This commit is contained in:
parent
dcdd535302
commit
fbc98b748e
11 changed files with 195 additions and 10 deletions
|
@ -1059,6 +1059,101 @@ struct llama_sampler * llama_sampler_init_temp_ext(float temp, float delta, floa
|
|||
};
|
||||
}
|
||||
|
||||
// xtc
|
||||
|
||||
// State of the XTC sampler: the user-supplied settings plus the random
// generator used for the per-call chance draw.
//
// NOTE: the field order must not change; llama_sampler_init_xtc fills this
// struct with a positional aggregate initializer.
struct llama_sampler_xtc {
    // the sampler does nothing when this is <= 0, or when a uniform draw exceeds it
    const float    probability;
    // candidates with p >= threshold are the "top choices"; a value above 0.5 disables the sampler
    const float    threshold;
    // the candidate list is only truncated if at least this many entries would remain
    const size_t   min_keep;

    // seed exactly as passed to llama_sampler_init_xtc
    const uint32_t seed;
    // result of get_rng_seed(seed); this is the value `rng` is actually seeded with
    uint32_t       seed_cur;

    // generator feeding the chance draw in the apply callback
    std::mt19937   rng;
};
|
||||
|
||||
// Returns the fixed identifier of this sampler. The sampler argument is unused.
static const char * llama_sampler_xtc_name(const struct llama_sampler * /*smpl*/) {
    static const char * const sampler_name = "xtc";

    return sampler_name;
}
|
||||
|
||||
static void llama_sample_xtc_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
|
||||
auto * ctx = (llama_sampler_xtc *) smpl->ctx;
|
||||
|
||||
if (ctx->probability <= 0.0f
|
||||
|| ctx->threshold > 0.5f
|
||||
|| cur_p->size < 2) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::uniform_real_distribution<float> distribution(0.0f, 1.0f);
|
||||
float chance = distribution(ctx->rng);
|
||||
if (chance > ctx->probability) return;
|
||||
|
||||
// in case it's not sorted/recalculated yet
|
||||
llama_sampler_softmax_impl(cur_p);
|
||||
|
||||
int pos_last = 0;
|
||||
|
||||
for (size_t i = 0; i < cur_p->size; ++i) {
|
||||
if (cur_p->data[i].p >= ctx->threshold) {
|
||||
pos_last = i;
|
||||
} else break;
|
||||
}
|
||||
|
||||
if (cur_p->size - pos_last >= ctx->min_keep && pos_last > 0) {
|
||||
cur_p->data += pos_last;
|
||||
cur_p->size -= pos_last;
|
||||
}
|
||||
}
|
||||
|
||||
static struct llama_sampler * llama_sampler_xtc_clone(const struct llama_sampler * smpl) {
|
||||
const auto * ctx = (const llama_sampler_xtc *) smpl->ctx;
|
||||
auto * result = llama_sampler_init_xtc(ctx->probability, ctx->threshold, ctx->min_keep, ctx->seed);
|
||||
|
||||
// copy the state
|
||||
{
|
||||
auto * result_ctx = (llama_sampler_xtc *) result->ctx;
|
||||
|
||||
result_ctx->rng = ctx->rng;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static void llama_sampler_xtc_free(struct llama_sampler * smpl) {
|
||||
delete (llama_sampler_xtc *) smpl->ctx;
|
||||
}
|
||||
|
||||
static void llama_sampler_xtc_reset(struct llama_sampler * smpl) {
|
||||
auto * ctx = (llama_sampler_xtc *) smpl->ctx;
|
||||
ctx->seed_cur = get_rng_seed(ctx->seed);
|
||||
ctx->rng.seed(ctx->seed_cur);
|
||||
}
|
||||
|
||||
// Callback table for the XTC sampler. No `accept` callback is provided.
// The entries are positional; the comments give the field each one fills.
static struct llama_sampler_i llama_sampler_xtc_i = {
    /* .name   = */ llama_sampler_xtc_name,
    /* .accept = */ nullptr,
    /* .apply  = */ llama_sample_xtc_apply,
    /* .reset  = */ llama_sampler_xtc_reset,
    /* .clone  = */ llama_sampler_xtc_clone,
    /* .free   = */ llama_sampler_xtc_free,
};
|
||||
|
||||
struct llama_sampler * llama_sampler_init_xtc(float p, float t, size_t min_keep, uint32_t seed) {
|
||||
auto seed_cur = get_rng_seed(seed);
|
||||
return new llama_sampler {
|
||||
/* .iface = */ &llama_sampler_xtc_i,
|
||||
/* .ctx = */ new llama_sampler_xtc {
|
||||
/* .probability = */ p,
|
||||
/* .threshold = */ t,
|
||||
/* .min_keep = */ min_keep,
|
||||
/* .seed = */ seed,
|
||||
/* .seed_cur = */ seed_cur,
|
||||
/* .rng = */ std::mt19937(seed_cur),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// mirostat
|
||||
|
||||
struct llama_sampler_mirostat {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue