llama : add DRY sampler (#9702)

* sampling : add DRY sampler (post-refactor)

* DRY: Trying to fix coauthors, removed unneeded line

* DRY: Fixed redundant code

* DRY: Fixed crash issue due to DRY being in chain but uninitialized

---------

Co-authored-by: l3utterfly <gc.pthzfoldr@gmail.com>
Co-authored-by: pi6am <34464159+pi6am@users.noreply.github.com>
This commit is contained in:
wwoodsTM 2024-10-25 10:07:34 -06:00 committed by GitHub
parent d80fb71f8b
commit ff252ea48e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 713 additions and 63 deletions

View file

@ -10,6 +10,8 @@
#include <string>
#include <vector>
extern struct llama_sampler * llama_sampler_init_dry_testing(int32_t context_size, float dry_multiplier, float dry_base, int32_t dry_allowed_length, int32_t dry_penalty_last_n, const std::vector<std::vector<llama_token>>& seq_breakers);
static void dump(const llama_token_data_array * cur_p) {
for (size_t i = 0; i < cur_p->size; i++) {
printf("%d: %f (%f)\n", cur_p->data[i].id, cur_p->data[i].p, cur_p->data[i].logit);
@ -167,6 +169,29 @@ static void test_penalties(
tester.check();
}
static void test_dry(
const std::vector<float> & probs, const std::vector<llama_token> & last_tokens,
const std::vector<float> & expected_probs, float dry_multiplier, float dry_base,
int dry_allowed_length, int dry_penalty_last_n,
const std::vector<std::vector<llama_token>> & seq_breakers
) {
GGML_ASSERT(probs.size() == expected_probs.size());
sampler_tester tester(probs, expected_probs);
auto * sampler = llama_sampler_init_dry_testing(1024, dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n, seq_breakers);
for (size_t i = 0; i < last_tokens.size(); i++) {
llama_sampler_accept(sampler, last_tokens[i]);
}
DUMP(&tester.cur_p);
tester.apply(sampler);
tester.apply(llama_sampler_init_dist(0));
DUMP(&tester.cur_p);
tester.check();
}
static void test_sampler_queue(const size_t n_vocab, const std::string & samplers_sequence, const int top_k, const float top_p, const float min_p
) {
sampler_tester tester(n_vocab);
@ -333,6 +358,13 @@ int main(void) {
test_penalties({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 2}, {0.499966f, 0.499966f, 0.000023f, 0.000023f, 0.000023f}, 1.0f, 5.0f, 5.0f);
test_penalties({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 2, 0, 0}, {0.499977f, 0.499977f, 0.000023f, 0.000023f, 0.000000f}, 1.0f, 5.0f, 5.0f);
test_dry({0.25f, 0.25f, 0.25f, 0.25f}, {0, 1}, {0.25f, 0.25f, 0.25f, 0.25f}, 1.0f, 1.1f, 2, 4, {});
test_dry({0.25f, 0.25f, 0.25f, 0.25f}, {0, 1, 2, 0, 1}, {0.296923f, 0.296923f, 0.296923f, 0.109232f}, 1.0f, 1.1f, 2, 5, {});
test_dry({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 3, 4, 0, 1}, {0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, 1.0f, 1.1f, 2, 6, {{3}});
test_dry({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 2, 0, 1}, {0.241818f, 0.241818f, 0.241818f, 0.241818f, 0.032727f}, 2.0f, 1.1f, 2, 5, {});
test_dry({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 2, 3, 4, 0, 1}, {0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, 1.0f, 1.1f, 4, 7, {});
test_sampler_queue(10000, "k", 10000, 1.0f, 1.0f);
test_sampler_queue(10000, "k", 1, 1.0f, 1.0f);
test_sampler_queue(10000, "p", 10000, 1.0f, 1.0f);