From dbe9ef7783817d53026b49682d4cfb5a74f64672 Mon Sep 17 00:00:00 2001 From: MaggotHATE Date: Tue, 8 Oct 2024 01:19:39 +0500 Subject: [PATCH] Added XTC to `test-sampling` --- tests/test-sampling.cpp | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp index 6e021c4c7..cd6c61ba0 100644 --- a/tests/test-sampling.cpp +++ b/tests/test-sampling.cpp @@ -111,6 +111,28 @@ static void test_min_p(const std::vector & probs, const std::vector & probs, const std::vector & expected_probs, float p, float t, float t_max) { + const size_t n_vocab = probs.size(); + + std::vector cur; + cur.reserve(n_vocab); + for (llama_token token_id = 0; token_id < (llama_token)n_vocab; token_id++) { + const float logit = logf(probs[token_id]); + cur.emplace_back(llama_token_data{token_id, logit, 0.0f}); + } + + llama_token_data_array cur_p = { cur.data(), cur.size(), -1, false }; + APPLY(llama_sampler_init_softmax(), &cur_p); + DUMP(&cur_p); + APPLY(llama_sampler_init_xtc(p, t, t_max, 0, 0), &cur_p); + DUMP(&cur_p); + + GGML_ASSERT(cur_p.size == expected_probs.size()); + for (size_t i = 0; i < cur_p.size; i++) { + GGML_ASSERT(fabs(cur_p.data[i].p - expected_probs[i]) < 1e-5); + } +} + static void test_typical(const std::vector & probs, const std::vector & expected_probs, float p) { const size_t n_vocab = probs.size(); @@ -279,12 +301,13 @@ static void test_perf() { data.emplace_back(llama_token_data{i, logit, 0.0f}); } - BENCH(llama_sampler_init_top_k (40), data, 32); - BENCH(llama_sampler_init_top_p (0.8f, 1), data, 32); - BENCH(llama_sampler_init_min_p (0.2f, 1), data, 32); - BENCH(llama_sampler_init_tail_free(0.5f, 1), data, 32); - BENCH(llama_sampler_init_typical (0.5f, 1), data, 32); - BENCH(llama_sampler_init_softmax (), data, 32); + BENCH(llama_sampler_init_top_k (40), data, 32); + BENCH(llama_sampler_init_top_p (0.8f, 1), data, 32); + BENCH(llama_sampler_init_min_p (0.2f, 1), data, 32); + BENCH(llama_sampler_init_tail_free(0.5f, 1), data, 32); + BENCH(llama_sampler_init_typical (0.5f, 1), data, 32); + BENCH(llama_sampler_init_xtc (1.0f, 0.1f, 0.8f, 1, 1), data, 32); + BENCH(llama_sampler_init_softmax (), data, 32); } int main(void) { @@ -309,6 +332,11 @@ int main(void) { test_min_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f/0.4f}, 0.76f); test_min_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f/0.4f}, 1.00f); + test_xtc({0.4f, 0.3f, 0.2f, 0.1f}, {0.1f}, 0.99f, 0.10f, 1.00f); + test_xtc({0.4f, 0.3f, 0.2f, 0.1f}, {0.4f, 0.1f}, 0.99f, 0.10f, 0.35f); + test_xtc({0.4f, 0.3f, 0.2f, 0.1f}, {0.4f, 0.3f, 0.1f}, 0.99f, 0.10f, 0.25f); + test_xtc({0.4f, 0.3f, 0.2f, 0.1f}, {0.4f, 0.2f, 0.1f}, 0.99f, 0.20f, 0.35f); + test_tfs({0.1f, 0.15f, 0.2f, 0.25f, 0.3f}, {0.3f}, 0.25f); test_tfs({0.1f, 0.15f, 0.2f, 0.25f, 0.3f}, {0.3f, 0.25f}, 0.75f); test_tfs({0.1f, 0.15f, 0.2f, 0.25f, 0.3f}, {0.3f, 0.25f}, 0.99f);