Simplified chances calculation

To be more in line with the original implementation, the chance is calculated once at the beginning.
2024-10-04 18:30:46 +05:00 · 2024-10-04 18:30:46 +05:00 · db54ac5df4
commit db54ac5df4
parent 9455194056
1 changed files with 5 additions and 8 deletions
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@ -1089,18 +1089,15 @@ static void llama_sample_xtc_apply(struct llama_sampler * smpl, llama_token_data
    // going through all candidates from back to front, easier to keep the last of probables
    for (int i = (cur_p->size - 1); i >= 0; --i) {
        if (cur_p->data[i].p >= ctx->threshold && cur_p->data[i].p <= ctx->threshold_max) {
-            if (removed == 0 || chance <= ctx->probability) {
-                ++removed;
-                if (removed >= 2) {
-                    // .logits are used for sorting and calculating .p in llama_sample_softmax_impl
-                    cur_p->data[i].logit = -999.0f;
-                    chance = (float)(rd()%100)/100;
-                }
+            ++removed;
+            if (removed > 1) {
+                // .logits are used for sorting and calculating .p in llama_sample_softmax_impl
+                cur_p->data[i].logit = -999.0f;
            }
        }
    }

-    if (removed >= 2) {
+    if (removed > 1) {
        // sorting with new logits, ex-last probable will be the first anyway
        std::sort(cur_p->data, cur_p->data + cur_p->size, [](const llama_token_data & a, const llama_token_data & b) {
            return a.logit > b.logit;