sampling : refactor + optimize penalties sampler (#10803)

* sampling : refactor + optimize penalties sampler

  ggml-ci

* common : apply ignore_eos as logit bias

  ggml-ci

* batched : remove penalties sampler

* params : allow penalty_last_n == -1 to be equal to context size

  ggml-ci

* common : by default, move the penalties at the end of the sampling chain

  ggml-ci

* common : ignore all EOG tokens

  Co-authored-by: Diego Devesa <slarengh@gmail.com>

* common : move back the penalties at the front of the sampling chain

  ggml-ci

* readme : restore hint about --ignore-eos flag [no ci]

* llama : minor

  ggml-ci

* webui : update

---------

Co-authored-by: Diego Devesa <slarengh@gmail.com>
2024-12-16 12:31:14 +02:00 · 2024-12-16 12:31:14 +02:00 · 644fd71b44
commit 644fd71b44
parent 4ddd199f6f
17 changed files with 111 additions and 152 deletions
--- a/examples/server/public_legacy/index-new.html
+++ b/examples/server/public_legacy/index-new.html
@@ -39,7 +39,6 @@
      temperature: 0.8, // adapt all following parameters to optimized min-p requierements. If for non-english, set to 0.6 or lower
      repeat_last_n: 0, // 0 = disable penalty, -1 = context size
      repeat_penalty: 1.0, // 1.0 = disabled
-      penalize_nl: false, // true only useful for infinite completion
      dry_multiplier: 0.0, // 0.0 = disabled, 0.8 works well
      dry_base: 1.75,     // 0.0 = disabled
      dry_allowed_length: 2, // tokens extending repetitions beyond this receive penalty, 2 works well
--- a/examples/server/public_legacy/index.html
+++ b/examples/server/public_legacy/index.html
@@ -303,7 +303,6 @@
      temperature: 0.7,
      repeat_last_n: 256, // 0 = disable penalty, -1 = context size
      repeat_penalty: 1.18, // 1.0 = disabled
-      penalize_nl: false,
      dry_multiplier: 0.0, // 0.0 = disabled, 0.8 works well
      dry_base: 1.75,     // 0.0 = disabled
      dry_allowed_length: 2, // tokens extending repetitions beyond this receive penalty, 2 works well
@@ -1006,7 +1005,6 @@
            ${FloatField({ label: "Temperature", max: 2.0, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature })}
            ${FloatField({ label: "Penalize repeat sequence", max: 2.0, min: 0.0, name: "repeat_penalty", step: 0.01, value: params.value.repeat_penalty })}
            ${IntField({ label: "Consider N tokens for penalize", max: 2048, min: 0, name: "repeat_last_n", value: params.value.repeat_last_n })}
-            ${BoolField({ label: "Penalize repetition of newlines", name: "penalize_nl", value: params.value.penalize_nl })}
            ${IntField({ label: "Top-K sampling", max: 100, min: -1, name: "top_k", value: params.value.top_k })}
            ${FloatField({ label: "Top-P sampling", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p })}
            ${FloatField({ label: "Min-P sampling", max: 1.0, min: 0.0, name: "min_p", step: 0.01, value: params.value.min_p })}