llama : add DRY sampler (#9702)
* sampling : add DRY sampler (post-refactor) * DRY: Trying to fix coauthors, removed unneeded line * DRY: Fixed redundant code * DRY: Fixed crash issue due to DRY being in chain but uninitialized --------- Co-authored-by: l3utterfly <gc.pthzfoldr@gmail.com> Co-authored-by: pi6am <34464159+pi6am@users.noreply.github.com>
This commit is contained in:
parent
d80fb71f8b
commit
ff252ea48e
17 changed files with 713 additions and 63 deletions
|
@ -40,6 +40,10 @@
|
|||
repeat_last_n: 0, // 0 = disable penalty, -1 = context size
|
||||
repeat_penalty: 1.0, // 1.0 = disabled
|
||||
penalize_nl: false, // true only useful for infinite completion
|
||||
dry_multiplier: 0.0, // 0.0 = disabled, 0.8 works well
|
||||
dry_base: 1.75, // 0.0 = disabled
|
||||
dry_allowed_length: 2, // tokens extending repetitions beyond this receive penalty, 2 works well
|
||||
dry_penalty_last_n: -1, // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
|
||||
top_k: 0, // <= 0 to use vocab size
|
||||
top_p: 1.0, // 1.0 = disabled
|
||||
min_p: 0.05, // 0 = disabled; recommended for non-english: ~ 0.4
|
||||
|
@ -833,13 +837,17 @@ return html`
|
|||
<fieldset class="params">
|
||||
${IntField({ label: "Top-K", title: "Limits the selection of the next token to the K most probable tokens. 1 means no randomness = greedy sampling. If set to 0, it means the entire vocabulary size is considered.", max: 100, min: 0, step: 1, name: "top_k", value: params.value.top_k })}
|
||||
${IntField({ label: "Penalize Last N", title: "The last n tokens that are taken into account to penalise repetitions. A value of 0 means that this function is deactivated and -1 means that the entire size of the context is taken into account.", max: 2048, min: 0, step: 16, name: "repeat_last_n", value: params.value.repeat_last_n })}
|
||||
${FloatField({ label: "Top-P", title: "Limits the selection of the next token to a subset of tokens whose combined probability reaches a threshold value P = top-P. If set to 1, it means the entire vocabulary size is considered.", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p })}
|
||||
${FloatField({ label: "Presence Penalty", title: "A penalty that is applied if certain tokens appear repeatedly in the generated text. A higher value leads to fewer repetitions.", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
|
||||
${FloatField({ label: "TFS-Z", title: "Activates tail-free sampling, a method used to limit the prediction of tokens that are too frequent. The parameter z controls the strength of this limitation. A value of 1.0 means that this function is deactivated.", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
|
||||
${FloatField({ label: "Frequency Penalty", title: "A penalty that is applied based on the frequency with which certain tokens occur in the training data set. A higher value results in rare tokens being favoured.", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}
|
||||
${FloatField({ label: "Top-P", title: "Limits the selection of the next token to a subset of tokens whose combined probability reaches a threshold value P = top-P. If set to 1, it means the entire vocabulary size is considered.", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p })}
|
||||
${FloatField({ label: "Typical-P", title: "Activates local typical sampling, a method used to limit the prediction of tokens that are atypical in the current context. The parameter p controls the strength of this limitation. A value of 1.0 means that this function is deactivated.", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
|
||||
${FloatField({ label: "XTC probability", title: "Sets the chance for token removal (checked once on sampler start)", max: 1.0, min: 0.0, name: "xtc_probability", step: 0.01, value: params.value.xtc_probability })}
|
||||
${FloatField({ label: "XTC threshold", title: "Sets a minimum probability threshold for tokens to be removed", max: 0.5, min: 0.0, name: "xtc_threshold", step: 0.01, value: params.value.xtc_threshold })}
|
||||
${FloatField({ label: "DRY Penalty Multiplier", title: "Set the DRY repetition penalty multiplier. Default is 0.0, which disables DRY.", max: 5.0, min: 0.0, name: "dry_multiplier", step: 0.01, value: params.value.dry_multiplier })}
|
||||
${FloatField({ label: "DRY Base", title: "Set the DRY repetition penalty base value. Default is 1.75", max: 3.0, min: 1.0, name: "dry_base", step: 0.01, value: params.value.dry_base })}
|
||||
${IntField({ label: "DRY Allowed Length", title: "Tokens that extend repetition beyond this receive exponentially increasing penalty. Default is 2", max: 10, min: 1, step: 1, name: "dry_allowed_length", value: params.value.dry_allowed_length })}
|
||||
${IntField({ label: "DRY Penalty Last N", title: "How many tokens to scan for repetitions. Default is -1, where 0 is disabled and -1 is context size", max: 2048, min: -1, step: 16, name: "dry_penalty_last_n", value: params.value.dry_penalty_last_n })}
|
||||
${FloatField({ label: "TFS-Z", title: "Activates tail-free sampling, a method used to limit the prediction of tokens that are too frequent. The parameter z controls the strength of this limitation. A value of 1.0 means that this function is deactivated.", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
|
||||
${IntField({ label: "Min Keep", title: "If greater than 0, samplers are forced to return N possible tokens at minimum. Default is 0", max: 10, min: 0, name: "min_keep", value: params.value.min_keep })}
|
||||
</fieldset>
|
||||
|
||||
|
@ -1144,6 +1152,8 @@ document.addEventListener('DOMContentLoaded', (event) => {
|
|||
repeat_penalty: { snapValue: 1.0, snapRangeMultiplier: 4 },
|
||||
presence_penalty: { snapValue: 0.0, snapRangeMultiplier: 4 },
|
||||
frequency_penalty: { snapValue: 0.0, snapRangeMultiplier: 4 },
|
||||
dry_multiplier: { snapValue: 0.0, snapRangeMultiplier: 4 },
|
||||
dry_base: { snapValue: 1.75, snapRangeMultiplier: 4 },
|
||||
};
|
||||
// add an event listener for each slider
|
||||
Object.keys(snapSettings).forEach(sliderName => {
|
||||
|
|
|
@ -304,6 +304,10 @@
|
|||
repeat_last_n: 256, // 0 = disable penalty, -1 = context size
|
||||
repeat_penalty: 1.18, // 1.0 = disabled
|
||||
penalize_nl: false,
|
||||
dry_multiplier: 0.0, // 0.0 = disabled, 0.8 works well
|
||||
dry_base: 1.75, // 0.0 = disabled
|
||||
dry_allowed_length: 2, // tokens extending repetitions beyond this receive penalty, 2 works well
|
||||
dry_penalty_last_n: -1, // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
|
||||
top_k: 40, // <= 0 to use vocab size
|
||||
top_p: 0.95, // 1.0 = disabled
|
||||
min_p: 0.05, // 0 = disabled
|
||||
|
@ -1015,6 +1019,10 @@
|
|||
${FloatField({ label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
|
||||
${FloatField({ label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
|
||||
${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}
|
||||
${FloatField({ label: "DRY Penalty Multiplier", max: 5.0, min: 0.0, name: "dry_multiplier", step: 0.01, value: params.value.dry_multiplier })}
|
||||
${FloatField({ label: "DRY Base", max: 3.0, min: 1.0, name: "dry_base", step: 0.01, value: params.value.dry_base })}
|
||||
${IntField({ label: "DRY Allowed Length", max: 10, min: 2, step: 1, name: "dry_allowed_length", value: params.value.dry_allowed_length })}
|
||||
${IntField({ label: "DRY Penalty Last N", max: 2048, min: -1, step: 16, name: "dry_penalty_last_n", value: params.value.dry_penalty_last_n })}
|
||||
${FloatField({ label: "XTC probability", max: 1.0, min: 0.0, name: "xtc_probability", step: 0.01, value: params.value.xtc_probability })}
|
||||
${FloatField({ label: "XTC threshold", max: 0.5, min: 0.0, name: "xtc_threshold", step: 0.01, value: params.value.xtc_threshold })}
|
||||
</fieldset>
|
||||
|
|
0
examples/server/public/style.css
Executable file → Normal file
0
examples/server/public/style.css
Executable file → Normal file
Loading…
Add table
Add a link
Reference in a new issue