diff --git a/examples/main/README.md b/examples/main/README.md
index 4c16d5545..620934dad 100644
--- a/examples/main/README.md
+++ b/examples/main/README.md
@@ -252,7 +252,7 @@ By removing top tokens XTC can improve the variety of answers, break writing cli
 
 Being experimental and unique, XTC is disabled by default. The recommended combination of samplers is Min-P followed by XTC on its default settings: `--sampling-seq mx --min-p 0.02 --xtc-probability 0.5`.
 
-Example usage: `--xtc-probability 0.5 --xtc-threshold 0.1
+Example usage: `--xtc-probability 0.5 --xtc-threshold 0.1`
 
 ### Logit Bias
 
diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
index 0f1cab8b2..67a78c3ac 100644
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -1102,7 +1102,7 @@ static void llama_sample_xtc_apply(struct llama_sampler * smpl, llama_token_data
 
     if (cur_p->size - pos_last >= ctx->min_keep && pos_last > 0) {
         cur_p->data += pos_last;
-        cur_p->size = cur_p->size - pos_last;
+        cur_p->size -= pos_last;
     }
 }
 