address review comments
This commit is contained in:
parent
fe788c45c8
commit
e0b120c3ca
3 changed files with 4 additions and 4 deletions
|
@ -683,9 +683,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
|
|||
printf(" --cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale);
|
||||
printf(" --rope-scaling {none,linear,yarn}\n");
|
||||
printf(" RoPE frequency scaling method, defaults to linear unless specified by the model\n");
|
||||
printf(" --rope-scale N RoPE context scaling factor, inverse of --rope-freq-scale\n");
|
||||
printf(" --rope-scale N RoPE context scaling factor, expands context by a factor of N\n");
|
||||
printf(" --rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: loaded from model)\n");
|
||||
printf(" --rope-freq-scale N RoPE frequency scaling factor (default: loaded from model)\n");
|
||||
printf(" --rope-freq-scale N RoPE frequency scaling factor, expands context by a factor of 1/N\n");
|
||||
printf(" --yarn-ext-factor N YaRN extrapolation mix factor (default: %.1f)\n", params.yarn_ext_factor);
|
||||
printf(" --yarn-attn-factor N YaRN magnitude scaling factor (default: %.1f)\n", params.yarn_attn_factor);
|
||||
printf(" --yarn-beta-fast N YaRN low correction dim (default: %.1f)\n", params.yarn_beta_fast);
|
||||
|
|
|
@ -704,7 +704,7 @@ static void server_print_usage(const char *argv0, const gpt_params ¶ms,
|
|||
printf(" --rope-scaling {none,linear,yarn}\n");
|
||||
printf(" RoPE frequency scaling method, defaults to linear unless specified by the model\n");
|
||||
printf(" --rope-freq-base N RoPE base frequency (default: loaded from model)\n");
|
||||
printf(" --rope-freq-scale N RoPE frequency scaling factor (default: loaded from model)\n");
|
||||
printf(" --rope-freq-scale N RoPE frequency scaling factor, expands context by a factor of 1/N\n");
|
||||
printf(" --yarn-ext-factor N YaRN extrapolation mix factor (default: %.1f)\n", params.yarn_ext_factor);
|
||||
printf(" --yarn-attn-factor N YaRN magnitude scaling factor (default: %.1f)\n", params.yarn_attn_factor);
|
||||
printf(" --yarn-beta-fast N YaRN low correction dim (default: %.1f)\n", params.yarn_beta_fast);
|
||||
|
|
2
ggml.c
2
ggml.c
|
@ -12650,7 +12650,7 @@ static void ggml_compute_forward_clamp(
|
|||
|
||||
// ggml_compute_forward_rope
|
||||
|
||||
static inline float rope_yarn_ramp(const float low, const float high, const int i0) {
|
||||
static float rope_yarn_ramp(const float low, const float high, const int i0) {
|
||||
const float y = (i0 / 2 - low) / MIN(0.001f, high - low);
|
||||
return 1 - MIN(1, MAX(0, y));
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue