Temporarily hard-coded mscale value for DeepSeek-V2 (FIXME!).
This commit is contained in:
parent
7e4786bbfb
commit
71a742256c
1 changed file with 2 additions and 0 deletions
2
ggml.c
2
ggml.c
|
@ -14073,6 +14073,8 @@ static void rope_yarn(
|
||||||
// Get n-d magnitude scaling corrected for interpolation
|
// Get n-d magnitude scaling corrected for interpolation
|
||||||
mscale *= 1.0f + 0.1f * logf(1.0f / freq_scale);
|
mscale *= 1.0f + 0.1f * logf(1.0f / freq_scale);
|
||||||
}
|
}
|
||||||
|
// TODO ugly hack for DeepSeek-V2 until we find a solution
|
||||||
|
mscale = 1.0;
|
||||||
*cos_theta = cosf(theta) * mscale;
|
*cos_theta = cosf(theta) * mscale;
|
||||||
*sin_theta = sinf(theta) * mscale;
|
*sin_theta = sinf(theta) * mscale;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue