llama : add phi3 128K model support (#7225)
* add phi3 128k support in convert-hf-to-gguf * add phi3 128k support in cuda * address build warnings on llama.cpp * adjust index value in cuda long rope freq factors * add long rope support in ggml cpu backend * make freq factors only depend on ctx size * remove unused rope scaling type 'su' frin gguf converter * fix flint warnings on convert-hf-to-gguf.py * set to the short freq factor when context size is small than trained context size * add one line of comments * metal : support rope freq_factors * ggml : update ggml_rope_ext API to support freq. factors * backends : add dev messages to support rope freq. factors * minor : style * tests : update to use new rope API * backends : fix pragma semicolons * minor : cleanup * llama : move rope factors from KV header to tensors * llama : remove tmp assert * cuda : fix compile warning * convert : read/write n_head_kv * llama : fix uninitialized tensors --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
6369bf0433
commit
201cc11afa
15 changed files with 484 additions and 233 deletions
|
@ -1640,6 +1640,7 @@ static void rope_yarn_corr_dims(
|
|||
typedef void (rope_t)(
|
||||
device const void * src0,
|
||||
device const int32_t * src1,
|
||||
device const float * src2,
|
||||
device float * dst,
|
||||
constant int64_t & ne00,
|
||||
constant int64_t & ne01,
|
||||
|
@ -1675,6 +1676,7 @@ template<typename T>
|
|||
kernel void kernel_rope(
|
||||
device const void * src0,
|
||||
device const int32_t * src1,
|
||||
device const float * src2,
|
||||
device float * dst,
|
||||
constant int64_t & ne00,
|
||||
constant int64_t & ne01,
|
||||
|
@ -1744,8 +1746,10 @@ kernel void kernel_rope(
|
|||
|
||||
// simplified from `(ib * n_dims + ic) * inv_ndims`
|
||||
const float cur_rot = inv_ndims*ic - ib;
|
||||
const float freq_factor = src2 != src0 ? src2[ic/2] : 1.0f;
|
||||
|
||||
const float theta = theta_0 * pow(freq_base, cur_rot) / freq_factor;
|
||||
|
||||
const float theta = theta_0 * pow(freq_base, cur_rot);
|
||||
float cos_theta, sin_theta;
|
||||
rope_yarn(theta, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor, &cos_theta, &sin_theta);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue