ggml : ggml_rope now takes a vector with positions instead of n_past
This commit is contained in:
parent
3b4bab6a38
commit
1fb033fd85
9 changed files with 270 additions and 131 deletions
|
@ -854,29 +854,30 @@ kernel void kernel_alibi_f32(
|
|||
}
|
||||
|
||||
kernel void kernel_rope(
|
||||
device const void * src0,
|
||||
device float * dst,
|
||||
constant int64_t & ne00,
|
||||
constant int64_t & ne01,
|
||||
constant int64_t & ne02,
|
||||
constant int64_t & ne03,
|
||||
constant uint64_t & nb00,
|
||||
constant uint64_t & nb01,
|
||||
constant uint64_t & nb02,
|
||||
constant uint64_t & nb03,
|
||||
constant int64_t & ne0,
|
||||
constant int64_t & ne1,
|
||||
constant int64_t & ne2,
|
||||
constant int64_t & ne3,
|
||||
constant uint64_t & nb0,
|
||||
constant uint64_t & nb1,
|
||||
constant uint64_t & nb2,
|
||||
constant uint64_t & nb3,
|
||||
constant int & n_past,
|
||||
constant int & n_dims,
|
||||
constant int & mode,
|
||||
constant float & freq_base,
|
||||
constant float & freq_scale,
|
||||
device const void * src0,
|
||||
device const int32_t * src1,
|
||||
device float * dst,
|
||||
constant int64_t & ne00,
|
||||
constant int64_t & ne01,
|
||||
constant int64_t & ne02,
|
||||
constant int64_t & ne03,
|
||||
constant uint64_t & nb00,
|
||||
constant uint64_t & nb01,
|
||||
constant uint64_t & nb02,
|
||||
constant uint64_t & nb03,
|
||||
constant int64_t & ne0,
|
||||
constant int64_t & ne1,
|
||||
constant int64_t & ne2,
|
||||
constant int64_t & ne3,
|
||||
constant uint64_t & nb0,
|
||||
constant uint64_t & nb1,
|
||||
constant uint64_t & nb2,
|
||||
constant uint64_t & nb3,
|
||||
constant int & n_past,
|
||||
constant int & n_dims,
|
||||
constant int & mode,
|
||||
constant float & freq_base,
|
||||
constant float & freq_scale,
|
||||
uint tiitg[[thread_index_in_threadgroup]],
|
||||
uint3 tptg[[threads_per_threadgroup]],
|
||||
uint3 tgpig[[threadgroup_position_in_grid]]) {
|
||||
|
@ -886,7 +887,9 @@ kernel void kernel_rope(
|
|||
|
||||
const bool is_neox = mode & 2;
|
||||
|
||||
const int64_t p = ((mode & 1) == 0 ? n_past + i2 : i2);
|
||||
device const int32_t * pos = src1;
|
||||
|
||||
const int64_t p = pos[i2];
|
||||
|
||||
const float theta_0 = freq_scale * (float)p;
|
||||
const float inv_ndims = -1.f/n_dims;
|
||||
|
@ -1320,8 +1323,8 @@ kernel void kernel_mul_mat_q3_K_f32(
|
|||
|
||||
float yl[32];
|
||||
|
||||
const uint16_t kmask1 = 0x3030;
|
||||
const uint16_t kmask2 = 0x0f0f;
|
||||
//const uint16_t kmask1 = 0x3030;
|
||||
//const uint16_t kmask2 = 0x0f0f;
|
||||
|
||||
const int tid = tiisg/4;
|
||||
const int ix = tiisg%4;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue