metal : fix block_q_n_dot_y
This commit is contained in:
parent
79d4732c70
commit
9db276f0c2
1 changed files with 8 additions and 8 deletions
|
@ -453,10 +453,10 @@ inline float block_q_n_dot_y(device const block_q5_0 * qb_curr, float sumy, thre
|
||||||
device const uint16_t * qs = ((device const uint16_t *)qb_curr + 3 + il/2);
|
device const uint16_t * qs = ((device const uint16_t *)qb_curr + 3 + il/2);
|
||||||
const uint32_t qh = *((device const uint32_t *)qb_curr->qh);
|
const uint32_t qh = *((device const uint32_t *)qb_curr->qh);
|
||||||
for (int i = 0; i < 8; i+=2) {
|
for (int i = 0; i < 8; i+=2) {
|
||||||
acc[0] += yl[i + 0] * ((qs[i / 2] & 0x000F) | ((qh >> (i+0+il ) << 4 ) & 0x0010))
|
acc[0] += yl[i + 0] * ((qs[i / 2] & 0x000F) | ((qh >> (i+0+il ) << 4 ) & 0x00010))
|
||||||
+ yl[i + 1] * ((qs[i / 2] & 0x0F00) | ((qh >> (i+1+il ) << 12) & 0x1000));
|
+ yl[i + 1] * ((qs[i / 2] & 0x0F00) | ((qh >> (i+1+il ) << 12) & 0x01000));
|
||||||
acc[1] += yl[i + 8] * ((qs[i / 2] & 0x00F0) | ((qh >> (i+0+il+QK5_0/2) << 4 ) & 0x0010))
|
acc[1] += yl[i + 8] * ((qs[i / 2] & 0x00F0) | ((qh >> (i+0+il+QK5_0/2) << 8 ) & 0x00100))
|
||||||
+ yl[i + 9] * ((qs[i / 2] & 0xF000) | ((qh >> (i+1+il+QK5_0/2) << 12) & 0x1000));
|
+ yl[i + 9] * ((qs[i / 2] & 0xF000) | ((qh >> (i+1+il+QK5_0/2) << 16) & 0x10000));
|
||||||
}
|
}
|
||||||
return d * (sumy * -16.f + acc[0] + acc[1]);
|
return d * (sumy * -16.f + acc[0] + acc[1]);
|
||||||
}
|
}
|
||||||
|
@ -472,10 +472,10 @@ inline float block_q_n_dot_y(device const block_q5_1 * qb_curr, float sumy, thre
|
||||||
device const uint16_t * qs = ((device const uint16_t *)qb_curr + 4 + il/2);
|
device const uint16_t * qs = ((device const uint16_t *)qb_curr + 4 + il/2);
|
||||||
const uint32_t qh = *((device const uint32_t *)qb_curr->qh);
|
const uint32_t qh = *((device const uint32_t *)qb_curr->qh);
|
||||||
for (int i = 0; i < 8; i+=2) {
|
for (int i = 0; i < 8; i+=2) {
|
||||||
acc[0] += yl[i + 0] * ((qs[i / 2] & 0x000F) | ((qh >> (i+0+il ) << 4 ) & 0x0010))
|
acc[0] += yl[i + 0] * ((qs[i / 2] & 0x000F) | ((qh >> (i+0+il ) << 4 ) & 0x00010))
|
||||||
+ yl[i + 1] * ((qs[i / 2] & 0x0F00) | ((qh >> (i+1+il ) << 12) & 0x1000));
|
+ yl[i + 1] * ((qs[i / 2] & 0x0F00) | ((qh >> (i+1+il ) << 12) & 0x01000));
|
||||||
acc[1] += yl[i + 8] * ((qs[i / 2] & 0x00F0) | ((qh >> (i+0+il+QK5_1/2) << 4 ) & 0x0010))
|
acc[1] += yl[i + 8] * ((qs[i / 2] & 0x00F0) | ((qh >> (i+0+il+QK5_0/2) << 8 ) & 0x00100))
|
||||||
+ yl[i + 9] * ((qs[i / 2] & 0xF000) | ((qh >> (i+1+il+QK5_1/2) << 12) & 0x1000));
|
+ yl[i + 9] * ((qs[i / 2] & 0xF000) | ((qh >> (i+1+il+QK5_0/2) << 16) & 0x10000));
|
||||||
}
|
}
|
||||||
return d * (acc[0] + acc[1]) + sumy * m;
|
return d * (acc[0] + acc[1]) + sumy * m;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue