remove ifdefs

This commit is contained in:
Eve 2024-12-07 14:20:56 -05:00
parent 4b65c6b90d
commit f5a15fc69e
3 changed files with 46 additions and 28 deletions

View file

@ -15,48 +15,54 @@ layout (binding = 0) readonly buffer A_PACKED32 {A_TYPE_PACKED32 data_a_packed32
vec2 dequantize(uint ib, uint iqs, uint a_offset) { vec2 dequantize(uint ib, uint iqs, uint a_offset) {
return vec2(data_a[a_offset + ib], data_a[a_offset + ib + 1]); return vec2(data_a[a_offset + ib], data_a[a_offset + ib + 1]);
} }
vec2 get_dm(uint ib, uint a_offset) {
return vec2(0, 0);
}
#endif #endif
#if defined(DATA_A_F16) #if defined(DATA_A_F16)
vec2 dequantize(uint ib, uint iqs, uint a_offset) { vec2 dequantize(uint ib, uint iqs, uint a_offset) {
return vec2(data_a[a_offset + ib], data_a[a_offset + ib + 1]); return vec2(data_a[a_offset + ib], data_a[a_offset + ib + 1]);
} }
vec2 get_dm(uint ib, uint a_offset) {
return vec2(0, 0);
}
#endif #endif
#if defined(DATA_A_Q4_0) #if defined(DATA_A_Q4_0)
vec2 dequantize(uint ib, uint iqs, uint a_offset) { vec2 dequantize(uint ib, uint iqs, uint a_offset) {
const float d = float(data_a[a_offset + ib].d);
const uint vui = uint(data_a[a_offset + ib].qs[iqs]); const uint vui = uint(data_a[a_offset + ib].qs[iqs]);
return (vec2(vui & 0xF, vui >> 4) - 8.0f) * d; return (vec2(vui & 0xF, vui >> 4) - 8.0f);
} }
vec4 dequantize4(uint ib, uint iqs, uint a_offset) { vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
const uint vui = uint(data_a_packed16[a_offset + ib].qs[iqs/2]); const uint vui = uint(data_a_packed16[a_offset + ib].qs[iqs/2]);
return (vec4(vui & 0xF, (vui >> 4) & 0xF, (vui >> 8) & 0xF, vui >> 12) - 8.0f); return (vec4(vui & 0xF, (vui >> 4) & 0xF, (vui >> 8) & 0xF, vui >> 12) - 8.0f);
} }
vec2 get_dm(uint ib, uint a_offset) {
return vec2(float(data_a[a_offset + ib].d), 0);
}
#endif #endif
#if defined(DATA_A_Q4_1) #if defined(DATA_A_Q4_1)
vec2 dequantize(uint ib, uint iqs, uint a_offset) { vec2 dequantize(uint ib, uint iqs, uint a_offset) {
const float d = float(data_a[a_offset + ib].d);
const float m = float(data_a[a_offset + ib].m);
const uint vui = uint(data_a[a_offset + ib].qs[iqs]); const uint vui = uint(data_a[a_offset + ib].qs[iqs]);
return vec2(vui & 0xF, vui >> 4) * d + m; return vec2(vui & 0xF, vui >> 4);
} }
vec4 dequantize4(uint ib, uint iqs, uint a_offset) { vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
const float d = float(data_a_packed16[a_offset + ib].d);
const float m = float(data_a_packed16[a_offset + ib].m);
const uint vui = uint(data_a_packed16[a_offset + ib].qs[iqs/2]); const uint vui = uint(data_a_packed16[a_offset + ib].qs[iqs/2]);
return vec4(vui & 0xF, (vui >> 4) & 0xF, (vui >> 8) & 0xF, vui >> 12) * d + m; return vec4(vui & 0xF, (vui >> 4) & 0xF, (vui >> 8) & 0xF, vui >> 12);
}
vec2 get_dm(uint ib, uint a_offset) {
return vec2(float(data_a[a_offset + ib].d), float(data_a[a_offset + ib].m));
} }
#endif #endif
#if defined(DATA_A_Q5_0) #if defined(DATA_A_Q5_0)
vec2 dequantize(uint ib, uint iqs, uint a_offset) { vec2 dequantize(uint ib, uint iqs, uint a_offset) {
const float d = float(data_a[a_offset + ib].d);
const uint uint_qh = uint(data_a[a_offset + ib].qh[1]) << 16 | data_a[a_offset + ib].qh[0]; const uint uint_qh = uint(data_a[a_offset + ib].qh[1]) << 16 | data_a[a_offset + ib].qh[0];
const ivec2 qh = ivec2(((uint_qh >> iqs) << 4) & 0x10, (uint_qh >> (iqs + 12)) & 0x10); const ivec2 qh = ivec2(((uint_qh >> iqs) << 4) & 0x10, (uint_qh >> (iqs + 12)) & 0x10);
const uint vui = uint(data_a[a_offset + ib].qs[iqs]); const uint vui = uint(data_a[a_offset + ib].qs[iqs]);
return (vec2((vui & 0xF) | qh.x, (vui >> 4) | qh.y) - 16.0f) * d; return (vec2((vui & 0xF) | qh.x, (vui >> 4) | qh.y) - 16.0f);
} }
vec4 dequantize4(uint ib, uint iqs, uint a_offset) { vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
const uint uint_qh = uint(data_a_packed16[a_offset + ib].qh[1]) << 16 | data_a_packed16[a_offset + ib].qh[0]; const uint uint_qh = uint(data_a_packed16[a_offset + ib].qh[1]) << 16 | data_a_packed16[a_offset + ib].qh[0];
@ -65,48 +71,54 @@ vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
const uint vui = uint(data_a_packed16[a_offset + ib].qs[iqs/2]); const uint vui = uint(data_a_packed16[a_offset + ib].qs[iqs/2]);
return (vec4(((vui >> 0) & 0xF) | qh0.x, ((vui >> 4) & 0xF) | qh0.y, ((vui >> 8) & 0xF) | qh1.x, (vui >> 12) | qh1.y) - 16.0f); return (vec4(((vui >> 0) & 0xF) | qh0.x, ((vui >> 4) & 0xF) | qh0.y, ((vui >> 8) & 0xF) | qh1.x, (vui >> 12) | qh1.y) - 16.0f);
} }
vec2 get_dm(uint ib, uint a_offset) {
return vec2(float(data_a[a_offset + ib].d), 0);
}
#endif #endif
#if defined(DATA_A_Q5_1) #if defined(DATA_A_Q5_1)
vec2 dequantize(uint ib, uint iqs, uint a_offset) { vec2 dequantize(uint ib, uint iqs, uint a_offset) {
const float d = float(data_a[a_offset + ib].d);
const float m = float(data_a[a_offset + ib].m);
const uint uint_qh = data_a[a_offset + ib].qh; const uint uint_qh = data_a[a_offset + ib].qh;
const ivec2 qh = ivec2(((uint_qh >> iqs) << 4) & 0x10, (uint_qh >> (iqs + 12)) & 0x10); const ivec2 qh = ivec2(((uint_qh >> iqs) << 4) & 0x10, (uint_qh >> (iqs + 12)) & 0x10);
const uint vui = uint(data_a[a_offset + ib].qs[iqs]); const uint vui = uint(data_a[a_offset + ib].qs[iqs]);
return vec2((vui & 0xF) | qh.x, (vui >> 4) | qh.y) * d + m; return vec2((vui & 0xF) | qh.x, (vui >> 4) | qh.y);
} }
vec4 dequantize4(uint ib, uint iqs, uint a_offset) { vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
const float d = float(data_a_packed16[a_offset + ib].d);
const float m = float(data_a_packed16[a_offset + ib].m);
const uint uint_qh = data_a_packed16[a_offset + ib].qh; const uint uint_qh = data_a_packed16[a_offset + ib].qh;
const ivec2 qh0 = ivec2(((uint_qh >> iqs) << 4) & 0x10, (uint_qh >> (iqs + 12)) & 0x10); const ivec2 qh0 = ivec2(((uint_qh >> iqs) << 4) & 0x10, (uint_qh >> (iqs + 12)) & 0x10);
const ivec2 qh1 = ivec2(((uint_qh >> (iqs + 1)) << 4) & 0x10, (uint_qh >> (iqs + 13)) & 0x10); const ivec2 qh1 = ivec2(((uint_qh >> (iqs + 1)) << 4) & 0x10, (uint_qh >> (iqs + 13)) & 0x10);
const uint vui = uint(data_a_packed16[a_offset + ib].qs[iqs/2]); const uint vui = uint(data_a_packed16[a_offset + ib].qs[iqs/2]);
return vec4(((vui >> 0) & 0xF) | qh0.x, ((vui >> 4) & 0xF) | qh0.y, ((vui >> 8) & 0xF) | qh1.x, (vui >> 12) | qh1.y) * d + m; return vec4(((vui >> 0) & 0xF) | qh0.x, ((vui >> 4) & 0xF) | qh0.y, ((vui >> 8) & 0xF) | qh1.x, (vui >> 12) | qh1.y);
}
vec2 get_dm(uint ib, uint a_offset) {
return vec2(float(data_a[a_offset + ib].d), float(data_a[a_offset + ib].m));
} }
#endif #endif
#if defined(DATA_A_Q8_0) #if defined(DATA_A_Q8_0)
vec2 dequantize(uint ib, uint iqs, uint a_offset) { vec2 dequantize(uint ib, uint iqs, uint a_offset) {
const float d = float(data_a[a_offset + ib].d); return vec2(int(data_a[a_offset + ib].qs[iqs]), int(data_a[a_offset + ib].qs[iqs + 1]));
return vec2(int(data_a[a_offset + ib].qs[iqs]), int(data_a[a_offset + ib].qs[iqs + 1])) * d;
} }
vec4 dequantize4(uint ib, uint iqs, uint a_offset) { vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
uint32_t v0 = data_a_packed16[a_offset + ib].qs[iqs/2]; uint32_t v0 = data_a_packed16[a_offset + ib].qs[iqs/2];
uint32_t v1 = data_a_packed16[a_offset + ib].qs[iqs/2 + 1]; uint32_t v1 = data_a_packed16[a_offset + ib].qs[iqs/2 + 1];
return vec4(int8_t(v0 & 0xFF), int8_t(v0 >> 8), int8_t(v1 & 0xFF), int8_t(v1 >> 8)); return vec4(int8_t(v0 & 0xFF), int8_t(v0 >> 8), int8_t(v1 & 0xFF), int8_t(v1 >> 8));
} }
vec2 get_dm(uint ib, uint a_offset) {
return vec2(float(data_a[a_offset + ib].d), 0);
}
#endif #endif
#if defined(DATA_A_IQ4_NL) #if defined(DATA_A_IQ4_NL)
vec2 dequantize(uint ib, uint iqs, uint a_offset) { vec2 dequantize(uint ib, uint iqs, uint a_offset) {
const float d = float(data_a[a_offset + ib].d);
const uint vui = uint(data_a[a_offset + ib].qs[iqs]); const uint vui = uint(data_a[a_offset + ib].qs[iqs]);
return vec2(kvalues_iq4nl[vui & 0xF], kvalues_iq4nl[vui >> 4]) * d; return vec2(kvalues_iq4nl[vui & 0xF], kvalues_iq4nl[vui >> 4]);
} }
vec4 dequantize4(uint ib, uint iqs, uint a_offset) { vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
const uint vui = uint(data_a_packed16[a_offset + ib].qs[iqs/2]); const uint vui = uint(data_a_packed16[a_offset + ib].qs[iqs/2]);
return vec4(kvalues_iq4nl[vui & 0xF], kvalues_iq4nl[(vui >> 4) & 0xF], kvalues_iq4nl[(vui >> 8) & 0xF], kvalues_iq4nl[vui >> 12]); return vec4(kvalues_iq4nl[vui & 0xF], kvalues_iq4nl[(vui >> 4) & 0xF], kvalues_iq4nl[(vui >> 8) & 0xF], kvalues_iq4nl[vui >> 12]);
} }
vec2 get_dm(uint ib, uint a_offset) {
return vec2(float(data_a[a_offset + ib].d), 0);
}
#endif #endif

View file

@ -31,6 +31,8 @@ void main() {
const uint y_offset = QUANT_R == 1 ? 1 : QUANT_K/2; const uint y_offset = QUANT_R == 1 ? 1 : QUANT_K/2;
vec2 v = dequantize(ib, iqs, 0); vec2 v = dequantize(ib, iqs, 0);
const vec2 dm = get_dm(ib, 0);
v = v * dm.x + dm.y;
data_d[d_offset + iybs + iqs ] = D_TYPE(v.x); data_d[d_offset + iybs + iqs ] = D_TYPE(v.x);
data_d[d_offset + iybs + iqs + y_offset] = D_TYPE(v.y); data_d[d_offset + iybs + iqs + y_offset] = D_TYPE(v.y);

View file

@ -59,18 +59,22 @@ void iter(inout FLOAT_TYPE temp[NUM_ROWS], const uint first_row, const uint num_
ibi += p.ncols; ibi += p.ncols;
#if K_PER_ITER == 8 #if K_PER_ITER == 8
const vec4 v = dequantize4(ib, iqs, a_offset); vec4 v = dequantize4(ib, iqs, a_offset);
const vec4 v2 = dequantize4(ib, iqs+(4/QUANT_R), a_offset); vec4 v2 = dequantize4(ib, iqs+(4/QUANT_R), a_offset);
FLOAT_TYPE rowtmp = 0;
const vec2 dm = get_dm(ib, a_offset);
if (dm.y != 0) { // quant has min component
v = v * dm.x + dm.y;
v2 = v2 * dm.x + dm.y;
}
// matrix multiplication // matrix multiplication
rowtmp += dot(bv0, v); FLOAT_TYPE rowtmp = dot(bv0, v);
rowtmp += dot(bv1, v2); rowtmp += dot(bv1, v2);
#if !defined(DATA_A_Q4_1) && !defined(DATA_A_Q5_1) if (dm.y == 0)
const float d = float(data_a[a_offset + ib].d); rowtmp *= dm.x;
rowtmp *= d;
#endif
temp[n] += rowtmp; temp[n] += rowtmp;
#else #else
const vec2 v = dequantize(ib, iqs, a_offset); const vec2 v = dequantize(ib, iqs, a_offset);