Vulkan IQ4_NL Support (#8613)

* Fix Vulkan matmul tests compile errors

* Add Vulkan IQ4_NL support

* Fix Vulkan DeepSeek-Coder-V2-Lite MoE support
This commit is contained in:
0cc4m 2024-07-23 10:56:49 +02:00 committed by GitHub
parent 46e47417aa
commit 751fcfc6c3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 222 additions and 193 deletions

View file

@ -58,3 +58,11 @@ vec2 dequantize(uint ib, uint iqs, uint a_offset) {
return vec2(int(data_a[a_offset + ib].qs[iqs]), int(data_a[a_offset + ib].qs[iqs + 1])) * d;
}
#endif
#if defined(DATA_A_IQ4_NL)
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
const float d = float(data_a[a_offset + ib].d);
const uint vui = uint(data_a[a_offset + ib].qs[iqs]);
return vec2(kvalues_iq4nl[vui & 0xF], kvalues_iq4nl[vui >> 4]) * d;
}
#endif

View file

@ -0,0 +1,30 @@
#version 450
#include "dequant_head.comp"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
layout (binding = 0) readonly buffer A {block_iq4_nl data_a[];};
layout (binding = 1) writeonly buffer D {D_TYPE data_b[];};
void main() {
const uint i = gl_WorkGroupID.x * 4 + gl_LocalInvocationID.x / 64;
const uint tid = gl_LocalInvocationID.x % 64;
const uint il = tid/32;
const uint ir = tid%32;
const uint ib = 32*i + ir;
if (ib >= p.nel / 32) {
return;
}
const uint q_idx = 8*il;
const uint b_idx = 1024*i + 32*ir + q_idx;
const float d = float(data_a[ib].d);
[[unroll]] for (uint l = 0; l < 8; ++l) {
data_b[b_idx + l + 0] = D_TYPE(d * kvalues_iq4nl[data_a[ib].qs[q_idx + l] & 0xF]);
data_b[b_idx + l + 16] = D_TYPE(d * kvalues_iq4nl[data_a[ib].qs[q_idx + l] >> 4]);
}
}

View file

@ -18,15 +18,13 @@ void main() {
return;
}
const uint b_idx = 1024*i + 32*ir + 8*il;
const uint q_idx = 8*il;
const uint b_idx = 1024*i + 32*ir + q_idx;
const float d = float(data_a[ib].d);
const float dm = -8.0f * d;
const uint q_idx = 8*il;
[[unroll]] for (uint l = 0; l < 8; ++l) {
data_b[b_idx + l + 0] = D_TYPE(d * (data_a[ib].qs[q_idx + l] & 0xF) + dm);
data_b[b_idx + l + 16] = D_TYPE(d * (data_a[ib].qs[q_idx + l] >> 4) + dm);
data_b[b_idx + l + 0] = D_TYPE(d * ((data_a[ib].qs[q_idx + l] & 0xF) - 8.0f));
data_b[b_idx + l + 16] = D_TYPE(d * ((data_a[ib].qs[q_idx + l] >> 4) - 8.0f));
}
}

View file

@ -71,7 +71,7 @@ shared FLOAT_TYPE buf_a[BM * (BK+1)];
shared FLOAT_TYPE buf_b[BN * (BK+1)];
#ifdef MUL_MAT_ID
shared u16vec2 row_ids[2048];
shared u16vec2 row_ids[3072];
#endif
void main() {
@ -380,6 +380,19 @@ void main() {
buf_a[buf_idx ] = FLOAT_TYPE(dscale * float(int8_t(((data_a[ib].ql[qsi ] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi ] >> qhshift) & 3) << 4)) - 32));
buf_a[buf_idx + 1] = FLOAT_TYPE(dscale * float(int8_t(((data_a[ib].ql[qsi + 1] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi + 1] >> qhshift) & 3) << 4)) - 32));
#elif defined(DATA_A_IQ4_NL)
const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
const uint buf_idx = (loadc_a + l) * (BK+1) + loadr_a;
const uint ib = idx / 16;
const uint iqs = idx & 0xF;
const float d = float(data_a[ib].d);
const uint vui = uint(data_a[ib].qs[iqs]);
const vec2 v = vec2(kvalues_iq4nl[vui & 0xF], kvalues_iq4nl[vui >> 4]) * d;
buf_a[buf_idx ] = FLOAT_TYPE(v.x);
buf_a[buf_idx + 16] = FLOAT_TYPE(v.y);
#endif
}
[[unroll]] for (uint l = 0; l < BN; l += loadstride_b) {

View file

@ -177,3 +177,24 @@ struct block_q6_K
#define A_TYPE block_q6_K
#endif
// IQuants
#if defined(DATA_A_IQ4_NL)
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 32
#define QUANT_R 2
struct block_iq4_nl
{
float16_t d;
uint8_t qs[QUANT_K/2];
};
#define A_TYPE block_iq4_nl
const int8_t kvalues_iq4nl[16] = {
int8_t(-127), int8_t(-104), int8_t(-83), int8_t(-65), int8_t(-49), int8_t(-35), int8_t(-22), int8_t(-10),
int8_t(1), int8_t(13), int8_t(25), int8_t(38), int8_t(53), int8_t(69), int8_t(89), int8_t(113)
};
#endif

View file

@ -52,7 +52,8 @@ const std::vector<std::string> type_names = {
"q3_k",
"q4_k",
"q5_k",
"q6_k"
"q6_k",
"iq4_nl"
};
void execute_command(const std::string& command, std::string& stdout_str, std::string& stderr_str) {