Cleanups
This commit is contained in:
parent
2f3fe0c0a4
commit
3b3d30e4ad
1 changed file with 15 additions and 22 deletions
|
@ -9,6 +9,10 @@
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <kompute/Kompute.hpp>
|
#include <kompute/Kompute.hpp>
|
||||||
|
|
||||||
|
#ifndef __STDC_IEC_559__
|
||||||
|
#error Your C implementation is not IEC 559 compliant, which is required for proper Vulkan interop.
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef ggml_fp16_t half;
|
typedef ggml_fp16_t half;
|
||||||
|
|
||||||
#define MULTILINE_QUOTE(...) #__VA_ARGS__
|
#define MULTILINE_QUOTE(...) #__VA_ARGS__
|
||||||
|
@ -53,25 +57,20 @@ layout(binding = 0) buffer tensorBlockQ4_0D { float16_t x_d[]; };
|
||||||
layout(binding = 1) buffer tensorBlockQ4_0QS { uint8_t x_qs[]; };
|
layout(binding = 1) buffer tensorBlockQ4_0QS { uint8_t x_qs[]; };
|
||||||
layout(binding = 2) buffer tensorY { float y[]; };
|
layout(binding = 2) buffer tensorY { float y[]; };
|
||||||
|
|
||||||
// Push constants
|
|
||||||
layout(push_constant) uniform PushConstants {
|
|
||||||
int k;
|
|
||||||
} pcs;
|
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
const int qk = QK4_0;
|
const int qk = QK4_0;
|
||||||
|
|
||||||
const int i = int(gl_GlobalInvocationID.x);
|
const int i = int(gl_GlobalInvocationID.x);
|
||||||
const int j = int(gl_GlobalInvocationID.y);
|
const int j = int(gl_GlobalInvocationID.y);
|
||||||
|
|
||||||
const float16_t d = x_d[i];
|
const float d = float(x_d[i]);
|
||||||
const uint8_t qs = x_qs[i * (QK4_0 / 2) + j];
|
const uint8_t qs = x_qs[i * (qk / 2) + j];
|
||||||
|
|
||||||
const int x0 = (qs & 0x0F) - 8;
|
const int x0 = (qs & 0x0F) - 8;
|
||||||
const int x1 = (qs >> 4) - 8;
|
const int x1 = (qs >> 4) - 8;
|
||||||
|
|
||||||
y[i*qk + j + 0 ] = float16_t(x0)*d;
|
y[i*qk + j + 0 ] = float(x0)*d;
|
||||||
y[i*qk + j + qk/2] = float16_t(x1)*d;
|
y[i*qk + j + qk/2] = float(x1)*d;
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -97,20 +96,20 @@ void ggml_vk_dequantize_row_q4_0(const void *x_, float *y, int k) {
|
||||||
|
|
||||||
const auto x = reinterpret_cast<const block_q4_0*>(x_);
|
const auto x = reinterpret_cast<const block_q4_0*>(x_);
|
||||||
|
|
||||||
|
assert(k % qk == 0);
|
||||||
|
|
||||||
auto getVecBlockQ4_0D = [] (const block_q4_0 *x) {
|
auto getVecBlockQ4_0D = [] (const block_q4_0 *x) {
|
||||||
std::vector<half> fres;
|
std::vector<half> fres(nb);
|
||||||
fres.reserve(nb);
|
|
||||||
for (unsigned it = 0; it != nb; it++) {
|
for (unsigned it = 0; it != nb; it++) {
|
||||||
fres.push_back(x[it].d);
|
fres[it] = x[it].d;
|
||||||
}
|
}
|
||||||
return fres;
|
return fres;
|
||||||
};
|
};
|
||||||
auto getVecBlockQ4_0QS = [] (const block_q4_0 *x) {
|
auto getVecBlockQ4_0QS = [] (const block_q4_0 *x) {
|
||||||
std::vector<uint8_t> fres;
|
std::vector<uint8_t> fres(nb*(qk/2));
|
||||||
fres.resize(nb*(qk/2));
|
|
||||||
for (unsigned x_it = 0; x_it != nb; x_it++) {
|
for (unsigned x_it = 0; x_it != nb; x_it++) {
|
||||||
for (unsigned qs_it = 0; qs_it != qk / 2; qs_it++) {
|
for (unsigned qs_it = 0; qs_it != qk / 2; qs_it++) {
|
||||||
fres.push_back(x[x_it].qs[qs_it]);
|
fres[x_it * (qk / 2) + qs_it] = x[x_it].qs[qs_it];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return fres;
|
return fres;
|
||||||
|
@ -120,15 +119,9 @@ void ggml_vk_dequantize_row_q4_0(const void *x_, float *y, int k) {
|
||||||
const auto tensorBlockQ4_0QS = mgr.tensorT<uint8_t>(getVecBlockQ4_0QS(x));
|
const auto tensorBlockQ4_0QS = mgr.tensorT<uint8_t>(getVecBlockQ4_0QS(x));
|
||||||
const auto tensorY = mgr.tensor(std::vector<float>(y, y+y_size));
|
const auto tensorY = mgr.tensor(std::vector<float>(y, y+y_size));
|
||||||
|
|
||||||
struct PushConsts {
|
|
||||||
int k;
|
|
||||||
} pushConsts {
|
|
||||||
k
|
|
||||||
};
|
|
||||||
|
|
||||||
mgr.sequence()
|
mgr.sequence()
|
||||||
->record<kp::OpTensorSyncDevice>({tensorBlockQ4_0D, tensorBlockQ4_0QS, tensorY})
|
->record<kp::OpTensorSyncDevice>({tensorBlockQ4_0D, tensorBlockQ4_0QS, tensorY})
|
||||||
->record<kp::OpAlgoDispatch>(mgr.algorithm({tensorBlockQ4_0D, tensorBlockQ4_0QS, tensorY}, spirv, {nb, qk/2, 0}, {}, {0}), std::vector<PushConsts>{pushConsts})
|
->record<kp::OpAlgoDispatch>(mgr.algorithm({tensorBlockQ4_0D, tensorBlockQ4_0QS, tensorY}, spirv, {nb, qk/2, 0}))
|
||||||
->record<kp::OpTensorSyncLocal>({tensorY})
|
->record<kp::OpTensorSyncLocal>({tensorY})
|
||||||
->eval();
|
->eval();
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue