More little fixes and stuff

This commit is contained in:
niansa 2023-06-23 20:37:58 +02:00
parent e0814f86a2
commit 5d5f66d1d9

View file

@@ -165,14 +165,15 @@ const std::shared_ptr<kp::Tensor> & ggml_vk_get_tensor(struct ggml_kompute_conte
} }
static std::vector<uint32_t> compileSource(const std::string& source) { static std::vector<uint32_t> compileSource(const std::string& source, const char *debug_name) {
printf("%s: Compiling compute program: %s\n", __func__, debug_name);
static std::mutex mutex; static std::mutex mutex;
std::lock_guard<std::mutex> L(mutex); std::lock_guard<std::mutex> L(mutex);
//FIXME: Terrible solution!!!! //FIXME: Terrible solution!!!!
std::ofstream fileOut("tmp_kp_shader.comp"); std::ofstream fileOut("tmp_kp_shader.comp");
fileOut << source; fileOut << source;
fileOut.close(); fileOut.close();
if (system(std::string("glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv").c_str())) if (system(std::string("glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv > /dev/null").c_str()))
throw std::runtime_error("Error running glslangValidator command"); throw std::runtime_error("Error running glslangValidator command");
std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary); std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary);
std::vector<char> buffer; std::vector<char> buffer;
@@ -251,7 +252,7 @@ void ggml_vk_dequantize_row_q4_0(const void *x_, float *y, int k) {
static const int qk = QK4_0; static const int qk = QK4_0;
const unsigned nb = k / qk; const unsigned nb = k / qk;
const unsigned y_size = nb*qk; const unsigned y_size = nb*qk;
const static auto spirv = compileSource(program_source_head+program_dequantize_row_q4_0); const static auto spirv = compileSource(program_source_head+program_dequantize_row_q4_0, __func__);
const auto x = reinterpret_cast<const block_q4_0*>(x_); const auto x = reinterpret_cast<const block_q4_0*>(x_);
@@ -301,7 +302,7 @@ void ggml_vk_dequantize_row_q4_1(const void *x_, float *y, int k) {
static const int qk = QK4_1; static const int qk = QK4_1;
const unsigned nb = k / qk; const unsigned nb = k / qk;
const unsigned y_size = nb*qk; const unsigned y_size = nb*qk;
const static auto spirv = compileSource(program_source_head+program_dequantize_row_q4_1); const static auto spirv = compileSource(program_source_head+program_dequantize_row_q4_1, __func__);
const auto x = reinterpret_cast<const block_q4_1*>(x_); const auto x = reinterpret_cast<const block_q4_1*>(x_);
@@ -352,7 +353,7 @@ void ggml_vk_abmath(kp::Sequence& seq,
const static auto spirv = compileSource(program_source_head+ const static auto spirv = compileSource(program_source_head+
"#define MATH_OP "+std::string(1, mathOP)+"\n" "#define MATH_OP "+std::string(1, mathOP)+"\n"
"#define ROW_OP "+(row?"% pcs.row":"")+'\n'+ "#define ROW_OP "+(row?"% pcs.row":"")+'\n'+
program_abmath); program_abmath, __func__);
struct PushConstants { struct PushConstants {
uint32_t inAOff, inBOff, outOff, row; uint32_t inAOff, inBOff, outOff, row;
@@ -370,7 +371,6 @@ void ggml_vk_add(Args&&... args) {
template <typename... Args> template <typename... Args>
void ggml_vk_mul(Args&&... args) { void ggml_vk_mul(Args&&... args) {
printf("%s: multiplying...\n", __func__);
return ggml_vk_abmath<'*'>(std::forward<Args>(args)...); return ggml_vk_abmath<'*'>(std::forward<Args>(args)...);
} }
@@ -398,7 +398,7 @@ void ggml_vk_scale(kp::Sequence& seq,
const std::shared_ptr<kp::Tensor>& in, uint32_t inOff, const std::shared_ptr<kp::Tensor>& in, uint32_t inOff,
const std::shared_ptr<kp::Tensor>& out, uint32_t outOff, const std::shared_ptr<kp::Tensor>& out, uint32_t outOff,
uint32_t size, float scale) { uint32_t size, float scale) {
const static auto spirv = compileSource(program_source_head+program_scale); const static auto spirv = compileSource(program_source_head+program_scale, __func__);
struct PushConstants { struct PushConstants {
uint32_t inOff, outOff; uint32_t inOff, outOff;
@@ -445,7 +445,7 @@ void main() {
template <typename... Args> template <typename... Args>
void ggml_vk_silu(Args&&... args) { void ggml_vk_silu(Args&&... args) {
const static auto spirv = compileSource(program_source_head+program_silu); const static auto spirv = compileSource(program_source_head+program_silu, __func__);
ggml_vk_xxlu(spirv, std::forward<Args>(args)...); ggml_vk_xxlu(spirv, std::forward<Args>(args)...);
} }
@@ -471,7 +471,7 @@ void main() {
template <typename... Args> template <typename... Args>
void ggml_vk_relu(Args&&... args) { void ggml_vk_relu(Args&&... args) {
const static auto spirv = compileSource(program_source_head+program_relu); const static auto spirv = compileSource(program_source_head+program_relu, __func__);
ggml_vk_xxlu(spirv, std::forward<Args>(args)...); ggml_vk_xxlu(spirv, std::forward<Args>(args)...);
} }
@@ -498,7 +498,7 @@ void main() {
template <typename... Args> template <typename... Args>
void ggml_vk_gelu(Args&&... args) { void ggml_vk_gelu(Args&&... args) {
const static auto spirv = compileSource(program_source_head+program_gelu); const static auto spirv = compileSource(program_source_head+program_gelu, __func__);
ggml_vk_xxlu(spirv, std::forward<Args>(args)...); ggml_vk_xxlu(spirv, std::forward<Args>(args)...);
} }
@@ -514,13 +514,9 @@ void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph
for (auto& sequence : sequences) { for (auto& sequence : sequences) {
sequence = mgr.sequence(); sequence = mgr.sequence();
} }
std::vector<std::thread> threads(n_seq);
for (int seq_idx = 0; seq_idx < n_seq; ++seq_idx) { for (int seq_idx = 0; seq_idx < n_seq; ++seq_idx) {
const int n_nodes_per_seq = (gf->n_nodes + n_seq - 1) / n_seq; const int n_nodes_per_seq = (gf->n_nodes + n_seq - 1) / n_seq;
threads[seq_idx] = std::thread([&, seq_idx, n_nodes_per_seq] () {
size_t offs_src0 = 0; size_t offs_src0 = 0;
size_t offs_src1 = 0; size_t offs_src1 = 0;
size_t offs_dst = 0; size_t offs_dst = 0;
@@ -571,9 +567,11 @@ void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph
const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT; const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT;
const enum ggml_type dstt = dst ? dst->type : GGML_TYPE_COUNT; const enum ggml_type dstt = dst ? dst->type : GGML_TYPE_COUNT;
std::shared_ptr<kp::Tensor> id_src0 = src0 ? ggml_vk_get_tensor(ctx, src0) : nullptr;
std::shared_ptr<kp::Tensor> id_src1 = src1 ? ggml_vk_get_tensor(ctx, src1) : nullptr; const static std::shared_ptr<kp::Tensor> nullTensor = nullptr;
std::shared_ptr<kp::Tensor> id_dst = dst ? ggml_vk_get_tensor(ctx, dst) : nullptr; const std::shared_ptr<kp::Tensor>& id_src0 = src0 ? ggml_vk_get_tensor(ctx, src0) : nullTensor;
const std::shared_ptr<kp::Tensor>& id_src1 = src1 ? ggml_vk_get_tensor(ctx, src1) : nullTensor;
const std::shared_ptr<kp::Tensor>& id_dst = dst ? ggml_vk_get_tensor(ctx, dst) : nullTensor;
switch (dst->op) { switch (dst->op) {
case GGML_OP_RESHAPE: case GGML_OP_RESHAPE:
@@ -613,21 +611,20 @@ void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph
{ {
ggml_vk_gelu(seq, id_src0, offs_src0, id_dst, offs_dst, ggml_nelements(dst)); ggml_vk_gelu(seq, id_src0, offs_src0, id_dst, offs_dst, ggml_nelements(dst));
} break; } break;
default: //default:
fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op)); //fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
//GGML_ASSERT(false); //GGML_ASSERT(false);
} }
} }
// Evaluate sequence // Evaluate sequence
seq.eval(); seq.evalAsync();
});
} }
// Wait for all threads to finish // Wait for all sequences to finish
for (auto& thread : threads) { for (auto& sequence : sequences) {
if (thread.joinable()) if (sequence->isRunning())
thread.join(); sequence->evalAwait();
} }
} }