Remove unnecessary scalar layout extension

0cc4m 2023-08-19 17:53:48 +02:00
parent 7e88677af4
commit 5ae5d2bd5b
2 changed files with 3 additions and 82 deletions
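Context for the change (not part of the commit message): GL_EXT_scalar_block_layout relaxes the std430 packing rules so buffer block members are aligned only to their scalar base alignment. The blocks used by these shaders contain only scalar members (float16_t, uint8_t and arrays of them), and for such blocks std430 and the scalar layout produce identical offsets and strides, which is presumably why the extension and the "scalar" qualifier can be dropped. A minimal illustrative shader follows; the block_q4_0 struct is a hypothetical stand-in shaped like llama.cpp's quantized block and is not taken from this diff.

#version 450
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_8bit_storage : require

layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

// Hypothetical quantized block, shaped like a packed CPU struct:
// a 2-byte half scale followed by 16 bytes of quantized values.
struct block_q4_0 {
    float16_t d;       // base alignment 2
    uint8_t   qs[16];  // base alignment 1
};

// Before this commit (needed GL_EXT_scalar_block_layout):
//   layout (binding = 0, scalar) readonly buffer A { block_q4_0 x[]; };
// After: plain std430. Because every member is a scalar, std430 and the
// scalar layout agree on every offset and on the 18-byte array stride,
// so the extra extension is not required.
layout (binding = 0) readonly buffer A { block_q4_0 x[]; };
layout (binding = 1) writeonly buffer D { float y[]; };

void main() {
    // No-op body; this shader only illustrates the buffer block layout.
}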

View file

@@ -171,7 +171,6 @@ v = v * d;
 const std::string mulmat_head = R"(
 #version 450
-#extension GL_EXT_scalar_block_layout : require
 #extension GL_EXT_control_flow_attributes : enable
 #extension GL_EXT_shader_16bit_storage : require
@@ -185,7 +184,7 @@ const std::string mulmat_head = R"(
 const std::string mulmat_body = R"(
 layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
-layout (binding = 0, scalar) readonly buffer A { A_TYPE data_a[]; };
+layout (binding = 0) readonly buffer A { A_TYPE data_a[]; };
 layout (binding = 1) readonly buffer B { B_TYPE data_b[]; };
 layout (binding = 2) writeonly buffer D { D_TYPE data_d[]; };
@@ -393,7 +392,6 @@ void main() {
 const std::string dequant_head = R"(
 #version 450
-#extension GL_EXT_scalar_block_layout : require
 #extension GL_EXT_control_flow_attributes : require
 #extension GL_EXT_shader_16bit_storage : require
 #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
@@ -402,7 +400,7 @@ const std::string dequant_head = R"(
 const std::string dequant_body = R"(
 layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
-layout (binding = 0, scalar) readonly buffer A { A_TYPE x[]; };
+layout (binding = 0) readonly buffer A { A_TYPE x[]; };
 layout (binding = 1) writeonly buffer D { D_TYPE y[]; };
 layout (push_constant) uniform parameter
@@ -444,7 +442,6 @@ void main() {
 const std::string mul_mat_vec_head = R"(
 #version 450
-#extension GL_EXT_scalar_block_layout : require
 #extension GL_EXT_control_flow_attributes : enable
 #extension GL_EXT_shader_16bit_storage : require
 #extension GL_EXT_shader_8bit_storage : require
@@ -453,7 +450,7 @@ const std::string mul_mat_vec_head = R"(
 const std::string mul_mat_vec_body = R"(
 layout(local_size_x = QUANT_K, local_size_y = 1, local_size_z = 1) in;
-layout (binding = 0, scalar) readonly buffer A { A_TYPE x[]; };
+layout (binding = 0) readonly buffer A { A_TYPE x[]; };
 layout (binding = 1) readonly buffer B { B_TYPE y[]; };
 layout (binding = 2) writeonly buffer D { D_TYPE dst[]; };
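One way to sanity-check the shader-side change above is to assert on the host that the packed CPU struct matches the 18-byte std430 stride the shaders now rely on. This is a hedged C++ sketch, not code from this commit; block_q4_0 here is a hypothetical mirror of ggml's quantized block.

#include <cstdint>

// Hypothetical CPU-side mirror of the quantized block stored in the A buffer;
// the real definition lives in ggml and is not part of this diff.
struct block_q4_0 {
    std::uint16_t d;       // fp16 scale stored as raw bits
    std::uint8_t  qs[16];  // packed 4-bit quants
};

// std430 gives this struct a 2-byte alignment and an 18-byte array stride,
// so the CPU layout must come out to the same 18 bytes.
static_assert(sizeof(block_q4_0) == 18, "CPU struct must match the std430 stride used by the shaders");

int main() { return 0; }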

View file

@@ -796,7 +796,6 @@ void ggml_vk_test_transfer(size_t ne);
 void ggml_vk_test_matmul_f32(size_t m, size_t n, size_t k, size_t num_it, int split_k, int shader_size);
 void ggml_vk_test_matmul_f16(size_t m, size_t n, size_t k, size_t num_it, int split_k, int shader_size);
 void ggml_vk_test_buffer_write_zeropad(size_t m, size_t k, size_t align);
-void ggml_vk_test_f32_to_f16(size_t m, size_t k);

 void ggml_vk_init(void) {
 #ifdef VK_DEBUG
@@ -953,11 +952,6 @@ void ggml_vk_init(void) {
     ggml_vk_test_buffer_write_zeropad(233, 97, 1);
     ggml_vk_test_buffer_write_zeropad(256, 128, 1);
-
-    ggml_vk_test_f32_to_f16(214, 256);
-    ggml_vk_test_f32_to_f16(256, 2048);
-    ggml_vk_test_f32_to_f16(24, 1000);
-    ggml_vk_test_f32_to_f16(24, 24);

     int step = 16;
     for (size_t m = step; m < 64; m += step) {
         ggml_vk_test_transfer(1024 * 1024 * m);
@@ -2640,76 +2634,6 @@ void ggml_vk_test_transfer(size_t ne) {
     free(x);
     free(y);
 }
-
-void ggml_vk_test_f32_to_f16(size_t m, size_t k) {
-#ifdef VK_DEBUG
-    std::cerr << "ggml_vk_test_transfer(" << ne << ")" << std::endl;
-#endif
-    // Check transfers are correct
-    const uint32_t ne = m * k;
-
-    vk_buffer d_X = ggml_vk_create_buffer(sizeof(float) * ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
-    vk_buffer d_Y = ggml_vk_create_buffer(sizeof(ggml_fp16_t) * ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
-
-    float* x = (float *) malloc(sizeof(float) * ne);
-    ggml_fp16_t* y = (ggml_fp16_t *) malloc(sizeof(ggml_fp16_t) * ne);
-
-    for (size_t i = 0; i < ne; i++) {
-        x[i] = rand() / (float)RAND_MAX;
-    }
-
-    ggml_vk_pipeline_allocate_descriptor_sets(vk_pipeline_f32_to_f16, 1);
-
-    auto begin = std::chrono::high_resolution_clock::now();
-
-    ggml_vk_buffer_write(&d_X, 0, x, sizeof(float) * ne, vk_device.transfer_queues[0]);
-
-    vk_device.transfer_queues[0].queue.waitIdle();
-
-    auto end = std::chrono::high_resolution_clock::now();
-
-    double ms_to_gpu = std::chrono::duration_cast<std::chrono::microseconds>(end-begin).count() / 1000.0;
-
-    begin = std::chrono::high_resolution_clock::now();
-
-    std::vector<vk_sequence> seqs;
-    vk_submission s = ggml_vk_begin_submission(vk_device.compute_queue);
-    const std::vector<int> pc = { (int)m, (int)k, (int)k, (int)k };
-    ggml_vk_sync_buffers(s.buffer, { { d_X, 0, (uint32_t)sizeof(float) * ne } }, vk_device.compute_queue, vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eShaderRead, false);
-    ggml_vk_sync_buffers(s.buffer, { { d_Y, 0, (uint32_t)sizeof(ggml_fp16_t) * ne} }, vk_device.compute_queue, vk::AccessFlagBits::eShaderRead, vk::AccessFlagBits::eShaderWrite, false);
-    ggml_vk_dispatch_pipeline(s, vk_pipeline_f32_to_f16, { { d_X, 0, (uint32_t)sizeof(float) * ne }, { d_Y, 0, (uint32_t)sizeof(ggml_fp16_t) * ne } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1});
-    ggml_vk_end_submission(s, {}, {});
-    seqs.push_back({ s });
-
-    ggml_vk_submit(vk_device.compute_queue, seqs, VK_NULL_HANDLE);
-
-    vk_device.compute_queue.queue.waitIdle();
-
-    end = std::chrono::high_resolution_clock::now();
-
-    double ms_convert = std::chrono::duration_cast<std::chrono::microseconds>(end-begin).count() / 1000.0;
-
-    begin = std::chrono::high_resolution_clock::now();
-
-    ggml_vk_buffer_read(&d_Y, 0, y, sizeof(ggml_fp16_t) * ne, vk_device.transfer_queues[1]);
-
-    end = std::chrono::high_resolution_clock::now();
-
-    double ms_from_gpu = std::chrono::duration_cast<std::chrono::microseconds>(end-begin).count() / 1000.0;
-
-    double avg_err = 0.0;
-    for (size_t i = 0; i < ne; i++) {
-        avg_err += std::fabs(x[i] - ggml_fp16_to_fp32(y[i]));
-    }
-
-    std::cerr << "TEST F32 TO F16 " << ms_to_gpu << "ms to_gpu " << ms_convert << "ms convert " << ms_from_gpu << "ms from gpu avg_err=" << avg_err / ne << std::endl;
-
-    ggml_vk_destroy_buffer(d_X);
-    ggml_vk_destroy_buffer(d_Y);
-
-    ggml_vk_pipeline_cleanup(vk_pipeline_f32_to_f16);
-
-    free(x);
-    free(y);
-}

 void ggml_vk_test_matmul_f32(size_t m, size_t n, size_t k, size_t num_it, int split_k, int shader_size) {
 #ifdef VK_DEBUG