diff --git a/ggml-vulkan-shaders.hpp b/ggml-vulkan-shaders.hpp index 2ec5d1eb6..cf52927b3 100644 --- a/ggml-vulkan-shaders.hpp +++ b/ggml-vulkan-shaders.hpp @@ -171,7 +171,6 @@ v = v * d; const std::string mulmat_head = R"( #version 450 -#extension GL_EXT_scalar_block_layout : require #extension GL_EXT_control_flow_attributes : enable #extension GL_EXT_shader_16bit_storage : require @@ -185,7 +184,7 @@ const std::string mulmat_head = R"( const std::string mulmat_body = R"( layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; -layout (binding = 0, scalar) readonly buffer A { A_TYPE data_a[]; }; +layout (binding = 0) readonly buffer A { A_TYPE data_a[]; }; layout (binding = 1) readonly buffer B { B_TYPE data_b[]; }; layout (binding = 2) writeonly buffer D { D_TYPE data_d[]; }; @@ -393,7 +392,6 @@ void main() { const std::string dequant_head = R"( #version 450 -#extension GL_EXT_scalar_block_layout : require #extension GL_EXT_control_flow_attributes : require #extension GL_EXT_shader_16bit_storage : require #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require @@ -402,7 +400,7 @@ const std::string dequant_head = R"( const std::string dequant_body = R"( layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; -layout (binding = 0, scalar) readonly buffer A { A_TYPE x[]; }; +layout (binding = 0) readonly buffer A { A_TYPE x[]; }; layout (binding = 1) writeonly buffer D { D_TYPE y[]; }; layout (push_constant) uniform parameter @@ -444,7 +442,6 @@ void main() { const std::string mul_mat_vec_head = R"( #version 450 -#extension GL_EXT_scalar_block_layout : require #extension GL_EXT_control_flow_attributes : enable #extension GL_EXT_shader_16bit_storage : require #extension GL_EXT_shader_8bit_storage : require @@ -453,7 +450,7 @@ const std::string mul_mat_vec_head = R"( const std::string mul_mat_vec_body = R"( layout(local_size_x = QUANT_K, local_size_y = 1, local_size_z = 1) in; -layout (binding = 0, scalar) readonly buffer A { A_TYPE x[]; }; +layout (binding = 0) readonly buffer A { A_TYPE x[]; }; layout (binding = 1) readonly buffer B { B_TYPE y[]; }; layout (binding = 2) writeonly buffer D { D_TYPE dst[]; }; diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index d45d9ee26..d61e1d058 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -796,7 +796,6 @@ void ggml_vk_test_transfer(size_t ne); void ggml_vk_test_matmul_f32(size_t m, size_t n, size_t k, size_t num_it, int split_k, int shader_size); void ggml_vk_test_matmul_f16(size_t m, size_t n, size_t k, size_t num_it, int split_k, int shader_size); void ggml_vk_test_buffer_write_zeropad(size_t m, size_t k, size_t align); -void ggml_vk_test_f32_to_f16(size_t m, size_t k); void ggml_vk_init(void) { #ifdef VK_DEBUG @@ -953,11 +952,6 @@ void ggml_vk_init(void) { ggml_vk_test_buffer_write_zeropad(233, 97, 1); ggml_vk_test_buffer_write_zeropad(256, 128, 1); - ggml_vk_test_f32_to_f16(214, 256); - ggml_vk_test_f32_to_f16(256, 2048); - ggml_vk_test_f32_to_f16(24, 1000); - ggml_vk_test_f32_to_f16(24, 24); - int step = 16; for (size_t m = step; m < 64; m += step) { ggml_vk_test_transfer(1024 * 1024 * m); @@ -2640,76 +2634,6 @@ void ggml_vk_test_transfer(size_t ne) { free(x); free(y); } -void ggml_vk_test_f32_to_f16(size_t m, size_t k) { -#ifdef VK_DEBUG - std::cerr << "ggml_vk_test_transfer(" << ne << ")" << std::endl; -#endif - // Check transfers are correct - const uint32_t ne = m * k; - vk_buffer d_X = ggml_vk_create_buffer(sizeof(float) * ne, vk::MemoryPropertyFlagBits::eDeviceLocal); - vk_buffer d_Y = ggml_vk_create_buffer(sizeof(ggml_fp16_t) * ne, vk::MemoryPropertyFlagBits::eDeviceLocal); - - float* x = (float *) malloc(sizeof(float) * ne); - ggml_fp16_t* y = (ggml_fp16_t *) malloc(sizeof(ggml_fp16_t) * ne); - - for (size_t i = 0; i < ne; i++) { - x[i] = rand() / (float)RAND_MAX; - } - - ggml_vk_pipeline_allocate_descriptor_sets(vk_pipeline_f32_to_f16, 1); - - auto begin = std::chrono::high_resolution_clock::now(); - - ggml_vk_buffer_write(&d_X, 0, x, sizeof(float) * ne, vk_device.transfer_queues[0]); - - vk_device.transfer_queues[0].queue.waitIdle(); - - auto end = std::chrono::high_resolution_clock::now(); - - double ms_to_gpu = std::chrono::duration_cast(end-begin).count() / 1000.0; - - begin = std::chrono::high_resolution_clock::now(); - - std::vector seqs; - vk_submission s = ggml_vk_begin_submission(vk_device.compute_queue); - const std::vector pc = { (int)m, (int)k, (int)k, (int)k }; - ggml_vk_sync_buffers(s.buffer, { { d_X, 0, (uint32_t)sizeof(float) * ne } }, vk_device.compute_queue, vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eShaderRead, false); - ggml_vk_sync_buffers(s.buffer, { { d_Y, 0, (uint32_t)sizeof(ggml_fp16_t) * ne} }, vk_device.compute_queue, vk::AccessFlagBits::eShaderRead, vk::AccessFlagBits::eShaderWrite, false); - ggml_vk_dispatch_pipeline(s, vk_pipeline_f32_to_f16, { { d_X, 0, (uint32_t)sizeof(float) * ne }, { d_Y, 0, (uint32_t)sizeof(ggml_fp16_t) * ne } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1}); - ggml_vk_end_submission(s, {}, {}); - seqs.push_back({ s }); - - ggml_vk_submit(vk_device.compute_queue, seqs, VK_NULL_HANDLE); - - vk_device.compute_queue.queue.waitIdle(); - - end = std::chrono::high_resolution_clock::now(); - - double ms_convert = std::chrono::duration_cast(end-begin).count() / 1000.0; - - begin = std::chrono::high_resolution_clock::now(); - - ggml_vk_buffer_read(&d_Y, 0, y, sizeof(ggml_fp16_t) * ne, vk_device.transfer_queues[1]); - - end = std::chrono::high_resolution_clock::now(); - - double ms_from_gpu = std::chrono::duration_cast(end-begin).count() / 1000.0; - - double avg_err = 0.0; - for (size_t i = 0; i < ne; i++) { - avg_err += std::fabs(x[i] - ggml_fp16_to_fp32(y[i])); - } - - std::cerr << "TEST F32 TO F16 " << ms_to_gpu << "ms to_gpu " << ms_convert << "ms convert " << ms_from_gpu << "ms from gpu avg_err=" << avg_err / ne << std::endl; - - ggml_vk_destroy_buffer(d_X); - ggml_vk_destroy_buffer(d_Y); - - ggml_vk_pipeline_cleanup(vk_pipeline_f32_to_f16); - - free(x); - free(y); -} void ggml_vk_test_matmul_f32(size_t m, size_t n, size_t k, size_t num_it, int split_k, int shader_size) { #ifdef VK_DEBUG