Vulkan Shader Refactor, Memory Debugging Option (#7947)
* Refactor shaders, extract GLSL code from ggml_vk_generate_shaders.py into vulkan-shaders directory
* Improve debug log code
* Add memory debug output option
* Fix flake8
* Fix unnecessarily high llama-3 VRAM use
This commit is contained in:
		
							parent
							
								
									0c7b3595b9
								
							
						
					
					
						commit
						7c7836d9d4
					
				
					 54 changed files with 25266 additions and 21885 deletions
				
			
		
							
								
								
									
										32
									
								
								vulkan-shaders/dequant_q4_1.comp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								vulkan-shaders/dequant_q4_1.comp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,32 @@ | |||
#version 450

#include "dequant_head.comp"

// One-dimensional workgroup of 256 invocations.
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

// Source: quantized blocks. block_q4_1 is declared outside this file; this
// shader reads its fields d, m and qs[0..15].
layout (binding = 0) readonly buffer A {block_q4_1 data_a[];};
// Destination: dequantized values, stored as D_TYPE (defined outside this file).
layout (binding = 1) writeonly buffer D {D_TYPE data_b[];};

// Dequantizes q4_1 blocks: every 4-bit value q becomes d * q + m.
//
// Work split, as established by the index math below:
//   - a workgroup is four groups of 64 invocations;
//   - each group of 64 covers 32 consecutive blocks, two invocations per block;
//   - each invocation reads 8 bytes of qs and writes 16 outputs: the low
//     nibbles go to 8 consecutive slots, the high nibbles to the 8 slots that
//     start 16 positions later.
// Every block therefore fills 32 consecutive entries of data_b.
void main() {
    // Split the local id into (group of 64, lane within that group).
    const uint lane  = gl_LocalInvocationID.x & 63u;
    const uint group = (gl_WorkGroupID.x << 2) + (gl_LocalInvocationID.x >> 6);

    // Lanes 0..31 take the first 8 bytes of qs, lanes 32..63 the last 8.
    const uint half_sel = lane >> 5;
    const uint slot     = lane & 31u;

    // Index of the block this invocation works on.
    const uint block = 32 * group + slot;

    // Skip invocations past the last block. p comes from the included header;
    // p.nel is presumably the total element count -- TODO confirm.
    if (block >= p.nel / 32) {
        return;
    }

    // First byte of qs handled here, and the matching first output slot.
    const uint qs_base  = 8 * half_sel;
    const uint out_base = 32 * block + qs_base;

    // Per-block multiplier and offset, widened to float for the arithmetic.
    const float scale  = float(data_a[block].d);
    const float offset = float(data_a[block].m);

    [[unroll]] for (uint k = 0; k < 8; ++k) {
        const uint src = qs_base + k;
        const uint dst = out_base + k;

        // Low nibble -> slot dst, high nibble -> slot dst + 16.
        data_b[dst +  0] = D_TYPE(scale * (data_a[block].qs[src] & 0xF) + offset);
        data_b[dst + 16] = D_TYPE(scale * (data_a[block].qs[src] >>  4) + offset);
    }
}
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue