Vulkan Shader Refactor, Memory Debugging Option (#7947)
* Refactor shaders, extract GLSL code from ggml_vk_generate_shaders.py into vulkan-shaders directory * Improve debug log code * Add memory debug output option * Fix flake8 * Fix unnecessarily high llama-3 VRAM use
This commit is contained in:
		
							parent
							
								
									0c7b3595b9
								
							
						
					
					
						commit
						7c7836d9d4
					
				
					 54 changed files with 25266 additions and 21885 deletions
				
			
		
							
								
								
									
										37
									
								
								vulkan-shaders/rope_neox.comp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								vulkan-shaders/rope_neox.comp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,37 @@ | |||
| #version 450 | ||||
| 
 | ||||
| #include "rope_head.comp" | ||||
| 
 | ||||
// Shader entry point. Each invocation handles one pair of elements of one row:
//   - invocations whose column is at or past p.ncols do nothing;
//   - columns at or past p.n_dims are copied from data_a to data_d unchanged;
//   - otherwise the element at `lo` and the element p.n_dims/2 further along
//     are rotated by the angle whose cosine/sine rope_yarn() returns.
// NOTE(review): p, data_a, data_d, data_pos, data_ff, D_TYPE and rope_yarn are
// not declared in this file; presumably they come from rope_head.comp - confirm.
void main() {
    const uint row = gl_GlobalInvocationID.x;
    const uint col = gl_GlobalInvocationID.y * 2;  // two columns per invocation

    if (col >= p.ncols) {
        return;
    }

    // Offset of the first element of this row.
    const uint row_start = row*p.ncols;

    if (col >= p.n_dims) {
        // Outside the rotated range: pass the two adjacent values through.
        const uint tail = row_start + col;

        data_d[tail + 0] = data_a[tail + 0];
        data_d[tail + 1] = data_a[tail + 1];

        return;
    }

    const uint pair_idx  = col/2;
    const uint half_dims = p.n_dims/2;

    // The two elements rotated together sit half_dims apart.
    const uint lo = row_start + pair_idx;
    const uint hi = lo + half_dims;

    // Index into data_pos for this row.
    const uint pos_idx = row/p.p_delta_rows;

    const float theta_base = data_pos[pos_idx] * pow(p.theta_scale, col/2.0f);

    // data_ff is only read when p.has_ff is non-zero; otherwise no scaling.
    float freq_factor = 1.0f;
    if (p.has_ff != 0) {
        freq_factor = data_ff[pair_idx];
    }

    float cos_theta, sin_theta;
    rope_yarn(theta_base / freq_factor, col, cos_theta, sin_theta);

    const float x0 = float(data_a[lo]);
    const float x1 = float(data_a[hi]);

    data_d[lo] = D_TYPE(x0*cos_theta - x1*sin_theta);
    data_d[hi] = D_TYPE(x0*sin_theta + x1*cos_theta);
}
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue