llama : fix command-r inference when omitting outputs (#6367)
This commit is contained in:
		
							parent
							
								
									28cb9a09c4
								
							
						
					
					
						commit
						0308f5e3d7
					
				
					 1 changed files with 3 additions and 2 deletions
				
			
@@ -9152,8 +9152,9 @@ struct llm_build_context {
             if (il == n_layer - 1) {
                 // skip computing output for unused tokens
                 struct ggml_tensor * inp_out_ids = build_inp_out_ids();
-                cur  = ggml_get_rows(ctx0,  cur, inp_out_ids);
-                inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
+                cur     = ggml_get_rows(ctx0,     cur, inp_out_ids);
+                inpL    = ggml_get_rows(ctx0,    inpL, inp_out_ids);
+                ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);
             }
 
             struct ggml_tensor * attn_out = cur;
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue