llama : add missing kv clear in llama_beam_search (#6664)
This commit is contained in:
		
							parent
							
								
									04fbc5f23e
								
							
						
					
					
						commit
						1958f7e06c
					
				
					 1 changed file with 5 additions and 0 deletions
				
			
		|  | @ -13063,6 +13063,11 @@ struct llama_beam_search_data { | |||
|             } | ||||
|             llama_logit_info logit_info(ctx); | ||||
|             std::vector<llama_token_data> next_tokens = logit_info.top_k(n_beams); | ||||
| 
 | ||||
|             // Clear the kv slot so that other beams may try different tokens at this position. The llama_decode()
 | ||||
|             // call in loop() will conclusively fill in the kv slot once the beams converge at this position.
 | ||||
|             llama_kv_cache_seq_rm(ctx, 0, n_past, -1); | ||||
| 
 | ||||
|             size_t i=0; | ||||
|             if (next_beams.size() < n_beams) { | ||||
|                 for (; next_beams.size() < n_beams ; ++i) { | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue