llama : fix compatibility with old 2 expert models (#6735)
This commit is contained in:
		
							parent
							
								
									3b8f1ec4b1
								
							
						
					
					
						commit
						c71bfd736e
					
				
					 1 changed files with 1 additions and 1 deletions
				
			
@@ -4592,7 +4592,7 @@ static bool llm_load_tensors(
|     size_t ctx_size = ggml_tensor_overhead()*(ml.n_tensors + 1); // +1 for models where tok_embd is duplicated as output
 | ||||
| 
 | ||||
|     // for moe merged tensors
 | ||||
|-    ctx_size += ggml_tensor_overhead()*hparams.n_expert*n_layer; | ||||
|+    ctx_size += ggml_tensor_overhead()*n_layer*3; | ||||
| 
 | ||||
|     std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map; | ||||
|     for (auto & it : buft_layer_count) { | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue