llama : correction of the attn.v.weight quantization for IQ3_XS (#6209)
IQ3_XS was not mentioned, IQ3_S and IQ3_M were present twice. That PR corrects this in the manner which was probably intended initially.
This commit is contained in:
		
							parent
							
								
									f77a8ffd3b
								
							
						
					
					
						commit
						e80f06d2a1
					
				
					 1 changed files with 1 additions and 7 deletions
				
			
		|  | @ -12027,13 +12027,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n | |||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) { | ||||
|             new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS; | ||||
|         } | ||||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && qs.model.hparams.n_gqa() >= 4) { | ||||
|             new_type = GGML_TYPE_Q4_K; | ||||
|         } | ||||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) { | ||||
|             new_type = GGML_TYPE_Q4_K; | ||||
|         } | ||||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && qs.model.hparams.n_gqa() >= 4) { | ||||
|         else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) && qs.model.hparams.n_gqa() >= 4) { | ||||
|             new_type = GGML_TYPE_Q4_K; | ||||
|         } | ||||
|         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) { | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue