better sanity check skipping for QRWKV6 in llama-quant
thanks @compilade

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
parent d8a304c2ef
commit 324afba5cc
1 changed file with 2 additions and 2 deletions
@@ -621,8 +621,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     qs.n_ffn_down = qs.n_ffn_gate = qs.n_ffn_up = (int)model.hparams.n_layer;

-    // sanity checks
-    if (!llama_model_is_recurrent(&model))
+    // sanity checks for models that have attention layers
+    if (qs.n_attention_wv != 0)
     {
         const auto & n_head_kv_iter = model.hparams.n_head_kv_arr.begin();
         // attention layers have a non-zero number of kv heads