better sanity check skipping for QRWKV6 in llama-quant
thanks @compilade

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
parent d8a304c2ef
commit 324afba5cc
1 changed file with 2 additions and 2 deletions
@@ -621,8 +621,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 
     qs.n_ffn_down = qs.n_ffn_gate = qs.n_ffn_up = (int)model.hparams.n_layer;
 
-    // sanity checks
-    if (!llama_model_is_recurrent(&model))
+    // sanity checks for models that have attention layers
+    if (qs.n_attention_wv != 0)
     {
         const auto & n_head_kv_iter = model.hparams.n_head_kv_arr.begin();
         // attention layers have a non-zero number of kv heads
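For context, a minimal sketch (not the actual llama.cpp code) of how the guarded sanity check plays out after this change: the expected number of attention layers is derived from n_head_kv_arr (attention layers have a non-zero kv-head count) and compared against the attention wv tensors counted during quantization, but only when qs.n_attention_wv != 0. The struct layouts, helper name, and assertion below are illustrative assumptions.

#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>

// Illustrative stand-ins; field names follow the diff context, but these
// struct layouts are assumptions, not the real llama.cpp types.
struct hparams_t {
    uint32_t n_layer = 0;
    std::array<uint32_t, 512> n_head_kv_arr{}; // kv heads per layer; 0 => recurrent / non-attention layer
};

struct quantize_state_t {
    int n_attention_wv = 0; // number of attention wv tensors seen while quantizing
};

// Sketch of the guarded check after this commit: only models that actually
// produced attention wv tensors are validated.
static void sanity_check_attention(const hparams_t & hparams, const quantize_state_t & qs) {
    // sanity checks for models that have attention layers
    if (qs.n_attention_wv != 0)
    {
        const auto & n_head_kv_iter = hparams.n_head_kv_arr.begin();
        // attention layers have a non-zero number of kv heads
        const int n_attn_layer = (int) hparams.n_layer -
            (int) std::count(n_head_kv_iter, n_head_kv_iter + hparams.n_layer, 0u);
        assert(qs.n_attention_wv == n_attn_layer && "n_attention_wv is unexpected");
        (void) n_attn_layer;
    }
    // a model with no attention layers (e.g. QRWKV6) has qs.n_attention_wv == 0 and skips the check
}

int main() {
    hparams_t hp;
    hp.n_layer = 4;
    hp.n_head_kv_arr = {}; // all zero: every layer is recurrent, no attention layers

    quantize_state_t qs;
    qs.n_attention_wv = 0; // no attention wv tensors were quantized

    sanity_check_attention(hp, qs); // check is skipped, no assertion fires
    std::puts("sanity check skipped for recurrent-only model");
    return 0;
}

The design point is that the guard now keys off what was actually counted (qs.n_attention_wv) rather than off an architecture flag such as llama_model_is_recurrent, so a model that contributes no attention wv tensors skips the comparison while any model that does contribute them is still validated.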