wqkv hack
parent 370990abce
commit 66a6dbf702
1 changed file with 9 additions and 2 deletions
@@ -2008,7 +2008,14 @@ static void llm_load_tensors(
                     }
                 }

-                layer.wqkv = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}, backend_split);
+                // TODO: For 1B, we need e.g {2048, 6144} and the usual calculation gives us e.g {2048, 2176}.
+                // I think this is because we skip the QKV reshaping in the conversion script (maybe because parallel attention is disabled?)
+                if (model.type == MODEL_1B) {
+                    layer.wqkv = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd * 3}, backend_split);
+                } else {
+                    layer.wqkv = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}, backend_split);
+                }

                 layer.wo = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, backend_split);

                 layer.w2 = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, backend_split);
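As context for the shape mismatch the TODO describes, here is a minimal standalone sketch of the arithmetic. The concrete values (n_embd = 2048, and an n_embd_gqa that comes out as 64) are assumptions inferred only from the numbers quoted in the comment, not read from the actual model metadata: with them, the generic n_embd + 2*n_embd_gqa formula yields 2176 columns, while the unreshaped fused QKV tensor is 3*n_embd = 6144 columns wide, which is why the hack special-cases MODEL_1B.

// Hypothetical values chosen only to reproduce the numbers quoted in the TODO above.
#include <cstdint>
#include <cstdio>

int main() {
    const int64_t n_embd     = 2048; // assumed hidden size of the 1B model
    const int64_t n_embd_gqa = 64;   // assumed KV width that the generic calculation ends up with

    const int64_t generic_cols = n_embd + 2*n_embd_gqa; // usual formula        -> 2176
    const int64_t fused_cols   = 3*n_embd;              // unreshaped fused QKV -> 6144

    std::printf("generic wqkv shape: {%lld, %lld}, fused wqkv shape: {%lld, %lld}\n",
                (long long) n_embd, (long long) generic_cols,
                (long long) n_embd, (long long) fused_cols);
    return 0;
}

Compiling and running this prints the two candidate column counts; the mismatch between them is what the MODEL_1B branch papers over by hard-coding n_embd * 3.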