skip-unused: fix -ngl=1 case by ensuring the input & output of each view are offloaded consistently

This commit is contained in:
ochafik 2023-08-28 15:44:55 +01:00
parent f6a446ec01
commit 9f5b7813c6

View file

@@ -2325,10 +2325,12 @@ static struct ggml_cgraph * llm_build_llama(
// Note that we do this even when N==1 so that we don't change the # nodes in the graph,
// otherwise for Metal we'd have to rebuild the concurrency list.
offload_func(cur);
cur = ggml_view_2d(ctx0, cur, n_embd, 1, cur->nb[1], (N - 1)*ggml_element_size(cur)*n_embd);
offload_func_kq(cur);
offload_func(cur);
ggml_set_name(cur, "cur-lastpos");
offload_func(inpSA);
inpSA = ggml_view_2d(ctx0, inpSA, n_embd, 1, inpSA->nb[1], (N - 1)*ggml_element_size(inpSA)*n_embd);
offload_func(inpSA);
ggml_set_name(inpSA, "inpSA-lastpos");