Fixed embeddings when offloading non-repeating layers

This commit is contained in:
JohannesGaessler 2023-06-16 15:34:46 +02:00
parent a09f9195be
commit 1170a95732

View file

@@ -1654,7 +1654,7 @@ static bool llama_eval_internal(
// cur = cur*norm(broadcasted)
cur = ggml_mul(ctx0, cur, model.norm);
offload_func_nr(cur);
// offload_func_nr(cur); // TODO CPU + GPU mirrored backend
ggml_set_name(cur, "result_norm");
embeddings = cur;