From 9e7cecc1c8b7e4d79b3ccf3eee4ff44f4815c00a Mon Sep 17 00:00:00 2001
From: slaren
Date: Wed, 13 Mar 2024 12:18:09 +0100
Subject: [PATCH] llama : fix norm backend

---
 llama.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index b3161b521..a2ceac163 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -8367,7 +8367,9 @@ static struct ggml_cgraph * llama_build_graph(
             }
         }
 
-        if (il != -1) {
+        // norm may be automatically assigned to the backend of the previous layer, increasing data transfer between backends
+        // to fix this, we assign the norm layer manually to the backend of its layer
+        if (il != -1 && strcmp(name, "norm") == 0) {
             for (auto * backend : lctx.backends) {
                 if (ggml_backend_buft_supports_backend(lctx.model.buft_layer[il].buft, backend)) {
                     ggml_backend_sched_set_tensor_backend(lctx.sched, cur, backend);