diff --git a/llama.cpp b/llama.cpp
index b3161b521..a2ceac163 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -8367,7 +8367,9 @@ static struct ggml_cgraph * llama_build_graph(
             }
         }
 
-        if (il != -1) {
+        // the "norm" tensor may be automatically assigned to the backend of the previous layer, increasing data transfer between backends
+        // to fix this, we manually assign the "norm" tensor to the backend of its own layer
+        if (il != -1 && strcmp(name, "norm") == 0) {
             for (auto * backend : lctx.backends) {
                 if (ggml_backend_buft_supports_backend(lctx.model.buft_layer[il].buft, backend)) {
                     ggml_backend_sched_set_tensor_backend(lctx.sched, cur, backend);