diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index eb628d9d8..a22980b0f 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -646,9 +646,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.ATTN_QKV,
-        MODEL_TENSOR.ATTN_OUT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_NORM_2,
+        MODEL_TENSOR.ATTN_OUT_NORM,
         MODEL_TENSOR.FFN_GATE_INP,
         MODEL_TENSOR.FFN_GATE_EXP,
         MODEL_TENSOR.FFN_DOWN_EXP,
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 6e845c479..bf498a5fc 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -155,31 +155,31 @@ class TensorNameMap:
 
         # Attention output
         MODEL_TENSOR.ATTN_OUT: (
-            "gpt_neox.layers.{bid}.attention.dense",                        # gptneox
-            "transformer.h.{bid}.attn.c_proj",                              # gpt2 refact qwen
-            "transformer.blocks.{bid}.attn.out_proj",                       # mpt
-            "transformer.h.{bid}.self_attention.dense",                     # falcon
-            "h.{bid}.self_attention.dense",                                 # bloom
-            "model.layers.{bid}.self_attn.o_proj",                          # llama-hf
-            "layers.{bid}.attention.wo",                                    # llama-pth
-            "encoder.layer.{bid}.attention.output.dense",                   # bert
-            "transformer.h.{bid}.attn.out_proj",                            # gpt-j
-            "language_model.encoder.layers.{bid}.self_attention.dense",     # persimmon
-            "model.layers.{bid}.self_attn.dense",                           # persimmon
-            "h.{bid}.attn.c_proj",                                          # gpt2
-            "transformer.h.{bid}.mixer.out_proj",                           # phi2
-            "model.layers.layers.{bid}.self_attn.o_proj",                   # plamo
-            "model.layers.{bid}.attention.wo",                              # internlm2
-            "encoder.layers.{bid}.attn.out_proj",                           # nomic-bert
-            "transformer.decoder_layer.{bid}.multi_head_attention.linear",  # Grok
-            "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj.weight", # dbrx
+            "gpt_neox.layers.{bid}.attention.dense",                       # gptneox
+            "transformer.h.{bid}.attn.c_proj",                             # gpt2 refact qwen
+            "transformer.blocks.{bid}.attn.out_proj",                      # mpt
+            "transformer.h.{bid}.self_attention.dense",                    # falcon
+            "h.{bid}.self_attention.dense",                                # bloom
+            "model.layers.{bid}.self_attn.o_proj",                         # llama-hf
+            "layers.{bid}.attention.wo",                                   # llama-pth
+            "encoder.layer.{bid}.attention.output.dense",                  # bert
+            "transformer.h.{bid}.attn.out_proj",                           # gpt-j
+            "language_model.encoder.layers.{bid}.self_attention.dense",    # persimmon
+            "model.layers.{bid}.self_attn.dense",                          # persimmon
+            "h.{bid}.attn.c_proj",                                         # gpt2
+            "transformer.h.{bid}.mixer.out_proj",                          # phi2
+            "model.layers.layers.{bid}.self_attn.o_proj",                  # plamo
+            "model.layers.{bid}.attention.wo",                             # internlm2
+            "encoder.layers.{bid}.attn.out_proj",                          # nomic-bert
+            "transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
         ),
 
         # Attention output norm
         MODEL_TENSOR.ATTN_OUT_NORM: (
-            "encoder.layer.{bid}.attention.output.LayerNorm", # bert
-            "encoder.layers.{bid}.norm1",                     # nomic-bert
-            "transformer.decoder_layer.{bid}.rms_norm_1",     # Grok
+            "encoder.layer.{bid}.attention.output.LayerNorm",               # bert
+            "encoder.layers.{bid}.norm1",                                   # nomic-bert
+            "transformer.decoder_layer.{bid}.rms_norm_1",                   # Grok
+            "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj.weight", # dbrx
         ),
 
         # Rotary embeddings
diff --git a/llama.cpp b/llama.cpp
index 016e119cb..360d4b086 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4695,7 +4695,6 @@ static bool llm_load_tensors(
                     layer.attn_norm_2 = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM_2, "weight", i), {n_embd});
 
                     layer.wqkv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd});
-                    layer.wo   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd});
 
                     layer.ffn_gate_inp  = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert});
                     layer.ffn_gate_exps = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), {n_embd, n_ff, n_expert}, false);
@@ -7184,9 +7183,10 @@ struct llm_build_context {
 
             // feed-forward network
             // MoE branch
+            // FIXME REVIEW: I do not see this op in https://huggingface.co/databricks/dbrx-instruct/blob/464e701f50aef4c1b59c81fb5667819a5d08e108/modeling_dbrx.py#L727
             cur = llm_build_norm(ctx0, ffn_inp, hparams,
-                    model.layers[il].ffn_norm, NULL,
-                    LLM_NORM_RMS, cb, il);
+                    NULL, NULL,
+                    LLM_NORM, cb, il);
             cb(cur, "ffn_norm", il);
 
             ggml_tensor * logits = ggml_mul_mat(ctx0, model.layers[il].ffn_gate_inp, cur); // [n_tokens, num_experts]
@@ -7244,9 +7244,16 @@ struct llm_build_context {
                     cb(moe_out, "ffn_moe_out", il);
                 }
             }
-            cur = moe_out;
+            // DbrxNormAttentionNorm
+            {
+                cur = llm_build_norm(ctx0, moe_out, hparams,
+                        model.layers[il].layer_out_norm, NULL,
+                        LLM_NORM, cb, il);
+                cb(cur, "layer_out_norm", il);
+            }
+            cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);
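
Review note on the FIXME above: for reference, here is a rough Python sketch of how the DbrxBlock in modeling_dbrx.py appears to chain its ops, paraphrased rather than copied verbatim (the function and argument names below are illustrative stand-ins, not the upstream ones). The second LayerNorm of norm_attn_norm already normalizes the residual stream immediately before the router, which would explain why there is no separate RMS norm in front of the experts:

```python
# Paraphrased sketch of the DBRX block wiring (not upstream code; all
# names are illustrative stand-ins for the modules in modeling_dbrx.py).
def dbrx_block(x, norm_1, norm_2, attn, moe_ffn):
    resid = x
    h = attn(norm_1(x))    # pre-attention LayerNorm, then attention (out_proj included)
    resid = resid + h      # first residual add
    h = norm_2(resid)      # post-attention LayerNorm: this is what feeds the router
    h = moe_ffn(h)         # router + experts; no extra norm inside the ffn module
    return resid + h       # second residual add, with no norm after the experts
```

If that reading is right, the norm feeding the router should come from a weight tensor (norm_2 of norm_attn_norm) rather than a weightless LLM_NORM, and nothing gets normalized after moe_out.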
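
A second review note, on the tensor_mapping.py hunk: the new dbrx entry under ATTN_OUT_NORM is the only pattern in the table that keeps a literal `.weight` suffix. If I read gguf-py's TensorNameMap lookup correctly, suffixes from try_suffixes are stripped from the checkpoint name and re-attached to the mapped name only when the base key matches, while an exact match returns the mapped name with no suffix re-attached. A minimal sketch of that logic under those assumptions (`lookup` is a simplified stand-in, not the real method):

```python
# Simplified stand-in for gguf-py's TensorNameMap suffix handling
# (an assumption about the real behavior, for illustration only).
mapping = {
    # the dbrx entry as written in this diff: note the embedded ".weight"
    "transformer.blocks.0.norm_attn_norm.attn.out_proj.weight": "blk.0.attn_output_norm",
}

def lookup(key, try_suffixes=(".weight", ".bias")):
    if key in mapping:                # exact match: suffix NOT re-attached
        return mapping[key]
    for suffix in try_suffixes:       # otherwise strip a known suffix
        if key.endswith(suffix):
            base = mapping.get(key[:-len(suffix)])
            if base is not None:
                return base + suffix  # and re-attach it to the mapped name
    return None

# The checkpoint tensor hits the exact-match branch, so the resulting GGUF
# name comes out without a ".weight" suffix:
print(lookup("transformer.blocks.0.norm_attn_norm.attn.out_proj.weight"))  # blk.0.attn_output_norm
```

If that matches the real implementation, dropping the trailing `.weight` from the pattern, as every other entry does, would be the safer spelling.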