From f8f97e74f9337f901723b3bbe1bf90f0f465f239 Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Mon, 8 Apr 2024 01:17:33 +0200
Subject: [PATCH] llama: dbrx: hardcode nn.LayerNorm epsilon

---
 llama.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llama.cpp b/llama.cpp
index 9a1e72273..cac0dd0c6 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3986,6 +3986,7 @@ static void llm_load_hparams(
         case LLM_ARCH_DBRX:
             {
                 ml.get_key(LLM_KV_ATTENTION_CLAMP_KQV, hparams.f_clamp_kqv);
+                hparams.f_norm_eps = 1.e-5; // REVIEW is that OK ? https://pytorch.org/docs/stable/generated/torch.nn.LayerNorm.html
                 switch (hparams.n_layer) {
                     case 40: model.type = e_model::MODEL_132B; break;
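
Note (not part of the patch): the hardcoded 1.e-5 matches the default eps documented on the linked torch.nn.LayerNorm page, which is the value this change writes into hparams.f_norm_eps. A minimal Python sketch to confirm that documented default, for reference only:

    import torch

    # torch.nn.LayerNorm defaults to eps=1e-05 per its documentation;
    # this is the value the patch hardcodes into hparams.f_norm_eps.
    ln = torch.nn.LayerNorm(normalized_shape=16)
    print(ln.eps)  # -> 1e-05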