diff --git a/src/llama.cpp b/src/llama.cpp
index a28d2d6e9..19ff65238 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -5688,7 +5688,7 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
 
     const char * rope_scaling_type = LLAMA_ROPE_SCALING_TYPES.at(hparams.rope_scaling_type_train);
 
-    auto print_f = [](std::function<uint32_t(uint32_t)> f, uint32_t n) {
+    auto print_f = [](const std::function<uint32_t(uint32_t)> & f, uint32_t n) {
         bool is_var = false;
 
         std::vector<uint32_t> v;
@@ -5954,7 +5954,7 @@ static bool llm_load_tensors(
    // create tensors for the weights
    {
        const int64_t n_embd       = hparams.n_embd;
-       const int64_t n_embd_head  = n_embd / hparams.n_head();
+       const int64_t n_embd_head  = hparams.n_head() > 0 ? n_embd / hparams.n_head() : 0;
        const int64_t n_embd_k_gqa = hparams.n_embd_k_gqa();
        const int64_t n_embd_v_gqa = hparams.n_embd_v_gqa();
        const int64_t n_embd_gqa   = n_embd_v_gqa;
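
Note on the two hunks: passing a std::function by const reference avoids copying the type-erased callable (and any state it captures) at each call site, and checking hparams.n_head() > 0 before dividing avoids integer division by zero for models or layers that report zero attention heads. A minimal standalone sketch of both patterns under those assumptions follows; apart from std::function/std::vector, every name in it is hypothetical rather than llama.cpp code:

    // sketch.cpp - illustrates the two patterns applied in the diff above.
    #include <cstdint>
    #include <cstdio>
    #include <functional>
    #include <vector>

    // Pattern 1: take the std::function by const reference. Passing it by
    // value would copy the type-erased callable and any captured state.
    static void print_per_layer(const std::function<uint32_t(uint32_t)> & f, uint32_t n) {
        std::vector<uint32_t> v;
        for (uint32_t i = 0; i < n; ++i) {
            v.push_back(f(i));
        }
        for (uint32_t x : v) {
            printf("%u ", (unsigned) x);
        }
        printf("\n");
    }

    int main() {
        const int64_t n_embd = 4096;
        const int64_t n_head = 0; // e.g. a hypothetical layer without attention heads

        // Pattern 2: guard the divisor. Without the check, n_head == 0 makes
        // n_embd / n_head an integer division by zero (undefined behavior).
        const int64_t n_embd_head = n_head > 0 ? n_embd / n_head : 0;
        printf("n_embd_head = %lld\n", (long long) n_embd_head);

        // Hypothetical per-layer head counts, consumed through the const-ref callback.
        print_per_layer([](uint32_t il) { return il < 2 ? 32u : 8u; }, 4);
        return 0;
    }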