diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index ee5658b38..ac11ad767 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -318,7 +318,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
 
             //determine mem per token
             const std::vector<int> tmp = {1, 2, 3, 4};
-            llama_eval(llama_ctx_v3, tmp.data(), tmp.size(), 0, params.n_threads);
+            auto er = llama_eval(llama_ctx_v3, tmp.data(), tmp.size(), 0, params.n_threads);
+            if(er!=0)
+            {
+                printf("\nLLAMA EVAL returned nonzero!\n");
+            }
             return ModelLoadResult::SUCCESS;
         }
         else if (file_format == FileFormat::RWKV_1)
diff --git a/llama.cpp b/llama.cpp
index 745b95823..37bd155c8 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1137,11 +1137,11 @@ static bool llama_eval_internal(
         const int   n_past,
         const int   n_threads) {
 
-    // enforce that the first token is BOS
-    if (n_past == 0 && tokens[0] != llama_token_bos()) {
-        fprintf(stderr, "%s: first token must be BOS\n", __func__);
-        return false;
-    }
+    // // enforce that the first token is BOS
+    // if (n_past == 0 && tokens[0] != llama_token_bos()) {
+    //     fprintf(stderr, "%s: first token must be BOS\n", __func__);
+    //     return false;
+    // }
 
     const int64_t t_start_us = ggml_time_us();