diff --git a/Makefile b/Makefile
index 278ab6361..9b616294d 100644
--- a/Makefile
+++ b/Makefile
@@ -72,8 +72,13 @@ endif
 # feel free to update the Makefile for your architecture and send a pull request or issue
 ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
 	# Use all CPU extensions that are available:
-	CFLAGS += -mf16c -mavx -msse3
-	BONUSCFLAGS += -mfma -mavx2
+	CFLAGS += -mavx
+	ifeq ($(OS),Windows_NT)
+		BONUSCFLAGS += -mfma -mavx2 -mf16c -msse3
+	else
+# if not on windows, they are clearly building it themselves, so lets just use whatever is supported
+		CFLAGS += -march=native -mtune=native
+	endif
 endif
 ifneq ($(filter ppc64%,$(UNAME_M)),)
 	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index e04f04c40..667f8b1d5 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -224,7 +224,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     int original_threads = params.n_threads;
     if (blasmode)
     {
-        params.n_batch = 1024;
+        params.n_batch = 512; //received reports of 1024 and above crashing on some models
         params.n_threads = 1;
     }
diff --git a/llama_adapter.cpp b/llama_adapter.cpp
index 93f46fae6..6e437eb6c 100644
--- a/llama_adapter.cpp
+++ b/llama_adapter.cpp
@@ -160,7 +160,7 @@ generation_outputs llama_generate(const generation_inputs inputs, generation_out
    int original_threads = params.n_threads;
    if (blasmode)
    {
-        params.n_batch = 1024;
+        params.n_batch = 512; //received reports of 1024 and above crashing on some models
        params.n_threads = 1;
    }