diff --git a/Makefile b/Makefile index a8658a596..c9aa01b65 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ TEST_TARGETS = \ tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \ tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \ tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \ - tests/test-backend-ops tests/test-autorelease + tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease # Code coverage output files COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report @@ -748,5 +748,8 @@ tests/test-c.o: tests/test-c.c llama.h tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) +tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get_model.cpp $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + +tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get_model.cpp $(COMMON_DEPS) $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) diff --git a/ci/run.sh b/ci/run.sh index f51b5a39d..6245154f1 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -238,8 +238,6 @@ function gg_run_open_llama_3b_v2 { wiki_test_60="${path_wiki}/wiki.test-60.raw" - ./bin/test-autorelease ${model_f16} - ./bin/quantize ${model_f16} ${model_q8_0} q8_0 ./bin/quantize ${model_f16} ${model_q4_0} q4_0 ./bin/quantize ${model_f16} ${model_q4_1} q4_1 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9ec39ef30..9dd972abd 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -55,11 +55,11 @@ llama_build_and_test_executable(test-llama-grammar.cpp) llama_build_and_test_executable(test-grad0.cpp) # 
llama_build_and_test_executable(test-opt.cpp) # SLOW llama_build_and_test_executable(test-backend-ops.cpp) -llama_build_and_test_executable(test-autorelease.cpp) llama_build_and_test_executable(test-rope.cpp) llama_build_and_test_executable_with_label(test-model-load-cancel.cpp "model") +llama_build_and_test_executable_with_label(test-autorelease.cpp "model") # dummy executable - not installed get_filename_component(TEST_TARGET test-c.c NAME_WE) diff --git a/tests/get_model.cpp b/tests/get_model.cpp new file mode 100644 index 000000000..af7b0008f --- /dev/null +++ b/tests/get_model.cpp @@ -0,0 +1,26 @@ +#include <cstdio> +#include <cstdlib> +#include <cstring> + +#include "get_model.h" + +char * get_model_or_exit(int argc, char *argv[]) { + char * makelevel = getenv("MAKELEVEL"); + if (makelevel != nullptr && atoi(makelevel) > 0) { + fprintf(stderr, "Detected being run in Make. Skipping this test.\n"); + exit(EXIT_SUCCESS); + } + + char * model_path; + if (argc > 1) { + model_path = argv[1]; + } else { + model_path = getenv("GG_RUN_CTEST_MODELFILE"); + if (!model_path || strlen(model_path) == 0) { + fprintf(stderr, "error: no model file provided\n"); + exit(EXIT_FAILURE); + } + } + + return model_path; +} diff --git a/tests/get_model.h b/tests/get_model.h new file mode 100644 index 000000000..2086b09d0 --- /dev/null +++ b/tests/get_model.h @@ -0,0 +1 @@ +char * get_model_or_exit(int, char*[]); diff --git a/tests/test-autorelease.cpp b/tests/test-autorelease.cpp index 289c6ba6c..af2c2fccf 100644 --- a/tests/test-autorelease.cpp +++ b/tests/test-autorelease.cpp @@ -5,19 +5,15 @@ #include <thread> #include "llama.h" +#include "get_model.h" // This creates a new context inside a pthread and then tries to exit cleanly.
int main(int argc, char ** argv) { - if (argc < 2) { - printf("Usage: %s model.gguf\n", argv[0]); - return 0; // intentionally return success - } + auto * model_path = get_model_or_exit(argc, argv); - const std::string fname = argv[1]; - - std::thread([&fname]() { + std::thread([&model_path]() { llama_backend_init(false); - auto * model = llama_load_model_from_file(fname.c_str(), llama_model_default_params()); + auto * model = llama_load_model_from_file(model_path, llama_model_default_params()); auto * ctx = llama_new_context_with_model(model, llama_context_default_params()); llama_free(ctx); llama_free_model(model); diff --git a/tests/test-model-load-cancel.cpp b/tests/test-model-load-cancel.cpp index a2c1acf53..b75aa03a8 100644 --- a/tests/test-model-load-cancel.cpp +++ b/tests/test-model-load-cancel.cpp @@ -1,21 +1,10 @@ #include "llama.h" +#include "get_model.h" -#include <cstdlib> #include <cstdio> -#include <cstring> int main(int argc, char *argv[] ) { - char * model_path; - if (argc > 1) { - model_path = argv[1]; - } else { - model_path = getenv("GG_RUN_CTEST_MODELFILE"); - if (!model_path || strlen(model_path) == 0) { - fprintf(stderr, "error: no model file provided\n"); - exit(1); - } - } - + auto * model_path = get_model_or_exit(argc, argv); auto * file = fopen(model_path, "r"); if (file == nullptr) { fprintf(stderr, "no model at '%s' found\n", model_path);