diff --git a/examples/common.cpp b/examples/common.cpp
index 32247cef7..b5810f28f 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -299,6 +299,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             params.use_mmap = false;
         } else if (arg == "--mtest") {
             params.mem_test = true;
+        } else if (arg == "--export") {
+            params.export_cgraph = true;
         } else if (arg == "--verbose-prompt") {
             params.verbose_prompt = true;
         } else if (arg == "-r" || arg == "--reverse-prompt") {
@@ -438,6 +440,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stderr, "                        number of layers to store in VRAM\n");
 #endif
     fprintf(stderr, "  --mtest               compute maximum memory usage\n");
+    fprintf(stderr, "  --export              export the computation graph to 'llama.ggml'\n");
     fprintf(stderr, "  --verbose-prompt      print prompt before generation\n");
     fprintf(stderr, "  --lora FNAME          apply LoRA adapter (implies --no-mmap)\n");
     fprintf(stderr, "  --lora-base FNAME     optional model to use as a base for the layers modified by the LoRA adapter\n");
diff --git a/examples/common.h b/examples/common.h
index fea9aa81a..66bdeb5e9 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -71,6 +71,7 @@ struct gpt_params {
     bool use_mmap          = true;  // use mmap for faster loads
     bool use_mlock         = false; // use mlock to keep model in memory
     bool mem_test          = false; // compute maximum memory usage
+    bool export_cgraph     = false; // export the computation graph
     bool verbose_prompt    = false; // print prompt tokens before generation
 };
 
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 6131f5b46..552f8b38d 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -134,6 +134,13 @@ int main(int argc, char ** argv) {
 
         return 0;
     }
+    // export the cgraph and exit
+    if (params.export_cgraph) {
+        llama_eval_export(ctx, "llama.ggml");
+        llama_free(ctx);
+
+        return 0;
+    }
 
     std::string path_session = params.path_prompt_cache;
     std::vector<llama_token> session_tokens;
diff --git a/examples/mtl/CMakeLists.txt b/examples/mtl/CMakeLists.txt
index a8923405f..c532a5582 100644
--- a/examples/mtl/CMakeLists.txt
+++ b/examples/mtl/CMakeLists.txt
@@ -1,12 +1,3 @@
-set(TARGET mtl-export)
-add_executable(${TARGET} mtl-export.cpp)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
-
-if(TARGET BUILD_INFO)
-  add_dependencies(${TARGET} BUILD_INFO)
-endif()
-
 if (APPLE)
     #
     # mtl
diff --git a/examples/mtl/mtl-export.cpp b/examples/mtl/mtl-export.cpp
deleted file mode 100644
index 7872182a1..000000000
--- a/examples/mtl/mtl-export.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-#include "common.h"
-#include "llama.h"
-
-int main(int argc, char ** argv) {
-    gpt_params params;
-
-    if (!gpt_params_parse(argc, argv, params)) {
-        return 1;
-    }
-
-    llama_init_backend();
-
-    llama_context * ctx = llama_init_from_gpt_params(params);
-    if (ctx == NULL) {
-        fprintf(stderr, "%s: error: unable to load model\n", __func__);
-        return 1;
-    }
-
-    llama_eval_export(ctx, "llama.ggml");
-
-    llama_print_timings(ctx);
-    llama_free(ctx);
-
-    return 0;
-}