diff --git a/examples/common.cpp b/examples/common.cpp
index 32247cef7..b5810f28f 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -299,6 +299,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             params.use_mmap = false;
         } else if (arg == "--mtest") {
             params.mem_test = true;
+        } else if (arg == "--export") {
+            params.export_cgraph = true;
         } else if (arg == "--verbose-prompt") {
             params.verbose_prompt = true;
         } else if (arg == "-r" || arg == "--reverse-prompt") {
@@ -438,6 +440,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stderr, "                        number of layers to store in VRAM\n");
 #endif
     fprintf(stderr, "  --mtest               compute maximum memory usage\n");
+    fprintf(stderr, "  --export              export the computation graph to 'llama.ggml'\n");
     fprintf(stderr, "  --verbose-prompt      print prompt before generation\n");
     fprintf(stderr, "  --lora FNAME          apply LoRA adapter (implies --no-mmap)\n");
     fprintf(stderr, "  --lora-base FNAME     optional model to use as a base for the layers modified by the LoRA adapter\n");
diff --git a/examples/common.h b/examples/common.h
index fea9aa81a..66bdeb5e9 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -71,6 +71,7 @@ struct gpt_params {
     bool use_mmap          = true;  // use mmap for faster loads
     bool use_mlock         = false; // use mlock to keep model in memory
     bool mem_test          = false; // compute maximum memory usage
+    bool export_cgraph     = false; // export the computation graph
     bool verbose_prompt    = false; // print prompt tokens before generation
 };
 
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 6131f5b46..552f8b38d 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -134,6 +134,13 @@ int main(int argc, char ** argv) {
 
         return 0;
     }
+    // export the cgraph and exit
+    if (params.export_cgraph) {
+        llama_eval_export(ctx, "llama.ggml");
+        llama_free(ctx);
+
+        return 0;
+    }
 
     std::string path_session = params.path_prompt_cache;
     std::vector<llama_token> session_tokens;
diff --git a/examples/mtl/CMakeLists.txt b/examples/mtl/CMakeLists.txt
index a8923405f..c532a5582 100644
--- a/examples/mtl/CMakeLists.txt
+++ b/examples/mtl/CMakeLists.txt
@@ -1,12 +1,3 @@
-set(TARGET mtl-export)
-add_executable(${TARGET} mtl-export.cpp)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
-
-if(TARGET BUILD_INFO)
-  add_dependencies(${TARGET} BUILD_INFO)
-endif()
-
 if (APPLE)
     #
     # mtl
diff --git a/examples/mtl/mtl-export.cpp b/examples/mtl/mtl-export.cpp
deleted file mode 100644
index 7872182a1..000000000
--- a/examples/mtl/mtl-export.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-#include "common.h"
-#include "llama.h"
-
-int main(int argc, char ** argv) {
-    gpt_params params;
-
-    if (!gpt_params_parse(argc, argv, params)) {
-        return 1;
-    }
-
-    llama_init_backend();
-
-    llama_context * ctx = llama_init_from_gpt_params(params);
-    if (ctx == NULL) {
-        fprintf(stderr, "%s: error: unable to load model\n", __func__);
-        return 1;
-    }
-
-    llama_eval_export(ctx, "llama.ggml");
-
-    llama_print_timings(ctx);
-    llama_free(ctx);
-
-    return 0;
-}