mtl : no need for mtl-export tool, add cli arg for main instead

Georgi Gerganov 2023-05-29 21:28:59 +03:00
parent b23fe8c9c7
commit a792cbd0fc
5 changed files with 11 additions and 34 deletions

examples/common.cpp

@@ -299,6 +299,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             params.use_mmap = false;
         } else if (arg == "--mtest") {
             params.mem_test = true;
+        } else if (arg == "--export") {
+            params.export_cgraph = true;
         } else if (arg == "--verbose-prompt") {
             params.verbose_prompt = true;
         } else if (arg == "-r" || arg == "--reverse-prompt") {
@@ -438,6 +440,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stderr, "                        number of layers to store in VRAM\n");
 #endif
     fprintf(stderr, "  --mtest               compute maximum memory usage\n");
+    fprintf(stderr, "  --export              export the computation graph to 'llama.ggml'\n");
     fprintf(stderr, "  --verbose-prompt      print prompt before generation\n");
     fprintf(stderr, "  --lora FNAME          apply LoRA adapter (implies --no-mmap)\n");
     fprintf(stderr, "  --lora-base FNAME     optional model to use as a base for the layers modified by the LoRA adapter\n");

examples/common.h

@@ -71,6 +71,7 @@ struct gpt_params {
     bool use_mmap       = true;  // use mmap for faster loads
     bool use_mlock      = false; // use mlock to keep model in memory
     bool mem_test       = false; // compute maximum memory usage
+    bool export_cgraph  = false; // export the computation graph
     bool verbose_prompt = false; // print prompt tokens before generation
 };
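Since export_cgraph is just another field on gpt_params, a program that embeds the examples' common code can request the export without going through the CLI parser. A minimal sketch, assuming examples/common.h is on the include path:

    #include "common.h" // gpt_params, from examples/common.h

    int main() {
        gpt_params params;           // export_cgraph defaults to false
        params.export_cgraph = true; // same effect as passing --export
        // hand params to llama_init_from_gpt_params() as usual
        return 0;
    }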

examples/main/main.cpp

@@ -134,6 +134,13 @@ int main(int argc, char ** argv) {
         return 0;
     }
 
+    // export the cgraph and exit
+    if (params.export_cgraph) {
+        llama_eval_export(ctx, "llama.ggml");
+        llama_free(ctx);
+
+        return 0;
+    }
+
     std::string path_session = params.path_prompt_cache;
     std::vector<llama_token> session_tokens;
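Note that the new block ignores the result of llama_eval_export. A slightly more defensive variant is sketched below; it assumes the function returns 0 on success, which this diff does not show:

    // sketch: propagate an export failure to the process exit code
    // assumption: llama_eval_export() returns 0 on success (not confirmed by this diff)
    if (params.export_cgraph) {
        const int ret = llama_eval_export(ctx, "llama.ggml");
        llama_free(ctx);

        return ret == 0 ? 0 : 1;
    }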

examples/mtl/CMakeLists.txt

@@ -1,12 +1,3 @@
-set(TARGET mtl-export)
-add_executable(${TARGET} mtl-export.cpp)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
-
-if(TARGET BUILD_INFO)
-    add_dependencies(${TARGET} BUILD_INFO)
-endif()
-
 if (APPLE)
     #
     # mtl
examples/mtl/mtl-export.cpp (deleted)

@@ -1,25 +0,0 @@
-#include "common.h"
-#include "llama.h"
-
-int main(int argc, char ** argv) {
-    gpt_params params;
-
-    if (!gpt_params_parse(argc, argv, params)) {
-        return 1;
-    }
-
-    llama_init_backend();
-
-    llama_context * ctx = llama_init_from_gpt_params(params);
-    if (ctx == NULL) {
-        fprintf(stderr, "%s: error: unable to load model\n", __func__);
-        return 1;
-    }
-
-    llama_eval_export(ctx, "llama.ggml");
-
-    llama_print_timings(ctx);
-    llama_free(ctx);
-
-    return 0;
-}
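Everything the deleted tool did (parse the arguments, initialize the backend, load the model, call llama_eval_export) is now covered by the --export path added to main.cpp; only the llama_print_timings call has no equivalent there.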