From 030fda09a93253c9f1468bf3dab9d7030f14c5bb Mon Sep 17 00:00:00 2001
From: ochafik
Date: Wed, 30 Oct 2024 23:03:42 +0000
Subject: [PATCH 1/4] `main`: add test-cli + ensure output still logged w/ --log-disable

---
 Makefile                              |  6 +++
 examples/eval-callback/CMakeLists.txt |  2 +-
 examples/main/main.cpp                | 11 ++--
 ggml/src/ggml.c                       |  6 ++-
 tests/CMakeLists.txt                  |  1 +
 tests/test-cli.cpp                    | 76 +++++++++++++++++++++++++++
 6 files changed, 94 insertions(+), 8 deletions(-)
 create mode 100644 tests/test-cli.cpp

diff --git a/Makefile b/Makefile
index 719f45d16..4b3940e28 100644
--- a/Makefile
+++ b/Makefile
@@ -47,6 +47,7 @@ TEST_TARGETS = \
 	tests/test-autorelease \
 	tests/test-backend-ops \
 	tests/test-chat-template \
+	tests/test-cli \
 	tests/test-double-float \
 	tests/test-grad0 \
 	tests/test-grammar-integration \
@@ -1639,6 +1640,11 @@ tests/test-chat-template: tests/test-chat-template.cpp \
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
+tests/test-cli: tests/test-cli.cpp \
+	$(OBJ_ALL)
+	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+
 #
 # PoCs
 #
diff --git a/examples/eval-callback/CMakeLists.txt b/examples/eval-callback/CMakeLists.txt
index a48753d38..c125736f3 100644
--- a/examples/eval-callback/CMakeLists.txt
+++ b/examples/eval-callback/CMakeLists.txt
@@ -5,5 +5,5 @@ target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
 
 set(TEST_TARGET test-eval-callback)
-add_test(NAME ${TEST_TARGET} COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/stories260K.gguf --model stories260K.gguf --prompt hello --seed 42 -ngl 0)
+add_test(NAME ${TEST_TARGET} COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/stories260K.gguf --prompt hello --seed 42 -ngl 0)
 set_property(TEST ${TEST_TARGET} PROPERTY LABELS eval-callback curl)
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 374ed47ad..f7c0cd267 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -113,12 +113,12 @@ static void sigint_handler(int signo) {
             need_insert_eot = true;
         } else {
             console::cleanup();
-            LOG("\n");
+            LOG_INF("\n");
             common_perf_print(*g_ctx, *g_smpl);
             write_logfile(*g_ctx, *g_params, *g_model, *g_input_tokens, g_output_ss->str(), *g_output_tokens);
 
             // make sure all logs are flushed
-            LOG("Interrupted by user\n");
+            LOG_INF("Interrupted by user\n");
             common_log_pause(common_log_main());
 
             _exit(130);
@@ -717,7 +717,8 @@ int main(int argc, char ** argv) {
                 const std::string token_str = common_token_to_piece(ctx, id, params.special);
 
                 // Console/Stream Output
-                LOG("%s", token_str.c_str());
+                fprintf(stdout, "%s", token_str.c_str());
+                fflush(stdout);
 
                 // Record Displayed Tokens To Log
                 // Note: Generated tokens are created one by one hence this check
@@ -920,11 +921,11 @@ int main(int argc, char ** argv) {
     }
 
     if (!path_session.empty() && params.prompt_cache_all && !params.prompt_cache_ro) {
-        LOG("\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str());
+        LOG_INF("\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str());
         llama_state_save_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
     }
 
-    LOG("\n\n");
+    LOG_INF("\n\n");
     common_perf_print(ctx, smpl);
     write_logfile(ctx, params, model, input_tokens, output_ss.str(), output_tokens);
 
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 0d99b0791..4e3c840be 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -353,8 +353,10 @@ void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
 void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
     (void) level;
     (void) user_data;
-    fputs(text, stderr);
-    fflush(stderr);
+    if (level != GGML_LOG_LEVEL_DEBUG) {
+        fputs(text, stderr);
+        fflush(stderr);
+    }
 }
 
 #if (GGML_DEBUG >= 1)
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 08ad66b49..16009425e 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -113,6 +113,7 @@ llama_target_and_test(test-arg-parser.cpp)
 llama_target_and_test(test-quantize-fns.cpp)
 llama_target_and_test(test-quantize-perf.cpp)
 llama_target_and_test(test-sampling.cpp)
+llama_target_and_test(test-cli.cpp WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 llama_target_and_test(test-chat-template.cpp)
 
 llama_target_and_test(test-grammar-parser.cpp)
diff --git a/tests/test-cli.cpp b/tests/test-cli.cpp
new file mode 100644
index 000000000..ce934cca8
--- /dev/null
+++ b/tests/test-cli.cpp
@@ -0,0 +1,76 @@
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+
+
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+static std::string read(const std::string & file) {
+    std::ostringstream actuals;
+    actuals << std::ifstream(file.c_str()).rdbuf();
+    return actuals.str();
+}
+
+static void assert_equals(const std::string & expected, const std::string & actual) {
+    if (expected != actual) {
+        std::cerr << "Expected: " << expected << std::endl;
+        std::cerr << "Actual: " << actual << std::endl;
+        std::cerr << std::flush;
+        throw std::runtime_error("Test failed");
+    }
+}
+
+static void assert_contains(const std::string & expected, const std::string & actual) {
+    if (actual.find(expected) == std::string::npos) {
+        std::cerr << "Expected to find: " << expected << std::endl;
+        std::cerr << "Actual: " << actual << std::endl;
+        std::cerr << std::flush;
+        throw std::runtime_error("Test failed");
+    }
+}
+
+struct Out {
+    std::string out;
+    std::string err;
+};
+
+static Out run(const std::string & cmd) {
+    auto full_cmd = cmd + " > out/out.txt 2> out/err.txt";
+    std::cerr << "Running: " << full_cmd << std::endl;
+    if (std::system(full_cmd.c_str()) != 0)
+        throw std::runtime_error("llama-cli binary failed to run.");
+    return {
+        /* .out = */ read("out/out.txt"),
+        /* .err = */ read("out/err.txt"),
+    };
+}
+
+int main(int argc, char ** argv) {
+    std::string cli_bin = argc == 2 ? argv[1] : "./llama-cli";
+
+    std::system("mkdir out/");
+
+    {
+        auto p = run(cli_bin + " --help");
+        if (!p.err.empty())
+            throw std::runtime_error("llama-cli --help should not have any stderr.");
+        assert_contains("example usage", p.out);
+    }
+
+    {
+        auto p = run(cli_bin + " -hfr ggml-org/models -hff tinyllamas/stories260K.gguf --prompt hello --seed 42 -ngl 0 -n 10");
+        assert_equals(" hello Joe and Joe we", p.out);
+        assert_contains("system_info:", p.err);
+    }
+
+    {
+        auto p = run(cli_bin + " -hfr ggml-org/models -hff tinyllamas/stories260K.gguf --prompt hello --seed 42 -ngl 0 -n 10 --log-disable");
+        assert_equals(" hello Joe and Joe we", p.out);
+        assert_equals("", p.err);
+    }
+}

From 82d5e91a6fe856eed8bd9747e6ae49feaf479c9a Mon Sep 17 00:00:00 2001
From: ochafik
Date: Thu, 31 Oct 2024 00:10:44 +0000
Subject: [PATCH 2/4] `test-cli`: greedy sampling + print exception messages

---
 tests/test-cli.cpp | 37 +++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/tests/test-cli.cpp b/tests/test-cli.cpp
index ce934cca8..8be29c6f8 100644
--- a/tests/test-cli.cpp
+++ b/tests/test-cli.cpp
@@ -53,24 +53,29 @@
 int main(int argc, char ** argv) {
     std::string cli_bin = argc == 2 ? argv[1] : "./llama-cli";
 
-    std::system("mkdir out/");
+    try {
+        std::system("mkdir out/");
 
-    {
-        auto p = run(cli_bin + " --help");
-        if (!p.err.empty())
-            throw std::runtime_error("llama-cli --help should not have any stderr.");
-        assert_contains("example usage", p.out);
-    }
+        {
+            auto p = run(cli_bin + " --help");
+            if (!p.err.empty())
+                throw std::runtime_error("llama-cli --help should not have any stderr.");
+            assert_contains("example usage", p.out);
+        }
 
-    {
-        auto p = run(cli_bin + " -hfr ggml-org/models -hff tinyllamas/stories260K.gguf --prompt hello --seed 42 -ngl 0 -n 10");
-        assert_equals(" hello Joe and Joe we", p.out);
-        assert_contains("system_info:", p.err);
-    }
+        {
+            auto p = run(cli_bin + " -hfr ggml-org/models -hff tinyllamas/stories260K.gguf --prompt hello --seed 42 --samplers top-k --top-k 1 -ngl 0 -n 10");
+            assert_equals(" hello was a big, red ball. He", p.out);
+            assert_contains("system_info:", p.err);
+        }
 
-    {
-        auto p = run(cli_bin + " -hfr ggml-org/models -hff tinyllamas/stories260K.gguf --prompt hello --seed 42 -ngl 0 -n 10 --log-disable");
-        assert_equals(" hello Joe and Joe we", p.out);
-        assert_equals("", p.err);
+        {
+            auto p = run(cli_bin + " -hfr ggml-org/models -hff tinyllamas/stories260K.gguf --prompt hello --seed 42 --samplers top-k --top-k 1 -ngl 0 -n 10 --log-disable");
+            assert_equals(" hello was a big, red ball. He", p.out);
+            assert_equals("", p.err);
+        }
+    } catch (const std::exception & ex) {
+        std::cerr << "[test-cli] Error: " << ex.what() << std::endl;
+        return 1;
     }
 }

From ca512cc934aaeddf4ed5f3c0731ab826de8411cb Mon Sep 17 00:00:00 2001
From: ochafik
Date: Thu, 31 Oct 2024 02:03:35 +0000
Subject: [PATCH 3/4] `test-cli`: add llama-cli as order-only prerequisite in Makefile

---
 Makefile           | 2 +-
 tests/test-cli.cpp | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 4b3940e28..001fa58dc 100644
--- a/Makefile
+++ b/Makefile
@@ -1641,7 +1641,7 @@ tests/test-chat-template: tests/test-chat-template.cpp \
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
 tests/test-cli: tests/test-cli.cpp \
-	$(OBJ_ALL)
+	$(OBJ_ALL) | llama-cli
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
diff --git a/tests/test-cli.cpp b/tests/test-cli.cpp
index 8be29c6f8..de249a921 100644
--- a/tests/test-cli.cpp
+++ b/tests/test-cli.cpp
@@ -54,7 +54,8 @@ int main(int argc, char ** argv) {
     std::string cli_bin = argc == 2 ? argv[1] : "./llama-cli";
 
     try {
-        std::system("mkdir out/");
+        if (std::system("mkdir -p out/") != 0)
+            throw std::runtime_error("Failed to create out/ directory.");
 
         {
             auto p = run(cli_bin + " --help");
@@ -74,6 +75,8 @@ int main(int argc, char ** argv) {
             assert_equals(" hello was a big, red ball. He", p.out);
            assert_equals("", p.err);
         }
+
+        return 0;
     } catch (const std::exception & ex) {
         std::cerr << "[test-cli] Error: " << ex.what() << std::endl;
         return 1;

From 243fd5dd37d82f5821accb83bfae275b0c54a503 Mon Sep 17 00:00:00 2001
From: ochafik
Date: Thu, 31 Oct 2024 02:54:06 +0000
Subject: [PATCH 4/4] Update test-cli.cpp

---
 tests/test-cli.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/test-cli.cpp b/tests/test-cli.cpp
index de249a921..a290208a2 100644
--- a/tests/test-cli.cpp
+++ b/tests/test-cli.cpp
@@ -42,11 +42,13 @@ struct Out {
 static Out run(const std::string & cmd) {
     auto full_cmd = cmd + " > out/out.txt 2> out/err.txt";
     std::cerr << "Running: " << full_cmd << std::endl;
+    auto out = read("out/out.txt");
+    auto err = read("out/err.txt");
     if (std::system(full_cmd.c_str()) != 0)
-        throw std::runtime_error("llama-cli binary failed to run.");
+        throw std::runtime_error("llama-cli binary failed to run.\nstdout: " + out + "\nstderr: " + err);
     return {
-        /* .out = */ read("out/out.txt"),
-        /* .err = */ read("out/err.txt"),
+        /* .out = */ out,
+        /* .err = */ err,
     };
 }
 
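
Note on PATCH 3/4: the `| llama-cli` added to the Makefile rule makes llama-cli an order-only prerequisite. In GNU Make, prerequisites listed after a `|` are built first if needed, but being newer than the target never marks the target stale, so rebuilding llama-cli does not by itself force tests/test-cli to be relinked. A minimal sketch of the mechanism, using hypothetical file names that are not part of the patches above:

    # out/ only needs to exist before the copy runs; touching out/ later
    # does not invalidate out/report.txt.
    out/report.txt: data.txt | out
    	cp data.txt out/report.txt

    out:
    	mkdir -p out

Here llama-cli plays the role of the out directory: the test just needs the binary to be present when it runs.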