diff --git a/Makefile b/Makefile
index 5c8994385..a80cb11fd 100644
--- a/Makefile
+++ b/Makefile
@@ -47,6 +47,7 @@ TEST_TARGETS = \
 	tests/test-autorelease \
 	tests/test-backend-ops \
 	tests/test-chat-template \
+	tests/test-cli \
 	tests/test-double-float \
 	tests/test-grammar-integration \
 	tests/test-grammar-parser \
@@ -1508,6 +1509,11 @@ tests/test-chat-template: tests/test-chat-template.cpp \
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
+tests/test-cli: tests/test-cli.cpp \
+	$(OBJ_ALL) | llama-cli
+	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+
 #
 # PoCs
 #
diff --git a/examples/eval-callback/CMakeLists.txt b/examples/eval-callback/CMakeLists.txt
index a48753d38..c125736f3 100644
--- a/examples/eval-callback/CMakeLists.txt
+++ b/examples/eval-callback/CMakeLists.txt
@@ -5,5 +5,5 @@ target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
 
 set(TEST_TARGET test-eval-callback)
-add_test(NAME ${TEST_TARGET} COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/stories260K.gguf --model stories260K.gguf --prompt hello --seed 42 -ngl 0)
+add_test(NAME ${TEST_TARGET} COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/stories260K.gguf --prompt hello --seed 42 -ngl 0)
 set_property(TEST ${TEST_TARGET} PROPERTY LABELS eval-callback curl)
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 7c4ce4be2..62246944c 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -70,11 +70,11 @@ static void sigint_handler(int signo) {
             need_insert_eot = true;
         } else {
             console::cleanup();
-            LOG("\n");
+            LOG_INF("\n");
             common_perf_print(*g_ctx, *g_smpl);
 
             // make sure all logs are flushed
-            LOG("Interrupted by user\n");
+            LOG_INF("Interrupted by user\n");
             common_log_pause(common_log_main());
 
             _exit(130);
@@ -673,7 +673,8 @@ int main(int argc, char ** argv) {
             const std::string token_str = common_token_to_piece(ctx, id, params.special);
 
             // Console/Stream Output
-            LOG("%s", token_str.c_str());
+            fprintf(stdout, "%s", token_str.c_str());
+            fflush(stdout);
 
             // Record Displayed Tokens To Log
             // Note: Generated tokens are created one by one hence this check
@@ -876,11 +877,11 @@ int main(int argc, char ** argv) {
     }
 
     if (!path_session.empty() && params.prompt_cache_all && !params.prompt_cache_ro) {
-        LOG("\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str());
+        LOG_INF("\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str());
         llama_state_save_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
     }
 
-    LOG("\n\n");
+    LOG_INF("\n\n");
     common_perf_print(ctx, smpl);
 
     common_sampler_free(smpl);
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 78e7874de..4d15f6d92 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -217,8 +217,10 @@ void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
 void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
     (void) level;
     (void) user_data;
-    fputs(text, stderr);
-    fflush(stderr);
+    if (level != GGML_LOG_LEVEL_DEBUG) {
+        fputs(text, stderr);
+        fflush(stderr);
+    }
 }
 
 //
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index b06f122e8..0bdc045cd 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -113,6 +113,7 @@ llama_target_and_test(test-arg-parser.cpp)
 llama_target_and_test(test-quantize-fns.cpp)
 llama_target_and_test(test-quantize-perf.cpp)
 llama_target_and_test(test-sampling.cpp)
+llama_target_and_test(test-cli.cpp WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 llama_target_and_test(test-chat-template.cpp)
 
 llama_target_and_test(test-grammar-parser.cpp)
diff --git a/tests/test-cli.cpp b/tests/test-cli.cpp
new file mode 100644
index 000000000..a290208a2
--- /dev/null
+++ b/tests/test-cli.cpp
@@ -0,0 +1,91 @@
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+// read an entire file into a string
+static std::string read(const std::string & file) {
+    std::ostringstream actuals;
+    actuals << std::ifstream(file.c_str()).rdbuf();
+    return actuals.str();
+}
+
+static void assert_equals(const std::string & expected, const std::string & actual) {
+    if (expected != actual) {
+        std::cerr << "Expected: " << expected << std::endl;
+        std::cerr << "Actual: " << actual << std::endl;
+        std::cerr << std::flush;
+        throw std::runtime_error("Test failed");
+    }
+}
+
+static void assert_contains(const std::string & expected, const std::string & actual) {
+    if (actual.find(expected) == std::string::npos) {
+        std::cerr << "Expected to find: " << expected << std::endl;
+        std::cerr << "Actual: " << actual << std::endl;
+        std::cerr << std::flush;
+        throw std::runtime_error("Test failed");
+    }
+}
+
+struct Out {
+    std::string out;
+    std::string err;
+};
+
+// run a command, redirecting its stdout and stderr to files under out/
+static Out run(const std::string & cmd) {
+    auto full_cmd = cmd + " > out/out.txt 2> out/err.txt";
+    std::cerr << "Running: " << full_cmd << std::endl;
+    int ret = std::system(full_cmd.c_str());
+    auto out = read("out/out.txt");
+    auto err = read("out/err.txt");
+    if (ret != 0)
+        throw std::runtime_error("llama-cli binary failed to run.\nstdout: " + out + "\nstderr: " + err);
+    return {
+        /* .out = */ out,
+        /* .err = */ err,
+    };
+}
+
+int main(int argc, char ** argv) {
+    std::string cli_bin = argc == 2 ? argv[1] : "./llama-cli";
+
+    try {
+        if (std::system("mkdir -p out/") != 0)
+            throw std::runtime_error("Failed to create out/ directory.");
+
+        {
+            // --help should print usage to stdout and nothing to stderr
+            auto p = run(cli_bin + " --help");
+            if (!p.err.empty())
+                throw std::runtime_error("llama-cli --help should not have any stderr.");
+            assert_contains("example usage", p.out);
+        }
+
+        {
+            // greedy generation: tokens go to stdout, logs go to stderr
+            auto p = run(cli_bin + " -hfr ggml-org/models -hff tinyllamas/stories260K.gguf --prompt hello --seed 42 --samplers top-k --top-k 1 -ngl 0 -n 10");
+            assert_equals(" hello was a big, red ball. He", p.out);
+            assert_contains("system_info:", p.err);
+        }
+
+        {
+            // with --log-disable, stderr must stay completely empty
+            auto p = run(cli_bin + " -hfr ggml-org/models -hff tinyllamas/stories260K.gguf --prompt hello --seed 42 --samplers top-k --top-k 1 -ngl 0 -n 10 --log-disable");
+            assert_equals(" hello was a big, red ball. He", p.out);
+            assert_equals("", p.err);
+        }
+
+        return 0;
+    } catch (const std::exception & ex) {
+        std::cerr << "[test-cli] Error: " << ex.what() << std::endl;
+        return 1;
+    }
+}