diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index a6d1b678b..1777489ec 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -24,6 +24,7 @@ env:
   GGML_NLOOP: 3
   GGML_N_THREADS: 1
   LLAMA_LOG_COLORS: 1
+  LLAMA_LOG_PREFIX: 1
   LLAMA_LOG_TIMESTAMPS: 1
 
 jobs:
diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
index 3bc6b35c0..699ac095d 100644
--- a/.github/workflows/server.yml
+++ b/.github/workflows/server.yml
@@ -22,6 +22,7 @@ on:
 
 env:
   LLAMA_LOG_COLORS: 1
+  LLAMA_LOG_PREFIX: 1
   LLAMA_LOG_TIMESTAMPS: 1
   LLAMA_LOG_VERBOSITY: 10
 
diff --git a/ci/run.sh b/ci/run.sh
index 798efea1e..1ac08ee4e 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -737,6 +737,7 @@ function gg_sum_embd_bge_small {
 
 ## main
 
+export LLAMA_LOG_PREFIX=1
 export LLAMA_LOG_TIMESTAMPS=1
 
 if [ -z ${GG_BUILD_LOW_PERF} ]; then
diff --git a/common/arg.cpp b/common/arg.cpp
index c49b07cc8..8fcb8c25f 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -1968,6 +1968,13 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             gpt_log_set_verbosity_thold(value);
         }
     ).set_env("LLAMA_LOG_VERBOSITY"));
+    add_opt(llama_arg(
+        {"--log-prefix"},
+        "Enable prefix in log messages",
+        [](gpt_params &) {
+            gpt_log_set_prefix(gpt_log_main(), true); // flag takes no value; its presence enables the prefix on the main logger
+        }
+    ).set_env("LLAMA_LOG_PREFIX"));
     add_opt(llama_arg(
         {"--log-timestamps"},
         "Enable timestamps in log messages",
diff --git a/common/log.cpp b/common/log.cpp
index 99ff6bba2..97006ed3a 100644
--- a/common/log.cpp
+++ b/common/log.cpp
@@ -57,6 +57,8 @@ static std::vector<const char *> g_col = {
 struct gpt_log_entry {
     enum ggml_log_level level;
 
+    bool prefix;
+
     int64_t timestamp;
 
     std::vector<char> msg;
@@ -80,7 +82,7 @@ struct gpt_log_entry {
             }
         }
 
-        if (level != GGML_LOG_LEVEL_NONE) {
+        if (level != GGML_LOG_LEVEL_NONE && prefix) {
             if (timestamp) {
                 // [M.s.ms.us]
                 fprintf(fcur, "%s%d.%02d.%03d.%03d%s ",
@@ -118,6 +120,7 @@ struct gpt_log {
 
     gpt_log(size_t capacity) {
         file = nullptr;
+        prefix = false;
         timestamps = false;
         running = false;
         t_start = t_us();
@@ -148,6 +151,7 @@ private:
 
     FILE * file;
 
+    bool prefix;
     bool timestamps;
     bool running;
 
@@ -205,6 +209,7 @@ public:
         }
 
         entry.level = level;
+        entry.prefix = prefix;
         entry.timestamp = 0;
         if (timestamps) {
             entry.timestamp = t_us() - t_start;
@@ -333,6 +338,12 @@ public:
         resume();
     }
 
+    // turn the per-message prefix on or off; the flag is written while holding mtx
+    void set_prefix(bool prefix) {
+        const std::lock_guard<std::mutex> guard(mtx);
+        this->prefix = prefix;
+    }
+
     void set_timestamps(bool timestamps) {
         std::lock_guard<std::mutex> lock(mtx);
 
@@ -381,6 +392,10 @@ void gpt_log_set_colors(struct gpt_log * log, bool colors) {
     log->set_colors(colors);
 }
 
+void gpt_log_set_prefix(struct gpt_log * log, bool prefix) { // free-function wrapper: toggles the prefix on the given logger
+    log->set_prefix(prefix); // forwards to gpt_log::set_prefix
+}
+
 void gpt_log_set_timestamps(struct gpt_log * log, bool timestamps) {
     log->set_timestamps(timestamps);
 }
diff --git a/common/log.h b/common/log.h
index f00fae702..624ef7c0d 100644
--- a/common/log.h
+++ b/common/log.h
@@ -32,9 +32,29 @@ void             gpt_log_free  (struct gpt_log * log);
 LOG_ATTRIBUTE_FORMAT(3, 4)
 void gpt_log_add(struct gpt_log * log, enum ggml_log_level level, const char * fmt, ...);
 
+// defaults: file = NULL, colors = false, prefix = false, timestamps = false
+//
+// regular log output:
+//
+//   ggml_backend_metal_log_allocated_size: allocated buffer, size =  6695.84 MiB, ( 6695.91 / 21845.34)
+//   llm_load_tensors: ggml ctx size =    0.27 MiB
+//   llm_load_tensors: offloading 32 repeating layers to GPU
+//   llm_load_tensors: offloading non-repeating layers to GPU
+//
+// with prefix = true, timestamps = true, the log output will look like this:
+//
+//   0.00.035.060 D ggml_backend_metal_log_allocated_size: allocated buffer, size =  6695.84 MiB, ( 6695.91 / 21845.34)
+//   0.00.035.064 I llm_load_tensors: ggml ctx size =    0.27 MiB
+//   0.00.090.578 I llm_load_tensors: offloading 32 repeating layers to GPU
+//   0.00.090.579 I llm_load_tensors: offloading non-repeating layers to GPU
+//
+// I - info, W - warning, E - error, D - debug
+//
+
 void gpt_log_set_file      (struct gpt_log * log, const char * file);       // not thread-safe
 void gpt_log_set_colors    (struct gpt_log * log,       bool   colors);     // not thread-safe
-void gpt_log_set_timestamps(struct gpt_log * log,       bool   timestamps);
+void gpt_log_set_prefix    (struct gpt_log * log,       bool   prefix);     // whether to prepend the prefix (timestamp if enabled + level letter) to each log message
+void gpt_log_set_timestamps(struct gpt_log * log,       bool   timestamps); // whether to include timestamps in the prefix (not printed unless the prefix is enabled)
 
 // helper macros for logging
 // use these to avoid computing log arguments if the verbosity is lower than the threshold
diff --git a/tests/test-log.cpp b/tests/test-log.cpp
index 06df144ee..211222369 100644
--- a/tests/test-log.cpp
+++ b/tests/test-log.cpp
@@ -25,6 +25,7 @@ int main() {
 
                 if (rand () % 10 < 5) {
                     gpt_log_set_timestamps(gpt_log_main(), rand() % 2);
+                    gpt_log_set_prefix    (gpt_log_main(), rand() % 2);
                 }
             }
         });