diff --git a/libc/log/showcrashreports.c b/libc/log/showcrashreports.c
index 8f5a9d307..fd6bed95b 100644
--- a/libc/log/showcrashreports.c
+++ b/libc/log/showcrashreports.c
@@ -60,6 +60,7 @@ static inline void __oncrash(int sig, struct siginfo *si, void *arg) {
 }

 static void __got_sigquit(int sig, struct siginfo *si, void *arg) {
+  write(2, "^\\", 2);
   __oncrash(sig, si, arg);
 }

 static void __got_sigfpe(int sig, struct siginfo *si, void *arg) {
diff --git a/third_party/ggml/common.cc b/third_party/ggml/common.cc
index 7e1ca4334..e75dfd9f4 100644
--- a/third_party/ggml/common.cc
+++ b/third_party/ggml/common.cc
@@ -109,6 +109,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             params.seed = std::stoi(argv[i]);
         } else if (arg == "-v" || arg == "--verbose") {
             ++params.verbose;
+        } else if (arg == "-q" || arg == "--quiet") {
+            --params.verbose;
         } else if (arg == "-t" || arg == "--threads") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -332,7 +334,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {

     // if no prompt is specified, then use companion ai
     if (params.prompt.empty()) {
-        if (params.verbose) {
+        if (params.verbose > 0) {
             fprintf(stderr, "%s: No prompt specified\n", __func__);
             fprintf(stderr, "%s: Loading CompanionAI\n", __func__);
         }
@@ -368,7 +370,8 @@ void gpt_print_usage(FILE *f, int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(f, "\n");
     fprintf(f, "options:\n");
     fprintf(f, "  -h, --help            show this help message and exit\n");
-    fprintf(f, "  -v, --verbose         print plenty of helpful information, e.g. prompt\n");
+    fprintf(f, "  -v, --verbose         print helpful information to stderr [repeatable]\n");
+    fprintf(f, "  -q, --quiet           disable ephemeral progress indicators [repeatable]\n");
     fprintf(f, "  -i, --interactive     run in interactive mode\n");
     fprintf(f, "  --interactive-first   run in interactive mode and wait for input right away\n");
     fprintf(f, "  -ins, --instruct      run in instruction mode (use with Alpaca models)\n");
diff --git a/third_party/ggml/common.h b/third_party/ggml/common.h
index 6ec10350a..cffd92363 100644
--- a/third_party/ggml/common.h
+++ b/third_party/ggml/common.h
@@ -25,7 +25,7 @@ struct gpt_params {
     int32_t n_predict = -1;  // new tokens to predict
     int32_t n_parts   = -1;  // amount of model parts (-1 = determine from model dimensions)
     int32_t n_ctx     = 512; // context size
-    int32_t n_batch   = 64;  // batch size for prompt processing (must be >=32 to use BLAS)
+    int32_t n_batch   = 32;  // batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_keep    = 0;   // number of tokens to keep from initial prompt

     // sampling parameters
diff --git a/third_party/ggml/llama.cc b/third_party/ggml/llama.cc
index 7de386066..268deaf5f 100644
--- a/third_party/ggml/llama.cc
+++ b/third_party/ggml/llama.cc
@@ -936,7 +936,7 @@ static void llama_model_load_internal(
         hparams.n_ctx = n_ctx;
     }

-    if (verbose) {
+    if (verbose > 0) {
         fprintf(stderr, "%s: format  = %s\n", __func__, llama_file_version_name(file_version));
         fprintf(stderr, "%s: n_vocab = %u\n", __func__, hparams.n_vocab);
         fprintf(stderr, "%s: n_ctx   = %u\n", __func__, hparams.n_ctx);
@@ -959,7 +959,7 @@ static void llama_model_load_internal(
     size_t ctx_size, mmapped_size;
     ml->calc_sizes(&ctx_size, &mmapped_size);

-    if (verbose) {
+    if (verbose > 0) {
         fprintf(stderr, "%s: ggml ctx size = %6.2f KB\n", __func__, ctx_size/1024.0);
     }

@@ -979,7 +979,7 @@ static void llama_model_load_internal(
         const size_t mem_required_state =
             scale*MEM_REQ_KV_SELF().at(model.type);

-        if (verbose) {
+        if (verbose > 0) {
            fprintf(stderr, "%s: mem required = %7.2f MB (+ %7.2f MB per state)\n", __func__,
                    mem_required / 1024.0 / 1024.0, mem_required_state / 1024.0 / 1024.0);
        }
@@ -2108,7 +2108,7 @@ struct llama_context * llama_init_from_file(
     }

     unsigned cur_percentage = 0;
-    if (verbose && params.progress_callback == NULL) {
+    if (verbose > 0 && params.progress_callback == NULL) {
         params.progress_callback_user_data = &cur_percentage;
         params.progress_callback = [](float progress, void * ctx) {
             unsigned * cur_percentage_p = (unsigned *) ctx;
@@ -2146,7 +2146,7 @@ struct llama_context * llama_init_from_file(
         return nullptr;
     }

-    if (verbose) {
+    if (verbose > 0) {
         const size_t memory_size = ggml_nbytes(ctx->model.kv_self.k) + ggml_nbytes(ctx->model.kv_self.v);
         fprintf(stderr, "%s: kv self size = %7.2f MB\n", __func__, memory_size / 1024.0 / 1024.0);
     }
diff --git a/third_party/ggml/main.cc b/third_party/ggml/main.cc
index 65aba0571..cda679bb4 100644
--- a/third_party/ggml/main.cc
+++ b/third_party/ggml/main.cc
@@ -31,6 +31,7 @@
 #include "libc/calls/struct/sigaction.h"
 #include "libc/calls/struct/stat.h"
 #include "libc/intrin/bits.h"
+#include "libc/intrin/kprintf.h"
 #include "libc/log/log.h"
 #include "libc/macros.internal.h"
 #include "libc/nexgen32e/x86feature.h"
@@ -62,18 +63,25 @@
 static console_state con_st;

 ////////////////////////////////////////////////////////////////////////////////

-static std::atomic<bool> is_interacting;
+static std::atomic<bool> is_stalled;
 static std::atomic<bool> is_terminated;
+static std::atomic<bool> is_interacting;
+
+static void acknowledge_shutdown(void) {
+    write(2, "^C", 2);
+}

 static void sigint_handler_batch(int signo) {
     is_terminated = true;
+    acknowledge_shutdown();
 }

 static void sigint_handler_interactive(int signo) {
-    if (!is_interacting) {
+    if (!is_interacting && !is_stalled) {
         is_interacting = true;
     } else {
         is_terminated = true;
+        acknowledge_shutdown();
     }
 }
@@ -223,7 +231,7 @@ int main(int argc, char ** argv) {
         params.seed = time(NULL);
     }

-    if (params.verbose) {
+    if (params.verbose > 0) {
         fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
     }

@@ -258,7 +266,7 @@ int main(int argc, char ** argv) {
     }

     // print system information
-    if (params.verbose) {
+    if (params.verbose > 0) {
         fprintf(stderr, "\n");
         fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
                 params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
@@ -277,7 +285,7 @@ int main(int argc, char ** argv) {
             llama_eval(ctx, tmp.data(), tmp.size(), params.n_predict - 1, params.n_threads);
         }

-        if (params.verbose) {
+        if (params.verbose > 0) {
             llama_print_timings(ctx);
         }
         llama_free(ctx);
@@ -365,22 +373,22 @@ int main(int argc, char ** argv) {
     sigaction(SIGINT, &sa, NULL);

     if (params.interactive) {
-        if (params.verbose) {
+        if (params.verbose > 0) {
             fprintf(stderr, "%s: interactive mode on.\n", __func__);
         }
-        if (params.verbose && params.antiprompt.size()) {
+        if (params.verbose > 0 && params.antiprompt.size()) {
             for (auto antiprompt : params.antiprompt) {
                 fprintf(stderr, "Reverse prompt: '%s'\n", antiprompt.c_str());
             }
         }
-        if (params.verbose && !params.input_prefix.empty()) {
+        if (params.verbose > 0 && !params.input_prefix.empty()) {
             fprintf(stderr, "Input prefix: '%s'\n", params.input_prefix.c_str());
         }
     }
-    if (params.verbose) {
+    if (params.verbose > 0) {
         fprintf(stderr, "sampling: temp = %f, top_k = %d, top_p = %f, repeat_last_n = %i, repeat_penalty = %f\n",
                 params.temp, params.top_k, params.top_p, params.repeat_last_n, params.repeat_penalty);
         fprintf(stderr, "generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n",
@@ -388,7 +396,7 @@ int main(int argc, char ** argv) {
         fprintf(stderr, "\n\n");
     }

-    if (params.verbose && params.interactive) {
+    if (params.verbose > 0 && params.interactive) {
         fprintf(stderr, "== Running in interactive mode. ==\n"
                         " - Press Ctrl+C to interject at any time.\n"
                         " - Press Return to return control to LLaMa.\n"
@@ -399,7 +407,7 @@ int main(int argc, char ** argv) {
     remember_init();

     bool is_antiprompt = false;
-    bool input_noecho  = !params.verbose;
+    bool input_noecho  = params.verbose <= 0;

     int n_past   = 0;
     int n_remain = params.n_predict;
@@ -443,7 +451,7 @@ int main(int argc, char ** argv) {
        // check expected state size
        state_size = llama_get_state_size(ctx);
        if (READ64LE(header->state_size) != state_size) {
-            if (params.verbose) {
+            if (params.verbose > 0) {
                fprintf(stderr, "%s: prompt has stale data state size\n",
                        params.prompt_path.c_str());
            }
@@ -465,7 +473,7 @@ int main(int argc, char ** argv) {
        mtim.tv_sec = READ64LE(header->model_mtim_sec);
        mtim.tv_nsec = READ64LE(header->model_mtim_nsec);
        if (CompareTime(model_stat.st_mtim, mtim) > 0) {
-            if (params.verbose) {
+            if (params.verbose > 0) {
                fprintf(stderr, "%s: model file timestamp changed; will reload and regenerate prompt\n",
                        params.prompt_path.c_str());
            }
@@ -481,7 +489,7 @@ int main(int argc, char ** argv) {
        // check prompt textus
        if (prompt_size != params.prompt.size() ||
            memcmp(header + 1, params.prompt.c_str(), prompt_size) != 0) {
-            if (params.verbose) {
+            if (params.verbose > 0) {
                fprintf(stderr, "%s: prompt text changed; will reload and regenerate\n",
                        params.prompt_path.c_str());
            }
@@ -490,7 +498,7 @@ int main(int argc, char ** argv) {
        // read the transformer state
        llama_set_state_data(ctx, (uint8_t *)(header + 1) + prompt_size);
        // we're finished loading the prompt file
-        if (params.verbose) {
+        if (params.verbose > 0) {
            fprintf(stderr, "%s: %s: reloaded previously saved prompt\n",
                    __func__, params.prompt_path.c_str());
        }
@@ -508,7 +516,7 @@ int main(int argc, char ** argv) {
        close(fd);
    }

-    if (prompt_status == kPromptPending && params.verbose) {
+    if (prompt_status == kPromptPending && params.verbose > 0) {
        // the first thing we will do is to output the prompt, so set color accordingly
        console_set_color(con_st, CONSOLE_COLOR_PROMPT);
    }
@@ -535,11 +543,13 @@ int main(int argc, char ** argv) {
            if (n_eval > params.n_batch) {
                n_eval = params.n_batch;
            }
+            is_stalled = n_eval > 1;
            if (llama_eval(ctx, &embd[i], n_eval, n_past, params.n_threads)) {
                fprintf(stderr, "%s : failed to eval\n", __func__);
                console_set_color(con_st, CONSOLE_COLOR_DEFAULT);
                return 1;
            }
+            is_stalled = false;
            n_past += n_eval;
            if (prompt_status == kPromptPending &&
                !params.verbose && con_st.use_color && embd_inp.size()) {
@@ -599,11 +609,11 @@ int main(int argc, char ** argv) {
        llama_copy_state_data(ctx, (uint8_t *)map + sizeof(header) + params.prompt.size());
        memcpy((uint8_t *)map + sizeof(header), params.prompt.c_str(), params.prompt.size());
        memcpy(map, &header, sizeof(header));
-        if (msync(map, file_size, MS_ASYNC) && params.verbose) {
+        if (msync(map, file_size, MS_ASYNC) && params.verbose > 0) {
            fprintf(stderr, "%s: msync failed: %s\n",
                    tmppath.c_str(), strerror(errno));
        }
-        if (munmap(map, file_size) && params.verbose) {
+        if (munmap(map, file_size) && params.verbose > 0) {
            fprintf(stderr, "%s: munmap failed: %s\n",
                    tmppath.c_str(), strerror(errno));
        }
@@ -877,7 +887,7 @@ int main(int argc, char ** argv) {
        if (!embd.empty() && embd.back() == llama_token_eos()) {
            if (params.instruct) {
                is_interacting = true;
-            } else if (params.verbose) {
+            } else if (params.verbose > 0) {
                fprintf(stderr, " [end of text]\n");
                break;
            }
@@ -893,13 +903,13 @@ int main(int argc, char ** argv) {
    if (is_terminated) {
        console_cleanup(con_st);
        printf("\n");
-        if (params.verbose) {
+        if (params.verbose > 0) {
            llama_print_timings(ctx);
        }
        _exit(128 + SIGINT);
    }

-    if (params.verbose) {
+    if (params.verbose > 0) {
        llama_print_timings(ctx);
    }
    llama_free(ctx);
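
A note on the verbosity scheme this patch establishes: params.verbose is a signed counter rather than a boolean, where each repeatable -v increments it and each -q decrements it. That is why every if (params.verbose) becomes if (params.verbose > 0), and !params.verbose becomes params.verbose <= 0: once --quiet can drive the counter below zero, a plain truthiness test would still fire at -1. A minimal standalone sketch of the pitfall, not code from this patch:

    #include <cstdio>
    #include <cstring>

    int main(int argc, char **argv) {
        int verbose = 0;  // signed counter: each -v adds one, each -q subtracts one
        for (int i = 1; i < argc; ++i) {
            if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--verbose")) ++verbose;
            if (!strcmp(argv[i], "-q") || !strcmp(argv[i], "--quiet"))   --verbose;
        }
        if (verbose)     fprintf(stderr, "truthy test fires at %d too\n", verbose);  // wrong: true for -1
        if (verbose > 0) fprintf(stderr, "signed test fires only above zero\n");     // matches the patch
        return 0;
    }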
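
The signal handling in main.cc follows the async-signal-safe pattern: the handlers only store to std::atomic<bool> flags and call write(2), since fprintf is not async-signal-safe, and the new is_stalled flag lets a Ctrl+C that arrives during a long llama_eval terminate the process immediately instead of queuing an interjection the user would have to wait for. A hedged sketch of that pattern under plain POSIX; the flag and behavior mirror the patch, but the loop body is a stand-in:

    #include <atomic>
    #include <cstring>
    #include <signal.h>
    #include <unistd.h>

    static std::atomic<bool> is_stalled;      // set around long llama_eval() calls
    static std::atomic<bool> is_interacting;  // user wants to interject
    static std::atomic<bool> is_terminated;   // user wants out

    static void on_sigint(int) {
        if (!is_interacting && !is_stalled) {
            is_interacting = true;  // generation loop will pause for user input
        } else {
            is_terminated = true;   // second ^C, or ^C while stalled in eval
            write(2, "^C", 2);      // write(2) is async-signal-safe; fprintf is not
        }
    }

    int main() {
        struct sigaction sa;
        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = on_sigint;
        sigaction(SIGINT, &sa, nullptr);
        while (!is_terminated) pause();  // stand-in for the real generation loop
        write(2, "\n", 1);
        _exit(128 + SIGINT);             // conventional "killed by SIGINT" status
    }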