From b3ea026c924705467b6f34fec73e05bc612db798 Mon Sep 17 00:00:00 2001
From: KerfuffleV2
Date: Tue, 13 Jun 2023 03:14:06 -0600
Subject: [PATCH] Improve help output in quantize tool

---
 examples/quantize/quantize.cpp | 40 +++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
index 877c31373..4e8e6f523 100644
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -17,28 +17,28 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
     {
         "Q4_0",
         LLAMA_FTYPE_MOSTLY_Q4_0,
-        "approx +0.2499 perplexity, 3.50 GiB size @ 7B",
+        " 3.50G, +0.2499 ppl @ 7B - small, very high quality loss - legacy, prefer using Q3_K_M",
     },
     {
         "Q4_1",
         LLAMA_FTYPE_MOSTLY_Q4_1,
-        "approx +0.1846 perplexity, 3.90 GiB size @ 7B",
+        " 3.90G, +0.1846 ppl @ 7B - small, substantial quality loss - legacy, prefer using Q3_K_L",
     },
     {
         "Q5_0",
         LLAMA_FTYPE_MOSTLY_Q5_0,
-        "approx +0.0796 perplexity, 4.30 GiB size @ 7B",
+        " 4.30G, +0.0796 ppl @ 7B - medium, balanced quality - legacy, prefer using Q4_K_M",
     },
     {
         "Q5_1",
         LLAMA_FTYPE_MOSTLY_Q5_1,
-        "approx +0.0415 perplexity, 4.70 GiB size @ 7B",
+        " 4.70G, +0.0415 ppl @ 7B - medium, low quality loss - legacy, prefer using Q5_K_M",
     },
 #ifdef GGML_USE_K_QUANTS
     {
         "Q2_K",
         LLAMA_FTYPE_MOSTLY_Q2_K,
-        "approx +0.8698 perplexity, 2.67 GiB size @ 7B",
+        " 2.67G, +0.8698 ppl @ 7B - smallest, extreme quality loss - not recommended",
     },
     {
         "Q3_K",
@@ -48,17 +48,17 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
     {
         "Q3_K_S",
         LLAMA_FTYPE_MOSTLY_Q3_K_S,
-        "approx +0.5505 perplexity, 2.75 GiB size @ 7B",
+        " 2.75G, +0.5505 ppl @ 7B - very small, very high quality loss",
     },
     {
         "Q3_K_M",
         LLAMA_FTYPE_MOSTLY_Q3_K_M,
-        "approx +0.2437 perplexity, 3.06 GiB size @ 7B",
+        " 3.06G, +0.2437 ppl @ 7B - very small, very high quality loss",
     },
     {
         "Q3_K_L",
         LLAMA_FTYPE_MOSTLY_Q3_K_L,
-        "approx +0.1803 perplexity, 3.35 GiB size @ 7B",
+        " 3.35G, +0.1803 ppl @ 7B - small, substantial quality loss",
     },
     {
         "Q4_K",
@@ -68,12 +68,12 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
     {
         "Q4_K_S",
         LLAMA_FTYPE_MOSTLY_Q4_K_S,
-        "approx +0.1149 perplexity, 3.56 GiB size @ 7B",
+        " 3.56G, +0.1149 ppl @ 7B - small, significant quality loss",
     },
     {
         "Q4_K_M",
         LLAMA_FTYPE_MOSTLY_Q4_K_M,
-        "approx +0.0535 perplexity, 3.80 GiB size @ 7B",
+        " 3.80G, +0.0535 ppl @ 7B - medium, balanced quality - *recommended*",
     },
     {
         "Q5_K",
@@ -83,33 +83,33 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
     {
         "Q5_K_S",
         LLAMA_FTYPE_MOSTLY_Q5_K_S,
-        "approx +0.0353 perplexity, 4.33 GiB size @ 7B",
+        " 4.33G, +0.0353 ppl @ 7B - large, low quality loss - *recommended*",
     },
     {
         "Q5_K_M",
         LLAMA_FTYPE_MOSTLY_Q5_K_M,
-        "approx +0.0142 perplexity, 4.45 GiB size @ 7B",
+        " 4.45G, +0.0142 ppl @ 7B - large, very low quality loss - *recommended*",
     },
     {
         "Q6_K",
         LLAMA_FTYPE_MOSTLY_Q6_K,
-        "approx +0.0044 perplexity, 5.15 GiB size @ 7B",
+        " 5.15G, +0.0044 ppl @ 7B - very large, extremely low quality loss",
     },
 #endif
     {
         "Q8_0",
         LLAMA_FTYPE_MOSTLY_Q8_0,
-        "approx +0.0004 perplexity, 6.70 GiB size @ 7B",
+        " 6.70G, +0.0004 ppl @ 7B - very large, extremely low quality loss - not recommended",
     },
     {
         "F16",
         LLAMA_FTYPE_MOSTLY_F16,
-        "no significant perplexity increase, 13.00 GiB size @ 7B",
+        "13.00G @ 7B - extremely large, virtually no quality loss - not recommended",
     },
     {
         "F32",
         LLAMA_FTYPE_ALL_F32,
-        "full quality, 26.00 GiB size @ 7B",
+        "26.00G @ 7B - absolutely huge, lossless - not recommended",
     },
 };
 
@@ -144,15 +144,15 @@ bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std:
 }
 
 // usage:
-//  ./quantize models/llama/ggml-model.bin [models/llama/ggml-model-quant.bin] type [nthreads]
+//  ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.bin [models/llama/ggml-model-quant.bin] type [nthreads]
 //
 void usage(const char * executable) {
-    fprintf(stderr, "usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.bin [model-quant.bin] type [nthreads]\n", executable);
+    fprintf(stderr, "usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.bin [model-quant.bin] type [nthreads]\n\n", executable);
     fprintf(stderr, "  --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n");
     fprintf(stderr, "  --leave-output-tensor: Will leave output.weight un(re)quantized. Increases model size but may also increase quality, especially when requantizing\n");
-    fprintf(stderr, "Allowed quantization types:\n");
+    fprintf(stderr, "\nAllowed quantization types:\n");
     for (auto & it : QUANT_OPTIONS) {
-        printf("  %2d or %6s : %s\n", it.ftype, it.name.c_str(), it.desc.c_str());
+        printf("  %2d or %-6s : %s\n", it.ftype, it.name.c_str(), it.desc.c_str());
     }
     exit(1);
 }