From 3cb9de2c4ad3ff128ffdd8bdf68531915f749c88 Mon Sep 17 00:00:00 2001
From: KerfuffleV2
Date: Sat, 10 Jun 2023 09:01:41 -0600
Subject: [PATCH] Clean up size output, use uppercase for quant types

---
 examples/quantize/quantize.cpp | 78 +++++++++++++++++-----------------
 ggml.c                         |  1 -
 2 files changed, 39 insertions(+), 40 deletions(-)

diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
index f42ad0c41..d191cb1b2 100644
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -17,101 +17,101 @@ struct quant_option {
 
 static const std::vector<struct quant_option> QUANT_OPTIONS = {
     {
-        "q4_0",
+        "Q4_0",
         LLAMA_FTYPE_MOSTLY_Q4_0,
-        "approx +0.2499 perplexity, 3.50G output @ 7B",
+        "approx +0.2499 perplexity, 3.50 GiB size @ 7B",
     },
     {
-        "q4_1",
+        "Q4_1",
         LLAMA_FTYPE_MOSTLY_Q4_1,
-        "approx +0.1846 perplexity, 3.90G output @ 7B",
+        "approx +0.1846 perplexity, 3.90 GiB size @ 7B",
     },
     {
-        "q5_0",
+        "Q5_0",
         LLAMA_FTYPE_MOSTLY_Q5_0,
-        "approx +0.0796 perplexity, 4.30G output @ 7B",
+        "approx +0.0796 perplexity, 4.30 GiB size @ 7B",
     },
     {
-        "q5_1",
+        "Q5_1",
         LLAMA_FTYPE_MOSTLY_Q5_1,
-        "approx +0.0415 perplexity, 4.70G output @ 7B",
+        "approx +0.0415 perplexity, 4.70 GiB size @ 7B",
     },
 #ifdef GGML_USE_K_QUANTS
     {
-        "q2_k",
+        "Q2_K",
         LLAMA_FTYPE_MOSTLY_Q2_K,
-        "approx +0.8698 perplexity, 2.67G output @ 7B",
+        "approx +0.8698 perplexity, 2.67 GiB size @ 7B",
     },
     {
-        "q3_k",
+        "Q3_K",
         LLAMA_FTYPE_MOSTLY_Q3_K_M,
-        "alias for q3_k_m"
+        "alias for Q3_K_M"
     },
     {
-        "q3_k_s",
+        "Q3_K_S",
         LLAMA_FTYPE_MOSTLY_Q3_K_S,
-        "approx +0.5505 perplexity, 2.75G output @ 7B",
+        "approx +0.5505 perplexity, 2.75 GiB size @ 7B",
     },
     {
-        "q3_k_m",
+        "Q3_K_M",
         LLAMA_FTYPE_MOSTLY_Q3_K_M,
-        "approx +0.2437 perplexity, 3.06G output @ 7B",
+        "approx +0.2437 perplexity, 3.06 GiB size @ 7B",
     },
     {
-        "q3_k_l",
+        "Q3_K_L",
         LLAMA_FTYPE_MOSTLY_Q3_K_L,
-        "approx +0.1803 perplexity, 3.35G output @ 7B",
+        "approx +0.1803 perplexity, 3.35 GiB size @ 7B",
     },
     {
-        "q4_k",
+        "Q4_K",
         LLAMA_FTYPE_MOSTLY_Q4_K_M,
-        "alias for q4_k_m",
+        "alias for Q4_K_M",
     },
     {
-        "q4_k_s",
+        "Q4_K_S",
         LLAMA_FTYPE_MOSTLY_Q4_K_S,
-        "approx +0.1149 perplexity, 3.56G output @ 7B",
+        "approx +0.1149 perplexity, 3.56 GiB size @ 7B",
     },
     {
-        "q4_k_m",
+        "Q4_K_M",
         LLAMA_FTYPE_MOSTLY_Q4_K_M,
-        "approx +0.0535 perplexity, 3.80G output @ 7B",
+        "approx +0.0535 perplexity, 3.80 GiB size @ 7B",
     },
     {
-        "q5_k",
+        "Q5_K",
         LLAMA_FTYPE_MOSTLY_Q5_K_M,
-        "alias for q5_k_m",
+        "alias for Q5_K_M",
     },
     {
-        "q5_k_s",
+        "Q5_K_S",
         LLAMA_FTYPE_MOSTLY_Q5_K_S,
-        "approx +0.0353 perplexity, 4.33G output @ 7B",
+        "approx +0.0353 perplexity, 4.33 GiB size @ 7B",
     },
     {
-        "q5_k_m",
+        "Q5_K_M",
         LLAMA_FTYPE_MOSTLY_Q5_K_M,
-        "approx +0.0142 perplexity, 4.45G output @ 7B",
+        "approx +0.0142 perplexity, 4.45 GiB size @ 7B",
     },
     {
-        "q6_k",
+        "Q6_K",
         LLAMA_FTYPE_MOSTLY_Q6_K,
-        "approx +0.0044 perplexity, 5.15G output @ 7B",
+        "approx +0.0044 perplexity, 5.15 GiB size @ 7B",
     },
 #endif
     {
-        "q8_0",
+        "Q8_0",
         LLAMA_FTYPE_MOSTLY_Q8_0,
-        "approx +0.0004 perplexity, 6.70G output @ 7B",
+        "approx +0.0004 perplexity, 6.70 GiB size @ 7B",
     },
     {
-        "f16",
+        "F16",
         LLAMA_FTYPE_MOSTLY_F16,
-        "no significant perplexity increase, 13.00G output @ 7B",
+        "no significant perplexity increase, 13.00 GiB size @ 7B",
     },
     {
-        "f32",
+        "F32",
         LLAMA_FTYPE_ALL_F32,
-        "full quality, 26.00G output @ 7B",
+        "full quality, 26.00 GiB size @ 7B",
     },
 };
@@ -120,7 +120,7 @@ bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std:
     std::string ftype_str;
     for (auto ch : ftype_str_in) {
-        ftype_str.push_back(std::tolower(ch));
+        ftype_str.push_back(std::toupper(ch));
     }
     for (auto & it : QUANT_OPTIONS) {
         if (it.name == ftype_str) {
diff --git a/ggml.c b/ggml.c
index 05229085c..252edd582 100644
--- a/ggml.c
+++ b/ggml.c
@@ -16312,7 +16312,6 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
                 int elemsize = sizeof(float);
                 result = n * elemsize;
                 memcpy((uint8_t *)dst + start * elemsize, src + start, result);
-
             } break;
         default:
             assert(false);
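---

Note on the parsing change: because the user's input is uppercased before it is
compared against the table, the quant type argument remains case-insensitive
even though the canonical names are now uppercase ("q4_0", "Q4_0", and "q4_O"
minus typos all normalize the same way). The following is a minimal standalone
C++ sketch of that lookup pattern, not the llama.cpp code itself; the names
quant_option_demo, DEMO_OPTIONS, and parse_ftype_demo are illustrative only,
and the int field merely stands in for the llama_ftype enum.

// Minimal, self-contained sketch of case-insensitive ftype lookup
// (NOT the llama.cpp implementation).
#include <cctype>
#include <iostream>
#include <string>
#include <vector>

// Stand-in for llama.cpp's quant_option struct.
struct quant_option_demo {
    std::string name;   // canonical uppercase label, e.g. "Q4_0"
    int         ftype;  // stands in for the llama_ftype enum value
};

// Hypothetical table mirroring the patched QUANT_OPTIONS.
static const std::vector<quant_option_demo> DEMO_OPTIONS = {
    { "Q4_0", 2 },
    { "Q8_0", 7 },
    { "F16",  1 },
};

// Uppercase the input, then compare against the uppercase table,
// so lookup succeeds regardless of the case the user typed.
static bool parse_ftype_demo(const std::string & in, int & ftype_out) {
    std::string upper;
    for (char ch : in) {
        // Cast to unsigned char first: calling std::toupper on a
        // plain char with a negative value is undefined behavior.
        upper.push_back((char) std::toupper((unsigned char) ch));
    }
    for (const auto & it : DEMO_OPTIONS) {
        if (it.name == upper) {
            ftype_out = it.ftype;
            return true;
        }
    }
    return false;
}

int main() {
    int ftype = 0;
    std::cout << parse_ftype_demo("q4_0", ftype) << "\n"; // 1: matches "Q4_0"
    std::cout << parse_ftype_demo("F16",  ftype) << "\n"; // 1: exact match
    std::cout << parse_ftype_demo("q9_9", ftype) << "\n"; // 0: unknown type
    return 0;
}

One detail worth flagging: the sketch casts to unsigned char before calling
std::toupper, which the patch itself does not do; the patched line passes ch
directly, matching the pre-existing std::tolower call it replaces.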