From 2d3481c72125cd388258864c7ad8d7d36777bad7 Mon Sep 17 00:00:00 2001
From: nanahi <130121847+na-na-hi@users.noreply.github.com>
Date: Sun, 16 Apr 2023 17:13:42 +0800
Subject: [PATCH 1/3] Fix msys2 build error and warnings (#1009)

---
 llama.cpp    | 1 +
 llama_util.h | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index be8c4cdc1..a0d7e5137 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -9,6 +9,7 @@
 #include "ggml.h"
 
 #include
+#include
 #include
 #include
 #include
diff --git a/llama_util.h b/llama_util.h
index 653bf7138..d2110ebb4 100755
--- a/llama_util.h
+++ b/llama_util.h
@@ -43,8 +43,12 @@
 } while (0)
 
 #ifdef __GNUC__
+#ifdef __MINGW32__
+__attribute__((format(gnu_printf, 1, 2)))
+#else
 __attribute__((format(printf, 1, 2)))
 #endif
+#endif
 static std::string format(const char * fmt, ...) {
     va_list ap, ap2;
     va_start(ap, fmt);
@@ -57,7 +61,7 @@ static std::string format(const char * fmt, ...) {
     va_end(ap2);
     va_end(ap);
     return std::string(buf.data(), size);
-};
+}
 
 struct llama_file {
     // use FILE * so we don't have to re-open the file to mmap
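
A note on the llama_util.h change in patch 1/3: on MinGW, GCC's format(printf, ...) attribute validates format strings against the Microsoft C runtime's printf, so GNU-style conversions such as %zu draw spurious -Wformat warnings; selecting the gnu_printf archetype makes the checker match the GNU semantics the code actually uses. Below is a minimal sketch of the same guard; the function name log_line and its call site are illustrative, not from the patch:

#include <cstdarg>
#include <cstdio>

#ifdef __GNUC__
#ifdef __MINGW32__
__attribute__((format(gnu_printf, 1, 2)))   // MinGW: check against GNU printf semantics
#else
__attribute__((format(printf, 1, 2)))       // elsewhere: the default printf checker
#endif
#endif
static void log_line(const char * fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
}

int main() {
    // the attribute lets the compiler verify these arguments against the format string
    log_line("loaded %zu tensors in %d ms\n", (size_t) 291, 1234);
    return 0;
}

The second llama_util.h hunk drops the stray semicolon after format()'s closing brace, an empty declaration that stricter warning levels flag; together the two changes let the header build cleanly under msys2.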
From 489537e6cf6c93b74a029a11533dbcaa89791dcc Mon Sep 17 00:00:00 2001
From: Pavol Rusnak
Date: Sun, 16 Apr 2023 12:13:00 +0200
Subject: [PATCH 2/3] examples: add missing include for time() (#1011)

---
 examples/embedding/embedding.cpp   | 2 ++
 examples/main/main.cpp             | 1 +
 examples/perplexity/perplexity.cpp | 1 +
 3 files changed, 4 insertions(+)

diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 2eda3ac01..e10de619c 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -1,6 +1,8 @@
 #include "common.h"
 #include "llama.h"
 
+#include <ctime>
+
 int main(int argc, char ** argv) {
     gpt_params params;
     params.model = "models/llama-7B/ggml-model.bin";
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index ba153cb82..3e4b0034e 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -11,6 +11,7 @@
 #include
 #include
 #include
 #include
+#include <ctime>
 #include
 #include
 #include
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 38e3643b1..19449e16e 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -2,6 +2,7 @@
 #include "common.h"
 #include "llama.h"
 
 #include <cmath>
+#include <ctime>
 
 std::vector<float> softmax(const std::vector<float>& logits) {
     std::vector<float> probs(logits.size());

From 3173a62eb9f90b94fb3184131032c1c8b7aa8d86 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sun, 16 Apr 2023 13:58:48 +0300
Subject: [PATCH 3/3] stdout : vertical align outputs for better readability

---
 convert.py |  5 +++--
 llama.cpp  | 14 +++++++-------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/convert.py b/convert.py
index 4e28a45eb..7b9f043b2 100644
--- a/convert.py
+++ b/convert.py
@@ -951,8 +951,9 @@ class OutputFile:
         ndarrays = bounded_parallel_map(do_item, model.items(), concurrency=8)
 
         for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)):
-            size = ' x '.join(map(str, lazy_tensor.shape))
-            print(f"[{i+1}/{len(model)}] Writing tensor {name}, size {size}...")
+            size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
+            padi = len(str(len(model)))
+            print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type}")
             of.write_tensor_header(name, lazy_tensor.shape, lazy_tensor.data_type)
             ndarray.tofile(of.fout)
         of.fout.close()
diff --git a/llama.cpp b/llama.cpp
index a0d7e5137..a6429a4e7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -262,12 +262,12 @@ static size_t checked_div(size_t a, size_t b) {
 }
 
 static std::string llama_format_tensor_shape(const std::vector<uint32_t> & ne) {
-    std::string ret = "[" + std::to_string(ne.at(0));
+    char buf[256];
+    snprintf(buf, sizeof(buf), "%5u", ne.at(0));
     for (size_t i = 1; i < ne.size(); i++) {
-        ret += " x " + std::to_string(ne.at(i));
+        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " x %5u", ne.at(i));
     }
-    ret += "]";
-    return ret;
+    return buf;
 }
 
 static size_t llama_calc_tensor_size(const std::vector<uint32_t> & ne, enum ggml_type type) {
@@ -942,8 +942,8 @@ static void llama_model_load_internal(
     ml->ggml_ctx = ctx;
 
     model.tok_embeddings = ml->get_tensor("tok_embeddings.weight", {n_embd, n_vocab});
-    model.norm = ml->get_tensor("norm.weight", {n_embd});
-    model.output = ml->get_tensor("output.weight", {n_embd, n_vocab});
+    model.norm   = ml->get_tensor("norm.weight",   {n_embd});
+    model.output = ml->get_tensor("output.weight", {n_embd, n_vocab});
 
     model.layers.resize(n_layer);
     for (uint32_t i = 0; i < n_layer; ++i) {
@@ -1570,7 +1570,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         tensor.data = read_data.addr;
         model_loader->load_data_for(tensor);
 
-        printf("[%zu/%zu] %36s - %s, type = %6s, ",
+        printf("[%4zu/%4zu] %36s - %16s, type = %6s, ",
               ++idx, model_loader->tensors_map.tensors.size(),
               tensor.name.c_str(), llama_format_tensor_shape(tensor.ne).c_str(),
              ggml_type_name(tensor.type));
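
A note on the alignment idiom in patch 3/3: every column gets a fixed field width so successive rows of output line up vertically ([%4zu/%4zu] for the counter, %36s for names and %16s for shapes in llama.cpp; the computed padi width in convert.py's f-string {i+1:{padi}d}). Below is a minimal, self-contained sketch of the printf side; the tensor names, shapes, and field widths are made up for illustration:

#include <cstdio>

int main() {
    const char * names[]   = { "tok_embeddings.weight", "norm.weight", "output.weight" };
    const unsigned ne[][2] = { { 4096, 32000 }, { 4096, 1 }, { 4096, 32000 } };
    const size_t n_tensors = sizeof(names) / sizeof(names[0]);

    for (size_t i = 0; i < n_tensors; i++) {
        char shape[64];
        // fixed %5u fields keep each dimension column-aligned across rows,
        // in the spirit of llama_format_tensor_shape() after the patch
        snprintf(shape, sizeof(shape), "%5u x %5u", ne[i][0], ne[i][1]);
        // %4zu right-aligns the counter, %-24s left-aligns the name,
        // %16s right-aligns the shape string
        printf("[%4zu/%4zu] %-24s - %16s\n", i + 1, n_tensors, names[i], shape);
    }
    return 0;
}

The convert.py hunk achieves the same effect by sizing the counter field to the largest index up front (padi = len(str(len(model)))), so even a 3-digit tensor count produces flush columns.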