From 2d3481c72125cd388258864c7ad8d7d36777bad7 Mon Sep 17 00:00:00 2001
From: nanahi <130121847+na-na-hi@users.noreply.github.com>
Date: Sun, 16 Apr 2023 17:13:42 +0800
Subject: [PATCH 1/3] Fix msys2 build error and warnings (#1009)

---
 llama.cpp    | 1 +
 llama_util.h | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index be8c4cdc1..a0d7e5137 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -9,6 +9,7 @@
 #include "ggml.h"
 
 #include
+#include
 #include
 #include
 #include
diff --git a/llama_util.h b/llama_util.h
index 653bf7138..d2110ebb4 100755
--- a/llama_util.h
+++ b/llama_util.h
@@ -43,8 +43,12 @@
 } while (0)
 
 #ifdef __GNUC__
+#ifdef __MINGW32__
+__attribute__((format(gnu_printf, 1, 2)))
+#else
 __attribute__((format(printf, 1, 2)))
 #endif
+#endif
 static std::string format(const char * fmt, ...) {
     va_list ap, ap2;
     va_start(ap, fmt);
@@ -57,7 +61,7 @@ static std::string format(const char * fmt, ...) {
     va_end(ap2);
     va_end(ap);
     return std::string(buf.data(), size);
-};
+}
 
 struct llama_file {
     // use FILE * so we don't have to re-open the file to mmap
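
A note on the llama_util.h change in patch 1/3: on MinGW, GCC's format(printf, ...) attribute validates format strings against the Microsoft C runtime's printf, so GNU-style conversions such as %zu draw spurious -Wformat warnings; selecting the gnu_printf archetype makes the checker match the GNU semantics the code actually uses. Below is a minimal sketch of the same guard; the function name log_line and its call site are illustrative, not from the patch:

#include <cstdarg>
#include <cstdio>

#ifdef __GNUC__
#ifdef __MINGW32__
__attribute__((format(gnu_printf, 1, 2)))   // MinGW: check against GNU printf semantics
#else
__attribute__((format(printf, 1, 2)))       // elsewhere: the default printf checker
#endif
#endif
static void log_line(const char * fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
}

int main() {
    // the attribute lets the compiler verify these arguments against the format string
    log_line("loaded %zu tensors in %d ms\n", (size_t) 291, 1234);
    return 0;
}

The second llama_util.h hunk drops the stray semicolon after format()'s closing brace, an empty declaration that stricter warning levels flag; together the two changes let the header build cleanly under msys2.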
From 489537e6cf6c93b74a029a11533dbcaa89791dcc Mon Sep 17 00:00:00 2001
From: Pavol Rusnak
Date: Sun, 16 Apr 2023 12:13:00 +0200
Subject: [PATCH 2/3] examples: add missing include for time() (#1011)

---
 examples/embedding/embedding.cpp   | 2 ++
 examples/main/main.cpp             | 1 +
 examples/perplexity/perplexity.cpp | 1 +
 3 files changed, 4 insertions(+)

diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 2eda3ac01..e10de619c 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -1,6 +1,8 @@
 #include "common.h"
 #include "llama.h"
 
+#include <ctime>
+
 int main(int argc, char ** argv) {
     gpt_params params;
     params.model = "models/llama-7B/ggml-model.bin";
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index ba153cb82..3e4b0034e 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -11,6 +11,7 @@
 #include
 #include
 #include
 #include
+#include <ctime>
 #include
 #include
 #include
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 38e3643b1..19449e16e 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -2,6 +2,7 @@
 #include "common.h"
 #include "llama.h"
 
 #include <cmath>
+#include <ctime>
 
 std::vector<float> softmax(const std::vector<float>& logits) {
     std::vector<float> probs(logits.size());

From 3173a62eb9f90b94fb3184131032c1c8b7aa8d86 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sun, 16 Apr 2023 13:58:48 +0300
Subject: [PATCH 3/3] stdout : vertical align outputs for better readability

---
 convert.py |  5 +++--
 llama.cpp  | 14 +++++++-------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/convert.py b/convert.py
index 4e28a45eb..7b9f043b2 100644
--- a/convert.py
+++ b/convert.py
@@ -951,8 +951,9 @@ class OutputFile:
         ndarrays = bounded_parallel_map(do_item, model.items(), concurrency=8)
 
         for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)):
-            size = ' x '.join(map(str, lazy_tensor.shape))
-            print(f"[{i+1}/{len(model)}] Writing tensor {name}, size {size}...")
+            size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
+            padi = len(str(len(model)))
+            print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type}")
             of.write_tensor_header(name, lazy_tensor.shape, lazy_tensor.data_type)
             ndarray.tofile(of.fout)
         of.fout.close()
diff --git a/llama.cpp b/llama.cpp
index a0d7e5137..a6429a4e7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -262,12 +262,12 @@ static size_t checked_div(size_t a, size_t b) {
 }
 
 static std::string llama_format_tensor_shape(const std::vector<uint32_t> & ne) {
-    std::string ret = "[" + std::to_string(ne.at(0));
+    char buf[256];
+    snprintf(buf, sizeof(buf), "%5u", ne.at(0));
     for (size_t i = 1; i < ne.size(); i++) {
-        ret += " x " + std::to_string(ne.at(i));
+        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " x %5u", ne.at(i));
     }
-    ret += "]";
-    return ret;
+    return buf;
 }
 
 static size_t llama_calc_tensor_size(const std::vector<uint32_t> & ne, enum ggml_type type) {
@@ -942,8 +942,8 @@ static void llama_model_load_internal(
     ml->ggml_ctx = ctx;
 
     model.tok_embeddings = ml->get_tensor("tok_embeddings.weight", {n_embd, n_vocab});
-    model.norm = ml->get_tensor("norm.weight", {n_embd});
-    model.output = ml->get_tensor("output.weight", {n_embd, n_vocab});
+    model.norm   = ml->get_tensor("norm.weight",   {n_embd});
+    model.output = ml->get_tensor("output.weight", {n_embd, n_vocab});
 
     model.layers.resize(n_layer);
     for (uint32_t i = 0; i < n_layer; ++i) {
@@ -1570,7 +1570,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         tensor.data = read_data.addr;
         model_loader->load_data_for(tensor);
 
-        printf("[%zu/%zu] %36s - %s, type = %6s, ",
+        printf("[%4zu/%4zu] %36s - %16s, type = %6s, ",
               ++idx, model_loader->tensors_map.tensors.size(),
               tensor.name.c_str(), llama_format_tensor_shape(tensor.ne).c_str(),
              ggml_type_name(tensor.type));
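
A note on the alignment idiom in patch 3/3: every column gets a fixed field width so successive rows of output line up vertically ([%4zu/%4zu] for the counter, %36s for names and %16s for shapes in llama.cpp; the computed padi width in convert.py's f-string {i+1:{padi}d}). Below is a minimal, self-contained sketch of the printf side; the tensor names, shapes, and field widths are made up for illustration:

#include <cstdio>

int main() {
    const char * names[]   = { "tok_embeddings.weight", "norm.weight", "output.weight" };
    const unsigned ne[][2] = { { 4096, 32000 }, { 4096, 1 }, { 4096, 32000 } };
    const size_t n_tensors = sizeof(names) / sizeof(names[0]);

    for (size_t i = 0; i < n_tensors; i++) {
        char shape[64];
        // fixed %5u fields keep each dimension column-aligned across rows,
        // in the spirit of llama_format_tensor_shape() after the patch
        snprintf(shape, sizeof(shape), "%5u x %5u", ne[i][0], ne[i][1]);
        // %4zu right-aligns the counter, %-24s left-aligns the name,
        // %16s right-aligns the shape string
        printf("[%4zu/%4zu] %-24s - %16s\n", i + 1, n_tensors, names[i], shape);
    }
    return 0;
}

The convert.py hunk achieves the same effect by sizing the counter field to the largest index up front (padi = len(str(len(model)))), so even a 3-digit tensor count produces flush columns.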