From c574bddb368424b5996cbee2ec45ec050967d404 Mon Sep 17 00:00:00 2001
From: Bono Lv
Date: Tue, 1 Aug 2023 20:54:28 +0800
Subject: [PATCH 1/4] fix a typo in examples/server/README.md (#2478)

---
 examples/server/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/server/README.md b/examples/server/README.md
index e5ca8269b..aee31ae42 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -163,7 +163,7 @@ node .
 
     `content`: Set the text to tokenize.
 
-    Note that the special `BOS` token is not added in fron of the text and also a space character is not inserted automatically as it is for `/completion`.
+    Note that the special `BOS` token is not added in front of the text and also a space character is not inserted automatically as it is for `/completion`.
 
 - **POST** `/embedding`: Generate embedding of a given text just as [the embedding example](../embedding) does.
 

From a312193e184b919047f33a5e844d846c5538dbe6 Mon Sep 17 00:00:00 2001
From: Yiming Cui
Date: Wed, 2 Aug 2023 14:18:31 +0800
Subject: [PATCH 2/4] readme : Add Chinese LLaMA-2 / Alpaca-2 to supported models (#2475)

* add support for chinese llama-2 / alpaca-2

* remove white spaces

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b231d24b8..515c80c42 100644
--- a/README.md
+++ b/README.md
@@ -80,7 +80,7 @@ as the main playground for developing new features for the [ggml](https://github
 - [x] LLaMA 2 🦙🦙
 - [X] [Alpaca](https://github.com/ggerganov/llama.cpp#instruction-mode-with-alpaca)
 - [X] [GPT4All](https://github.com/ggerganov/llama.cpp#using-gpt4all)
-- [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca)
+- [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2)
 - [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne)
 - [X] [Vicuna](https://github.com/ggerganov/llama.cpp/discussions/643#discussioncomment-5533894)
 - [X] [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/)

From 81844fbcfd93a162b7aeaea9e4f2ab1358f7f97e Mon Sep 17 00:00:00 2001
From: Eve <139727413+netrunnereve@users.noreply.github.com>
Date: Wed, 2 Aug 2023 04:06:19 -0400
Subject: [PATCH 3/4] tests : Fix compilation warnings (Linux/GCC) (#2451)

* fix hellaswag print format, cast away warning in test-double-float

* c++11 cannot use designated initializers

* add static to test-grad0.c internal functions

* use memcpy in test-double-float.c

* port c tests to c++

* use initializer list for ggml_init_params

---
 Makefile                                      |  6 ++--
 examples/common.cpp                           |  2 +-
 scripts/sync-ggml.sh                          |  4 +--
 tests/CMakeLists.txt                          |  6 ++--
 ...t-double-float.c => test-double-float.cpp} | 12 ++++---
 tests/{test-grad0.c => test-grad0.cpp}        | 32 +++++++++----------
 tests/{test-opt.c => test-opt.cpp}            | 15 +++++----
 7 files changed, 40 insertions(+), 37 deletions(-)
 rename tests/{test-double-float.c => test-double-float.cpp} (88%)
 rename tests/{test-grad0.c => test-grad0.cpp} (98%)
 rename tests/{test-opt.c => test-opt.cpp} (96%)

diff --git a/Makefile b/Makefile
index 100614b4b..a692a39ea 100644
--- a/Makefile
+++ b/Makefile
@@ -411,13 +411,13 @@ benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o
 vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
 
-tests/test-double-float: tests/test-double-float.c build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
-tests/test-grad0: tests/test-grad0.c build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-grad0: tests/test-grad0.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
-tests/test-opt: tests/test-opt.c build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-opt: tests/test-opt.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
 tests/test-quantize-fns: tests/test-quantize-fns.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
diff --git a/examples/common.cpp b/examples/common.cpp
index e6439841d..3e7c3b696 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -572,7 +572,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stdout, " --temp N temperature (default: %.1f)\n", (double)params.temp);
     fprintf(stdout, " --perplexity compute perplexity over each ctx window of the prompt\n");
     fprintf(stdout, " --hellaswag compute HellaSwag score over random tasks from datafile supplied with -f\n");
-    fprintf(stdout, " --hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %d)\n", params.hellaswag_tasks);
+    fprintf(stdout, " --hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks);
     fprintf(stdout, " --keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
     fprintf(stdout, " --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
     if (llama_mlock_supported()) {
diff --git a/scripts/sync-ggml.sh b/scripts/sync-ggml.sh
index 02ea6ec15..3d13e852a 100755
--- a/scripts/sync-ggml.sh
+++ b/scripts/sync-ggml.sh
@@ -10,5 +10,5 @@ cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m
 cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
 cp -rpv ../ggml/include/ggml/ggml.h ./ggml.h
 
-cp -rpv ../ggml/tests/test-opt.c ./tests/test-opt.c
-cp -rpv ../ggml/tests/test-grad0.c ./tests/test-grad0.c
+cp -rpv ../ggml/tests/test-opt.cpp ./tests/test-opt.cpp
+cp -rpv ../ggml/tests/test-grad0.cpp ./tests/test-grad0.cpp
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 11ec6c725..1a40edbec 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -6,10 +6,10 @@ function(llama_add_test source)
     add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
 endfunction()
 
-# llama_add_test(test-double-float.c) # SLOW
+# llama_add_test(test-double-float.cpp) # SLOW
 llama_add_test(test-quantize-fns.cpp)
 llama_add_test(test-quantize-perf.cpp)
 llama_add_test(test-sampling.cpp)
 llama_add_test(test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin)
-llama_add_test(test-grad0.c) # SLOW
-# llama_add_test(test-opt.c) # SLOW
+llama_add_test(test-grad0.cpp) # SLOW
+# llama_add_test(test-opt.cpp) # SLOW
diff --git a/tests/test-double-float.c b/tests/test-double-float.cpp
similarity index 88%
rename from tests/test-double-float.c
rename to tests/test-double-float.cpp
index 89dafc9f2..b506f273f 100644
--- a/tests/test-double-float.c
+++ b/tests/test-double-float.cpp
@@ -3,10 +3,11 @@
 // This is done by checking all finite (non-NaN, non-infinite) floats.
 
 #undef NDEBUG
-#include <assert.h>
+#include <cassert>
 #include <immintrin.h>
-#include <math.h>
-#include <stdint.h>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
 
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdouble-promotion"
@@ -32,8 +33,9 @@ inline static float silu_float(float x) {
 int main(void) {
     uint32_t x = UINT32_MAX;
     do {
-        float f = *(float *)&x;
-        assert(!isfinite(f) || (round_orig(f) == round_float(f)));
+        float f;
+        memcpy(&f, &x, sizeof(x));
+        assert(!std::isfinite(f) || (round_orig(f) == round_float(f)));
     } while (x--);
 
 #ifdef __F16C__
diff --git a/tests/test-grad0.c b/tests/test-grad0.cpp
similarity index 98%
rename from tests/test-grad0.c
rename to tests/test-grad0.cpp
index 6d312216d..75a698d73 100644
--- a/tests/test-grad0.c
+++ b/tests/test-grad0.cpp
@@ -1,10 +1,10 @@
 #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
 #include "ggml.h"
 
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cassert>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
@@ -47,16 +47,16 @@
 
 #define GGML_PRINT(...) printf(__VA_ARGS__)
 
-float frand(void) {
+static float frand(void) {
     return (float)rand()/(float)RAND_MAX;
 }
 
-int irand(int n) {
+static int irand(int n) {
     if (n == 0) return 0;
     return rand()%n;
 }
 
-void get_random_dims(int64_t * dims, int ndims) {
+static void get_random_dims(int64_t * dims, int ndims) {
     dims[0] = dims[1] = dims[2] = dims[3] = 1;
 
     for (int i = 0; i < ndims; i++) {
@@ -64,7 +64,7 @@ void get_random_dims(int64_t * dims, int ndims) {
     }
 }
 
-struct ggml_tensor * get_random_tensor_f32(
+static struct ggml_tensor * get_random_tensor_f32(
         struct ggml_context * ctx0,
         int ndims,
         int64_t ne[],
@@ -112,7 +112,7 @@ struct ggml_tensor * get_random_tensor_f32(
     return result;
 }
 
-struct ggml_tensor * get_random_tensor_f16(
+static struct ggml_tensor * get_random_tensor_f16(
         struct ggml_context * ctx0,
         int ndims,
         int64_t ne[],
@@ -160,7 +160,7 @@ struct ggml_tensor * get_random_tensor_f16(
     return result;
 }
 
-struct ggml_tensor * get_random_tensor_i32(
+static struct ggml_tensor * get_random_tensor_i32(
         struct ggml_context * ctx0,
         int ndims,
         int64_t ne[],
@@ -208,7 +208,7 @@ struct ggml_tensor * get_random_tensor_i32(
     return result;
 }
 
-void print_elements(const char* label, const struct ggml_tensor * t) {
+static void print_elements(const char* label, const struct ggml_tensor * t) {
     if (!t) {
         printf("%s: %s = null\n", __func__, label);
         return;
     }
@@ -228,7 +228,7 @@ void print_elements(const char* label, const struct ggml_tensor * t) {
 }
 
-bool check_gradient(
+static bool check_gradient(
         const char * op_name,
         struct ggml_context * ctx0,
         struct ggml_tensor * x[],
@@ -310,7 +310,7 @@ bool check_gradient(
 }
 
 // TODO: clean-up this ..
-bool check_mat_mul(
+static bool check_mat_mul(
         const struct ggml_tensor * y,
         const struct ggml_tensor * x0,
         const struct ggml_tensor * x1) {
@@ -373,9 +373,9 @@ bool check_mat_mul(
 
 int main(int argc, const char ** argv) {
     struct ggml_init_params params = {
-        .mem_size   = 128*1024*1024,
-        .mem_buffer = NULL,
-        .no_alloc   = false,
+        /* .mem_size   = */ 128*1024*1024,
+        /* .mem_buffer = */ NULL,
+        /* .no_alloc   = */ false,
     };
 
     int64_t ne[4];
diff --git a/tests/test-opt.c b/tests/test-opt.cpp
similarity index 96%
rename from tests/test-opt.c
rename to tests/test-opt.cpp
index 4eef62bcf..8ab240202 100644
--- a/tests/test-opt.c
+++ b/tests/test-opt.cpp
@@ -1,9 +1,9 @@
 #include "ggml.h"
 
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cassert>
 
 #define MAX_NARGS 2
 
@@ -119,10 +119,11 @@ void set_element(struct ggml_tensor * t, int idx, float value) {
 
 int main(void) {
     struct ggml_init_params params = {
-        .mem_size   = 1024*1024*1024,
-        .mem_buffer = NULL,
-        .no_alloc   = false,
+        /* .mem_size   = */ 1024*1024*1024,
+        /* .mem_buffer = */ NULL,
+        /* .no_alloc   = */ false,
     };
+
     struct ggml_context * ctx = ggml_init(params);
 
     int64_t ne1[4] = {4, 128, 1, 1};

From 220d9318647a8ce127dbf7c9de5400455f41e7d8 Mon Sep 17 00:00:00 2001
From: ldwang
Date: Wed, 2 Aug 2023 16:21:11 +0800
Subject: [PATCH 4/4] readme : add Aquila-7B model series to supported models (#2487)

* support bpe tokenizer in convert

Signed-off-by: ldwang

* support bpe tokenizer in convert

Signed-off-by: ldwang

* support bpe tokenizer in convert, fix

Signed-off-by: ldwang

* Add Aquila-7B models in README.md

Signed-off-by: ldwang

* Up Aquila-7B models in README.md

Signed-off-by: ldwang

---------

Signed-off-by: ldwang
Co-authored-by: ldwang
---
 README.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/README.md b/README.md
index 515c80c42..2ece294b7 100644
--- a/README.md
+++ b/README.md
@@ -88,6 +88,7 @@ as the main playground for developing new features for the [ggml](https://github
 - [X] [Pygmalion 7B / Metharme 7B](#using-pygmalion-7b--metharme-7b)
 - [X] [WizardLM](https://github.com/nlpxucan/WizardLM)
 - [X] [Baichuan-7B](https://huggingface.co/baichuan-inc/baichuan-7B) and its derivations (such as [baichuan-7b-sft](https://huggingface.co/hiyouga/baichuan-7b-sft))
+- [X] [Aquila-7B](https://huggingface.co/BAAI/Aquila-7B) / [AquilaChat-7B](https://huggingface.co/BAAI/AquilaChat-7B)
 
 **Bindings:**
 
@@ -492,6 +493,9 @@ Building the program with BLAS support may lead to some performance improvements
 # obtain the original LLaMA model weights and place them in ./models
 ls ./models
 65B 30B 13B 7B tokenizer_checklist.chk tokenizer.model
+  # [Optional] for models using BPE tokenizers
+  ls ./models
+  65B 30B 13B 7B vocab.json
 
 # install Python dependencies
 python3 -m pip install -r requirements.txt
 
 # convert the 7B model to ggml FP16 format
 python3 convert.py models/7B/
 
+  # [Optional] for models using BPE tokenizers
+  python convert.py models/7B/ --vocabtype bpe
+
 # quantize the model to 4-bits (using q4_0 method)
 ./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin q4_0