From a56d09a4407f29c21e149b44fd5308f83aa1cb09 Mon Sep 17 00:00:00 2001
From: Pierrick Hymbert <pierrick.hymbert@gmail.com>
Date: Sat, 16 Mar 2024 13:20:53 +0100
Subject: [PATCH 1/5] ci : close inactive issue with workflow (#6053)

* issues: ci - close inactive issue with workflow

* ci: close issue, change workflow schedule time
---
 .github/workflows/close-issue.yml | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 .github/workflows/close-issue.yml

diff --git a/.github/workflows/close-issue.yml b/.github/workflows/close-issue.yml
new file mode 100644
index 000000000..bc08a72d0
--- /dev/null
+++ b/.github/workflows/close-issue.yml
@@ -0,0 +1,22 @@
+name: Close inactive issues
+on:
+  schedule:
+    - cron: "42 0 * * *"
+
+jobs:
+  close-issues:
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
+    steps:
+      - uses: actions/stale@v5
+        with:
+          days-before-issue-stale: 30
+          days-before-issue-close: 14
+          stale-issue-label: "stale"
+          stale-issue-message: "This issue is stale because it has been open for 30 days with no activity."
+          close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
+          days-before-pr-stale: -1
+          days-before-pr-close: -1
+          repo-token: ${{ secrets.GITHUB_TOKEN }}

From 15961ec04dbd59d21d8984d42e4c0f7e7e7d320a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?DAN=E2=84=A2?= <dranger003@gmail.com>
Date: Sat, 16 Mar 2024 11:39:15 -0400
Subject: [PATCH 2/5] common : refactor nested if causing error C1061 on MSVC
 (#6101)

* Refactor nested if causing error C1061 on MSVC.

* Revert back and remove else's.

* Add flag to track found arguments.
---
 common/common.cpp | 475 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 356 insertions(+), 119 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 4912237e0..1b0ba8493 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -151,13 +151,17 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             std::replace(arg.begin(), arg.end(), '_', '-');
         }
 
+        bool arg_found = false;
         if (arg == "-s" || arg == "--seed") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.seed = std::stoul(argv[i]);
-        } else if (arg == "-t" || arg == "--threads") {
+        }
+        if (arg == "-t" || arg == "--threads") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -166,7 +170,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             if (params.n_threads <= 0) {
                 params.n_threads = std::thread::hardware_concurrency();
             }
-        } else if (arg == "-tb" || arg == "--threads-batch") {
+        }
+        if (arg == "-tb" || arg == "--threads-batch") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -175,7 +181,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             if (params.n_threads_batch <= 0) {
                 params.n_threads_batch = std::thread::hardware_concurrency();
             }
-        } else if (arg == "-td" || arg == "--threads-draft") {
+        }
+        if (arg == "-td" || arg == "--threads-draft") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -184,7 +192,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             if (params.n_threads_draft <= 0) {
                 params.n_threads_draft = std::thread::hardware_concurrency();
             }
-        } else if (arg == "-tbd" || arg == "--threads-batch-draft") {
+        }
+        if (arg == "-tbd" || arg == "--threads-batch-draft") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -193,25 +203,37 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             if (params.n_threads_batch_draft <= 0) {
                 params.n_threads_batch_draft = std::thread::hardware_concurrency();
             }
-        } else if (arg == "-p" || arg == "--prompt") {
+        }
+        if (arg == "-p" || arg == "--prompt") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.prompt = argv[i];
-        } else if (arg == "-e" || arg == "--escape") {
+        }
+        if (arg == "-e" || arg == "--escape") {
+            arg_found = true;
             params.escape = true;
-        } else if (arg == "--prompt-cache") {
+        }
+        if (arg == "--prompt-cache") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.path_prompt_cache = argv[i];
-        } else if (arg == "--prompt-cache-all") {
+        }
+        if (arg == "--prompt-cache-all") {
+            arg_found = true;
             params.prompt_cache_all = true;
-        } else if (arg == "--prompt-cache-ro") {
+        }
+        if (arg == "--prompt-cache-ro") {
+            arg_found = true;
             params.prompt_cache_ro = true;
-        } else if (arg == "-bf" || arg == "--binary-file") {
+        }
+        if (arg == "-bf" || arg == "--binary-file") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -228,7 +250,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             ss << file.rdbuf();
             params.prompt = ss.str();
             fprintf(stderr, "Read %zu bytes from binary file %s\n", params.prompt.size(), argv[i]);
-        } else if (arg == "-f" || arg == "--file") {
+        }
+        if (arg == "-f" || arg == "--file") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -245,51 +269,67 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             if (!params.prompt.empty() && params.prompt.back() == '\n') {
                 params.prompt.pop_back();
             }
-        } else if (arg == "-n" || arg == "--n-predict") {
+        }
+        if (arg == "-n" || arg == "--n-predict") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.n_predict = std::stoi(argv[i]);
-        } else if (arg == "--top-k") {
+        }
+        if (arg == "--top-k") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.top_k = std::stoi(argv[i]);
-        } else if (arg == "-c" || arg == "--ctx-size") {
+        }
+        if (arg == "-c" || arg == "--ctx-size") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.n_ctx = std::stoi(argv[i]);
-        } else if (arg == "--grp-attn-n" || arg == "-gan") {
+        }
+        if (arg == "--grp-attn-n" || arg == "-gan") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
 
             params.grp_attn_n = std::stoi(argv[i]);
-        } else if (arg == "--grp-attn-w" || arg == "-gaw") {
+        }
+        if (arg == "--grp-attn-w" || arg == "-gaw") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
 
             params.grp_attn_w = std::stoi(argv[i]);
-        } else if (arg == "--rope-freq-base") {
+        }
+        if (arg == "--rope-freq-base") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.rope_freq_base = std::stof(argv[i]);
-        } else if (arg == "--rope-freq-scale") {
+        }
+        if (arg == "--rope-freq-scale") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.rope_freq_scale = std::stof(argv[i]);
-        } else if (arg == "--rope-scaling") {
+        }
+        if (arg == "--rope-scaling") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -299,43 +339,57 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             else if (value == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR; }
             else if (value == "yarn")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; }
             else { invalid_param = true; break; }
-        } else if (arg == "--rope-scale") {
+        }
+        if (arg == "--rope-scale") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.rope_freq_scale = 1.0f/std::stof(argv[i]);
-        } else if (arg == "--yarn-orig-ctx") {
+        }
+        if (arg == "--yarn-orig-ctx") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.yarn_orig_ctx = std::stoi(argv[i]);
-        } else if (arg == "--yarn-ext-factor") {
+        }
+        if (arg == "--yarn-ext-factor") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.yarn_ext_factor = std::stof(argv[i]);
-        } else if (arg == "--yarn-attn-factor") {
+        }
+        if (arg == "--yarn-attn-factor") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.yarn_attn_factor = std::stof(argv[i]);
-        } else if (arg == "--yarn-beta-fast") {
+        }
+        if (arg == "--yarn-beta-fast") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.yarn_beta_fast = std::stof(argv[i]);
-        } else if (arg == "--yarn-beta-slow") {
+        }
+        if (arg == "--yarn-beta-slow") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.yarn_beta_slow = std::stof(argv[i]);
-        } else if (arg == "--pooling") {
+        }
+        if (arg == "--pooling") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -345,118 +399,156 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             else if (value == "mean") { params.pooling_type = LLAMA_POOLING_TYPE_MEAN; }
             else if (value == "cls")  { params.pooling_type = LLAMA_POOLING_TYPE_CLS; }
             else { invalid_param = true; break; }
-        } else if (arg == "--defrag-thold" || arg == "-dt") {
+        }
+        if (arg == "--defrag-thold" || arg == "-dt") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.defrag_thold = std::stof(argv[i]);
-        } else if (arg == "--samplers") {
+        }
+        if (arg == "--samplers") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             const auto sampler_names = string_split(argv[i], ';');
             sparams.samplers_sequence = sampler_types_from_names(sampler_names, true);
-        } else if (arg == "--sampling-seq") {
+        }
+        if (arg == "--sampling-seq") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.samplers_sequence = sampler_types_from_chars(argv[i]);
-        } else if (arg == "--top-p") {
+        }
+        if (arg == "--top-p") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.top_p = std::stof(argv[i]);
-        } else if (arg == "--min-p") {
+        }
+        if (arg == "--min-p") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.min_p = std::stof(argv[i]);
-        } else if (arg == "--temp") {
+        }
+        if (arg == "--temp") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.temp = std::stof(argv[i]);
             sparams.temp = std::max(sparams.temp, 0.0f);
-        } else if (arg == "--tfs") {
+        }
+        if (arg == "--tfs") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.tfs_z = std::stof(argv[i]);
-        } else if (arg == "--typical") {
+        }
+        if (arg == "--typical") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.typical_p = std::stof(argv[i]);
-        } else if (arg == "--repeat-last-n") {
+        }
+        if (arg == "--repeat-last-n") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.penalty_last_n = std::stoi(argv[i]);
             sparams.n_prev = std::max(sparams.n_prev, sparams.penalty_last_n);
-        } else if (arg == "--repeat-penalty") {
+        }
+        if (arg == "--repeat-penalty") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.penalty_repeat = std::stof(argv[i]);
-        } else if (arg == "--frequency-penalty") {
+        }
+        if (arg == "--frequency-penalty") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.penalty_freq = std::stof(argv[i]);
-        } else if (arg == "--presence-penalty") {
+        }
+        if (arg == "--presence-penalty") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.penalty_present = std::stof(argv[i]);
-        } else if (arg == "--dynatemp-range") {
+        }
+        if (arg == "--dynatemp-range") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.dynatemp_range = std::stof(argv[i]);
-        } else if (arg == "--dynatemp-exp") {
+        }
+        if (arg == "--dynatemp-exp") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.dynatemp_exponent = std::stof(argv[i]);
-        } else if (arg == "--mirostat") {
+        }
+        if (arg == "--mirostat") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.mirostat = std::stoi(argv[i]);
-        } else if (arg == "--mirostat-lr") {
+        }
+        if (arg == "--mirostat-lr") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.mirostat_eta = std::stof(argv[i]);
-        } else if (arg == "--mirostat-ent") {
+        }
+        if (arg == "--mirostat-ent") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.mirostat_tau = std::stof(argv[i]);
-        } else if (arg == "--cfg-negative-prompt") {
+        }
+        if (arg == "--cfg-negative-prompt") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.cfg_negative_prompt = argv[i];
-        } else if (arg == "--cfg-negative-prompt-file") {
+        }
+        if (arg == "--cfg-negative-prompt-file") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -471,86 +563,114 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             if (!sparams.cfg_negative_prompt.empty() && sparams.cfg_negative_prompt.back() == '\n') {
                 sparams.cfg_negative_prompt.pop_back();
             }
-        } else if (arg == "--cfg-scale") {
+        }
+        if (arg == "--cfg-scale") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.cfg_scale = std::stof(argv[i]);
-        } else if (arg == "-b" || arg == "--batch-size") {
+        }
+        if (arg == "-b" || arg == "--batch-size") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.n_batch = std::stoi(argv[i]);
-        } else if (arg == "-ub" || arg == "--ubatch-size") {
+        }
+        if (arg == "-ub" || arg == "--ubatch-size") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.n_ubatch = std::stoi(argv[i]);
-        } else if (arg == "--keep") {
+        }
+        if (arg == "--keep") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.n_keep = std::stoi(argv[i]);
-        } else if (arg == "--draft") {
+        }
+        if (arg == "--draft") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.n_draft = std::stoi(argv[i]);
-        } else if (arg == "--chunks") {
+        }
+        if (arg == "--chunks") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.n_chunks = std::stoi(argv[i]);
-        } else if (arg == "-np" || arg == "--parallel") {
+        }
+        if (arg == "-np" || arg == "--parallel") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.n_parallel = std::stoi(argv[i]);
-        } else if (arg == "-ns" || arg == "--sequences") {
+        }
+        if (arg == "-ns" || arg == "--sequences") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.n_sequences = std::stoi(argv[i]);
-        } else if (arg == "--p-split" || arg == "-ps") {
+        }
+        if (arg == "--p-split" || arg == "-ps") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.p_split = std::stof(argv[i]);
-        } else if (arg == "-m" || arg == "--model") {
+        }
+        if (arg == "-m" || arg == "--model") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.model = argv[i];
-        } else if (arg == "-md" || arg == "--model-draft") {
+        }
+        if (arg == "-md" || arg == "--model-draft") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.model_draft = argv[i];
-        } else if (arg == "-a" || arg == "--alias") {
+        }
+        if (arg == "-a" || arg == "--alias") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.model_alias = argv[i];
-        } else if (arg == "--lora") {
+        }
+        if (arg == "--lora") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.lora_adapter.emplace_back(argv[i], 1.0f);
             params.use_mmap = false;
-        } else if (arg == "--lora-scaled") {
+        }
+        if (arg == "--lora-scaled") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -562,19 +682,25 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             }
             params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
             params.use_mmap = false;
-        } else if (arg == "--lora-base") {
+        }
+        if (arg == "--lora-base") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.lora_base = argv[i];
-        } else if (arg == "--control-vector") {
+        }
+        if (arg == "--control-vector") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.control_vectors.push_back({ 1.0f, argv[i], });
-        } else if (arg == "--control-vector-scaled") {
+        }
+        if (arg == "--control-vector-scaled") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -585,7 +711,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.control_vectors.push_back({ std::stof(argv[i]), fname, });
-        } else if (arg == "--control-vector-layer-range") {
+        }
+        if (arg == "--control-vector-layer-range") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -596,49 +724,85 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.control_vector_layer_end = std::stoi(argv[i]);
-        } else if (arg == "--mmproj") {
+        }
+        if (arg == "--mmproj") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.mmproj = argv[i];
-        } else if (arg == "--image") {
+        }
+        if (arg == "--image") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.image = argv[i];
-        } else if (arg == "-i" || arg == "--interactive") {
+        }
+        if (arg == "-i" || arg == "--interactive") {
+            arg_found = true;
             params.interactive = true;
-        } else if (arg == "--embedding") {
+        }
+        if (arg == "--embedding") {
+            arg_found = true;
             params.embedding = true;
-        } else if (arg == "--interactive-first") {
+        }
+        if (arg == "--interactive-first") {
+            arg_found = true;
             params.interactive_first = true;
-        } else if (arg == "-ins" || arg == "--instruct") {
+        }
+        if (arg == "-ins" || arg == "--instruct") {
+            arg_found = true;
             params.instruct = true;
-        } else if (arg == "-cml" || arg == "--chatml") {
+        }
+        if (arg == "-cml" || arg == "--chatml") {
+            arg_found = true;
             params.chatml = true;
-        } else if (arg == "--infill") {
+        }
+        if (arg == "--infill") {
+            arg_found = true;
             params.infill = true;
-        } else if (arg == "-dkvc" || arg == "--dump-kv-cache") {
+        }
+        if (arg == "-dkvc" || arg == "--dump-kv-cache") {
+            arg_found = true;
             params.dump_kv_cache = true;
-        } else if (arg == "-nkvo" || arg == "--no-kv-offload") {
+        }
+        if (arg == "-nkvo" || arg == "--no-kv-offload") {
+            arg_found = true;
             params.no_kv_offload = true;
-        } else if (arg == "-ctk" || arg == "--cache-type-k") {
+        }
+        if (arg == "-ctk" || arg == "--cache-type-k") {
+            arg_found = true;
             params.cache_type_k = argv[++i];
-        } else if (arg == "-ctv" || arg == "--cache-type-v") {
+        }
+        if (arg == "-ctv" || arg == "--cache-type-v") {
+            arg_found = true;
             params.cache_type_v = argv[++i];
-        } else if (arg == "--multiline-input") {
+        }
+        if (arg == "--multiline-input") {
+            arg_found = true;
             params.multiline_input = true;
-        } else if (arg == "--simple-io") {
+        }
+        if (arg == "--simple-io") {
+            arg_found = true;
             params.simple_io = true;
-        } else if (arg == "-cb" || arg == "--cont-batching") {
+        }
+        if (arg == "-cb" || arg == "--cont-batching") {
+            arg_found = true;
             params.cont_batching = true;
-        } else if (arg == "--color") {
+        }
+        if (arg == "--color") {
+            arg_found = true;
             params.use_color = true;
-        } else if (arg == "--mlock") {
+        }
+        if (arg == "--mlock") {
+            arg_found = true;
             params.use_mlock = true;
-        } else if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers") {
+        }
+        if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -648,7 +812,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
                 fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers option will be ignored\n");
                 fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n");
             }
-        } else if (arg == "--gpu-layers-draft" || arg == "-ngld" || arg == "--n-gpu-layers-draft") {
+        }
+        if (arg == "--gpu-layers-draft" || arg == "-ngld" || arg == "--n-gpu-layers-draft") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -658,7 +824,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
                 fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers-draft option will be ignored\n");
                 fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n");
             }
-        } else if (arg == "--main-gpu" || arg == "-mg") {
+        }
+        if (arg == "--main-gpu" || arg == "-mg") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -667,7 +835,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
 #ifndef GGML_USE_CUBLAS_SYCL
             fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS/SYCL. Setting the main GPU has no effect.\n");
 #endif // GGML_USE_CUBLAS_SYCL
-        } else if (arg == "--split-mode" || arg == "-sm") {
+        }
+        if (arg == "--split-mode" || arg == "-sm") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -691,7 +861,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS/SYCL. Setting the split mode has no effect.\n");
 #endif // GGML_USE_CUBLAS_SYCL
 
-        } else if (arg == "--tensor-split" || arg == "-ts") {
+        }
+        if (arg == "--tensor-split" || arg == "-ts") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -716,9 +888,13 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
 #ifndef GGML_USE_CUBLAS_SYCL_VULKAN
             fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS/SYCL/Vulkan. Setting a tensor split has no effect.\n");
 #endif // GGML_USE_CUBLAS_SYCL
-        } else if (arg == "--no-mmap") {
+        }
+        if (arg == "--no-mmap") {
+            arg_found = true;
             params.use_mmap = false;
-        } else if (arg == "--numa") {
+        }
+        if (arg == "--numa") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -728,17 +904,25 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             else if (value == "isolate") { params.numa = GGML_NUMA_STRATEGY_ISOLATE; }
             else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
             else { invalid_param = true; break; }
-        } else if (arg == "--verbose-prompt") {
+        }
+        if (arg == "--verbose-prompt") {
+            arg_found = true;
             params.verbose_prompt = true;
-        } else if (arg == "--no-display-prompt") {
+        }
+        if (arg == "--no-display-prompt") {
+            arg_found = true;
             params.display_prompt = false;
-        } else if (arg == "-r" || arg == "--reverse-prompt") {
+        }
+        if (arg == "-r" || arg == "--reverse-prompt") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.antiprompt.emplace_back(argv[i]);
-        } else if (arg == "-ld" || arg == "--logdir") {
+        }
+        if (arg == "-ld" || arg == "--logdir") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -748,63 +932,93 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             if (params.logdir.back() != DIRECTORY_SEPARATOR) {
                 params.logdir += DIRECTORY_SEPARATOR;
             }
-        } else if (arg == "--save-all-logits" || arg == "--kl-divergence-base") {
+        }
+        if (arg == "--save-all-logits" || arg == "--kl-divergence-base") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.logits_file = argv[i];
-        } else if (arg == "--perplexity" || arg == "--all-logits") {
+        }
+        if (arg == "--perplexity" || arg == "--all-logits") {
+            arg_found = true;
             params.logits_all = true;
-        } else if (arg == "--ppl-stride") {
+        }
+        if (arg == "--ppl-stride") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.ppl_stride = std::stoi(argv[i]);
-        } else if (arg == "-ptc" || arg == "--print-token-count") {
+        }
+        if (arg == "-ptc" || arg == "--print-token-count") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.n_print = std::stoi(argv[i]);
-        } else if (arg == "--ppl-output-type") {
+        }
+        if (arg == "--ppl-output-type") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.ppl_output_type = std::stoi(argv[i]);
-        } else if (arg == "--hellaswag") {
+        }
+        if (arg == "--hellaswag") {
+            arg_found = true;
             params.hellaswag = true;
-        } else if (arg == "--hellaswag-tasks") {
+        }
+        if (arg == "--hellaswag-tasks") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.hellaswag_tasks = std::stoi(argv[i]);
-        } else if (arg == "--winogrande") {
+        }
+        if (arg == "--winogrande") {
+            arg_found = true;
             params.winogrande = true;
-        } else if (arg == "--winogrande-tasks") {
+        }
+        if (arg == "--winogrande-tasks") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.winogrande_tasks = std::stoi(argv[i]);
-        } else if (arg == "--multiple-choice") {
+        }
+        if (arg == "--multiple-choice") {
+            arg_found = true;
             params.multiple_choice = true;
-        } else if (arg == "--multiple-choice-tasks") {
+        }
+        if (arg == "--multiple-choice-tasks") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.multiple_choice_tasks = std::stoi(argv[i]);
-        } else if (arg == "--kl-divergence") {
+        }
+        if (arg == "--kl-divergence") {
+            arg_found = true;
             params.kl_divergence = true;
-        } else if (arg == "--ignore-eos") {
+        }
+        if (arg == "--ignore-eos") {
+            arg_found = true;
             params.ignore_eos = true;
-        } else if (arg == "--no-penalize-nl") {
+        }
+        if (arg == "--no-penalize-nl") {
+            arg_found = true;
             sparams.penalize_nl = false;
-        } else if (arg == "-l" || arg == "--logit-bias") {
+        }
+        if (arg == "-l" || arg == "--logit-bias") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -823,36 +1037,51 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
                 invalid_param = true;
                 break;
             }
-        } else if (arg == "-h" || arg == "--help") {
+        }
+        if (arg == "-h" || arg == "--help") {
+            arg_found = true;
             return false;
-
-        } else if (arg == "--version") {
+        }
+        if (arg == "--version") {
+            arg_found = true;
             fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
             fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
             exit(0);
-        } else if (arg == "--random-prompt") {
+        }
+        if (arg == "--random-prompt") {
+            arg_found = true;
             params.random_prompt = true;
-        } else if (arg == "--in-prefix-bos") {
+        }
+        if (arg == "--in-prefix-bos") {
+            arg_found = true;
             params.input_prefix_bos = true;
-        } else if (arg == "--in-prefix") {
+        }
+        if (arg == "--in-prefix") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.input_prefix = argv[i];
-        } else if (arg == "--in-suffix") {
+        }
+        if (arg == "--in-suffix") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.input_suffix = argv[i];
-        } else if (arg == "--grammar") {
+        }
+        if (arg == "--grammar") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             sparams.grammar = argv[i];
-        } else if (arg == "--grammar-file") {
+        }
+        if (arg == "--grammar-file") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -868,7 +1097,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
                 std::istreambuf_iterator<char>(),
                 std::back_inserter(sparams.grammar)
             );
-        } else if (arg == "--override-kv") {
+        }
+        if (arg == "--override-kv") {
+            arg_found = true;
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -911,10 +1142,14 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             params.kv_overrides.push_back(kvo);
 #ifndef LOG_DISABLE_LOGS
         // Parse args for logging parameters
-        } else if ( log_param_single_parse( argv[i] ) ) {
+        }
+        if ( log_param_single_parse( argv[i] ) ) {
+            arg_found = true;
             // Do nothing, log_param_single_parse automatically does it's thing
             //  and returns if a match was found and parsed.
-        } else if ( log_param_pair_parse( /*check_but_dont_parse*/ true, argv[i] ) ) {
+        }
+        if ( log_param_pair_parse( /*check_but_dont_parse*/ true, argv[i] ) ) {
+            arg_found = true;
             // We have a matching known parameter requiring an argument,
             //  now we need to check if there is anything after this argv
             //  and flag invalid_param or parse it.
@@ -928,7 +1163,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             }
         // End of Parse args for logging parameters
 #endif // LOG_DISABLE_LOGS
-        } else {
+        }
+
+        if (!arg_found) {
             throw std::invalid_argument("error: unknown argument: " + arg);
         }
     }

From dfbfdd60f90207404039c6578d709231496831d9 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen <thichthat@gmail.com>
Date: Sat, 16 Mar 2024 16:42:08 +0100
Subject: [PATCH 3/5] readme : add wllama as a wasm binding (#6100)

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 5cbdf7e47..c2f3342f0 100644
--- a/README.md
+++ b/README.md
@@ -134,6 +134,7 @@ Typically finetunes of the base models below are supported as well.
 - Node.js: [withcatai/node-llama-cpp](https://github.com/withcatai/node-llama-cpp)
 - JS/TS (llama.cpp server client): [lgrammel/modelfusion](https://modelfusion.dev/integration/model-provider/llamacpp)
 - JavaScript/Wasm (works in browser): [tangledgroup/llama-cpp-wasm](https://github.com/tangledgroup/llama-cpp-wasm)
+- Typescript/Wasm (nicer API, available on npm): [ngxson/wllama](https://github.com/ngxson/wllama)
 - Ruby: [yoshoku/llama_cpp.rb](https://github.com/yoshoku/llama_cpp.rb)
 - Rust (nicer API): [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp)
 - Rust (more direct bindings): [utilityai/llama-cpp-rs](https://github.com/utilityai/llama-cpp-rs)

From b5f4ae09c3244ae1644b67c03ed9f4227ab25ad2 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Sat, 16 Mar 2024 16:46:29 +0100
Subject: [PATCH 4/5] gritlm : add initial README.md (#6086)

* gritlm: add initial README.md to examples/gritlm

This commit adds a suggestion for an initial README.md for the gritlm
example.

Signed-off-by: Daniel Bevenius <daniel.bevenius@gmail.com>

* squash! gritlm: add initial README.md to examples/gritlm

Use the `scripts/hf.sh` script to download the model file.

Signed-off-by: Daniel Bevenius <daniel.bevenius@gmail.com>

* squash! gritlm: add initial README.md to examples/gritlm

Fix editorconfig-checker error in examples/gritlm/README.md.

Signed-off-by: Daniel Bevenius <daniel.bevenius@gmail.com>

---------

Signed-off-by: Daniel Bevenius <daniel.bevenius@gmail.com>
---
 examples/gritlm/README.md | 62 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 examples/gritlm/README.md

diff --git a/examples/gritlm/README.md b/examples/gritlm/README.md
new file mode 100644
index 000000000..64cc19204
--- /dev/null
+++ b/examples/gritlm/README.md
@@ -0,0 +1,62 @@
+## Generative Representational Instruction Tuning (GRIT) Example
+[gritlm] a model which can generate embeddings as well as "normal" text
+generation depending on the instructions in the prompt.
+
+* Paper: https://arxiv.org/pdf/2402.09906.pdf
+
+### Retrieval-Augmented Generation (RAG) use case
+One use case for `gritlm` is to use it with RAG. If we recall how RAG works is
+that we take documents that we want to use as context, to ground the large
+language model (LLM), and we create token embeddings for them. We then store
+these token embeddings in a vector database.
+
+When we perform a query, prompt the LLM, we will first create token embeddings
+for the query and then search the vector database to retrieve the most
+similar vectors, and return those documents so they can be passed to the LLM as
+context. Then the query and the context will be passed to the LLM which will
+have to _again_ create token embeddings for the query. But because gritlm is used
+the first query can be cached and the second query tokenization generation does
+not have to be performed at all.
+
+### Running the example
+Download a Grit model:
+```console
+$ scripts/hf.sh --repo cohesionet/GritLM-7B_gguf --file gritlm-7b_q4_1.gguf
+```
+
+Run the example using the downloaded model:
+```console
+$ ./gritlm -m gritlm-7b_q4_1.gguf
+
+Cosine similarity between "Bitcoin: A Peer-to-Peer Electronic Cash System" and "A purely peer-to-peer version of electronic cash w" is: 0.605
+Cosine similarity between "Bitcoin: A Peer-to-Peer Electronic Cash System" and "All text-based language problems can be reduced to" is: 0.103
+Cosine similarity between "Generative Representational Instruction Tuning" and "A purely peer-to-peer version of electronic cash w" is: 0.112
+Cosine similarity between "Generative Representational Instruction Tuning" and "All text-based language problems can be reduced to" is: 0.547
+
+Oh, brave adventurer, who dared to climb
+The lofty peak of Mt. Fuji in the night,
+When shadows lurk and ghosts do roam,
+And darkness reigns, a fearsome sight.
+
+Thou didst set out, with heart aglow,
+To conquer this mountain, so high,
+And reach the summit, where the stars do glow,
+And the moon shines bright, up in the sky.
+
+Through the mist and fog, thou didst press on,
+With steadfast courage, and a steadfast will,
+Through the darkness, thou didst not be gone,
+But didst climb on, with a steadfast skill.
+
+At last, thou didst reach the summit's crest,
+And gazed upon the world below,
+And saw the beauty of the night's best,
+And felt the peace, that only nature knows.
+
+Oh, brave adventurer, who dared to climb
+The lofty peak of Mt. Fuji in the night,
+Thou art a hero, in the eyes of all,
+For thou didst conquer this mountain, so bright.
+```
+
+[gritlm]: https://github.com/ContextualAI/gritlm

From c47cf414efafb8f60596edc7edb5a2d68065e992 Mon Sep 17 00:00:00 2001
From: AmirAli Mirian <37371367+amiralimi@users.noreply.github.com>
Date: Sat, 16 Mar 2024 11:52:02 -0400
Subject: [PATCH 5/5] ggml : add AVX512F SIMD (#6088)

---
 ggml.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)

diff --git a/ggml.c b/ggml.c
index c94006e51..fa23cb3c4 100644
--- a/ggml.c
+++ b/ggml.c
@@ -931,6 +931,101 @@ inline static float vaddvq_f32(float32x4_t v) {
     #define GGML_F16_VEC_REDUCE         GGML_F32Cx4_REDUCE
 #endif
 
+#elif defined(__AVX512F__)
+
+#define GGML_SIMD
+
+// F32 AVX512
+
+#define GGML_F32_STEP 64
+#define GGML_F32_EPR  16
+
+#define GGML_F32x16         __m512
+#define GGML_F32x16_ZERO    _mm512_setzero_ps()
+#define GGML_F32x16_SET1(x) _mm512_set1_ps(x)
+#define GGML_F32x16_LOAD    _mm512_loadu_ps
+#define GGML_F32x16_STORE   _mm512_storeu_ps
+// _mm512_fmadd_ps is defined in AVX512F so no guard is required
+#define GGML_F32x16_FMA(a, b, c) _mm512_fmadd_ps(b, c, a)
+#define GGML_F32x16_ADD     _mm512_add_ps
+#define GGML_F32x16_MUL     _mm512_mul_ps
+#define GGML_F32x16_REDUCE(res, x)                                    \
+do {                                                                  \
+    int offset = GGML_F32_ARR >> 1;                                   \
+    for (int i = 0; i < offset; ++i) {                                \
+        x[i] = _mm512_add_ps(x[i], x[offset+i]);                      \
+    }                                                                 \
+    offset >>= 1;                                                     \
+    for (int i = 0; i < offset; ++i) {                                \
+        x[i] = _mm512_add_ps(x[i], x[offset+i]);                      \
+    }                                                                 \
+    offset >>= 1;                                                     \
+    for (int i = 0; i < offset; ++i) {                                \
+        x[i] = _mm512_add_ps(x[i], x[offset+i]);                      \
+    }                                                                 \
+    res = _mm512_reduce_add_ps(x[0]);                                 \
+} while (0)
+
+// TODO: is this optimal ?
+
+#define GGML_F32_VEC        GGML_F32x16
+#define GGML_F32_VEC_ZERO   GGML_F32x16_ZERO
+#define GGML_F32_VEC_SET1   GGML_F32x16_SET1
+#define GGML_F32_VEC_LOAD   GGML_F32x16_LOAD
+#define GGML_F32_VEC_STORE  GGML_F32x16_STORE
+#define GGML_F32_VEC_FMA    GGML_F32x16_FMA
+#define GGML_F32_VEC_ADD    GGML_F32x16_ADD
+#define GGML_F32_VEC_MUL    GGML_F32x16_MUL
+#define GGML_F32_VEC_REDUCE GGML_F32x16_REDUCE
+
+// F16 AVX512
+
+// F16 AVX
+
+#define GGML_F16_STEP 64
+#define GGML_F16_EPR  16
+
+// AVX512 has FP16 extension (AVX512_FP16) but I don't have it on my machine so I use FP32 instead
+
+#define GGML_F32Cx16             __m512
+#define GGML_F32Cx16_ZERO        _mm512_setzero_ps()
+#define GGML_F32Cx16_SET1(x)     _mm512_set1_ps(x)
+
+// unlike  _mm256_cvt intrinsics that require F16C, _mm512_cvt is defined in AVX512F
+// so F16C guard isn't required
+#define GGML_F32Cx16_LOAD(x)     _mm512_cvtph_ps(_mm256_loadu_si256((__m256i *)(x)))
+#define GGML_F32Cx16_STORE(x, y) _mm256_storeu_si256((__m256i *)(x), _mm512_cvtps_ph(y, 0))
+
+#define GGML_F32Cx16_FMA(a, b, c) _mm512_fmadd_ps(b, c, a)
+#define GGML_F32Cx16_ADD         _mm512_add_ps
+#define GGML_F32Cx16_MUL         _mm512_mul_ps
+#define GGML_F32Cx16_REDUCE(res, x)                               \
+do {                                                              \
+    int offset = GGML_F32_ARR >> 1;                               \
+    for (int i = 0; i < offset; ++i) {                            \
+        x[i] = _mm512_add_ps(x[i], x[offset+i]);                  \
+    }                                                             \
+    offset >>= 1;                                                 \
+    for (int i = 0; i < offset; ++i) {                            \
+        x[i] = _mm512_add_ps(x[i], x[offset+i]);                  \
+    }                                                             \
+    offset >>= 1;                                                 \
+    for (int i = 0; i < offset; ++i) {                            \
+        x[i] = _mm512_add_ps(x[i], x[offset+i]);                  \
+    }                                                             \
+    res = _mm512_reduce_add_ps(x[0]);                             \
+} while (0)
+
+#define GGML_F16_VEC                GGML_F32Cx16
+#define GGML_F16_VEC_ZERO           GGML_F32Cx16_ZERO
+#define GGML_F16_VEC_SET1           GGML_F32Cx16_SET1
+#define GGML_F16_VEC_LOAD(p, i)     GGML_F32Cx16_LOAD(p)
+#define GGML_F16_VEC_STORE(p, r, i) GGML_F32Cx16_STORE(p, r[i])
+#define GGML_F16_VEC_FMA            GGML_F32Cx16_FMA
+#define GGML_F16_VEC_ADD            GGML_F32Cx16_ADD
+#define GGML_F16_VEC_MUL            GGML_F32Cx16_MUL
+#define GGML_F16_VEC_REDUCE         GGML_F32Cx16_REDUCE
+
 #elif defined(__AVX__)
 
 #define GGML_SIMD