Caleb P. Nwokocha 2024-10-28 10:15:26 -05:00
parent 24c46d9364
commit 31a90b3cb6
43 changed files with 100 additions and 568 deletions

View file

@@ -15,7 +15,7 @@
Name: jarvis.cpp-cuda
Version: %( date "+%%Y%%m%%d" )
Release: 1%{?dist}
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
Summary: CPU Inference of JARVIS model in pure C/C++ (no CUDA/OpenCL)
License: MIT
Source0: https://github.com/ggerganov/jarvis.cpp/archive/refs/heads/master.tar.gz
BuildRequires: coreutils make gcc-c++ git cuda-toolkit

View file

@@ -16,7 +16,7 @@
Name: jarvis.cpp
Version: %( date "+%%Y%%m%%d" )
Release: 1%{?dist}
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
Summary: CPU Inference of JARVIS model in pure C/C++ (no CUDA/OpenCL)
License: MIT
Source0: https://github.com/ggerganov/jarvis.cpp/archive/refs/heads/master.tar.gz
BuildRequires: coreutils make gcc-c++ git libstdc++-devel

View file

@@ -218,7 +218,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
# overridden by importing Nixpkgs with `allowBroken = true`.
broken = (useMetalKit && !effectiveStdenv.isDarwin);
description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
description = "Inference of JARVIS model in pure C/C++${descriptionSuffix}";
homepage = "https://github.com/ggerganov/jarvis.cpp/";
license = lib.licenses.mit;

View file

@@ -1,74 +0,0 @@
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.Scanner;

public class LLMCLI {
    public static void main(String[] args) {
        // Path to the .exe file
        String exePath = "bin/jarvis-cli.exe";
        System.out.println("Enter -h for help");
        // Scanner to take user input for various commands
        Scanner scanner = new Scanner(System.in);
        while (true) {
            String commandInput = scanner.nextLine();
            // Split user input into command array for ProcessBuilder
            String[] commands = commandInput.split(" ");
            // Create an array to hold both the executable path and the commands
            String[] fullCommand = new String[commands.length + 1];
            fullCommand[0] = exePath; // First element is the executable path
            System.arraycopy(commands, 0, fullCommand, 1, commands.length); // Copy the user commands after the exe path
            Process process = null;
            try {
                // Create a ProcessBuilder with the executable and dynamic commands
                ProcessBuilder processBuilder = new ProcessBuilder(fullCommand);
                // Redirect error stream to read both error and output in one stream
                processBuilder.redirectErrorStream(true);
                // Start the process
                process = processBuilder.start();
                // Capture output in a separate thread
                Process finalProcess = process;
                new Thread(() -> {
                    try (BufferedReader reader = new BufferedReader(new InputStreamReader(finalProcess.getInputStream()))) {
                        String line;
                        while ((line = reader.readLine()) != null) {
                            System.out.println(line);
                        }
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }).start();
                // Use OutputStream to send input to the process (if needed)
                try (OutputStream processInput = process.getOutputStream()) {
                    String userInput;
                    while (scanner.hasNextLine() && process.isAlive()) {
                        userInput = scanner.nextLine();
                        processInput.write((userInput + "\n").getBytes());
                        processInput.flush(); // Ensure input is sent immediately
                    }
                }
                // Wait for the process to complete and get the exit code
                int exitCode = process.waitFor();
            } catch (IOException | InterruptedException e) {
                e.printStackTrace();
            } finally {
                // Ensure the process is destroyed if still running
                if (process != null) {
                    process.destroy();
                }
            }
        }
    }
}
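A quick way to exercise this wrapper before its removal (a sketch; it assumes `LLMCLI.java` sits next to a built `bin/jarvis-cli.exe`, and the model path typed at the prompt is a placeholder):

```bash
# compile the wrapper, run it, then type jarvis-cli flags at the prompt
javac LLMCLI.java
java LLMCLI
# example input once it is running:
#   -m models/jarvis-7b/ggml-model-q4_0.gguf -p "Hello" -n 64
```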

View file

@@ -8,7 +8,7 @@
[Roadmap](https://github.com/users/ggerganov/projects/7) / [Project status](https://github.com/ggerganov/jarvis.cpp/discussions/3471) / [Manifesto](https://github.com/ggerganov/jarvis.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml)
Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++
Inference of Meta's [JARVIS](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++
## Recent API changes
@@ -43,14 +43,14 @@ improved significantly thanks to many contributions. It is the main playground f
Typically finetunes of the base models below are supported as well.
- [X] LLaMA 🦙
- [x] LLaMA 2 🦙🦙
- [x] LLaMA 3 🦙🦙🦙
- [X] JARVIS 🦙
- [x] JARVIS 2 🦙🦙
- [x] JARVIS 3 🦙🦙🦙
- [X] [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1)
- [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral)
- [x] [DBRX](https://huggingface.co/databricks/dbrx-instruct)
- [X] [Falcon](https://huggingface.co/models?search=tiiuae/falcon)
- [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2)
- [X] [Chinese JARVIS / Alpaca](https://github.com/ymcui/Chinese-JARVIS-Alpaca) and [Chinese JARVIS-2 / Alpaca-2](https://github.com/ymcui/Chinese-JARVIS-Alpaca-2)
- [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne)
- [X] [BERT](https://github.com/ggerganov/jarvis.cpp/pull/5423)
- [X] [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/)
@@ -140,7 +140,7 @@ Typically finetunes of the base models below are supported as well.
Unless otherwise noted these projects are open-source with permissive licensing:
- [MindWorkAI/AI-Studio](https://github.com/MindWorkAI/AI-Studio) (FSL-1.1-MIT)
- [iohub/cojarvis](https://github.com/iohub/coLLaMA)
- [iohub/cojarvis](https://github.com/iohub/coJARVIS)
- [janhq/jan](https://github.com/janhq/jan) (AGPL)
- [nat/openplayground](https://github.com/nat/openplayground)
- [Faraday](https://faraday.dev/) (proprietary)
@@ -198,7 +198,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
## Demo
<details>
<summary>Typical run using LLaMA v2 13B on M2 Ultra</summary>
<summary>Typical run using JARVIS v2 13B on M2 Ultra</summary>
```
$ make -j && ./jarvis-cli -m models/jarvis-13b-v2/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e
@@ -240,7 +240,7 @@ llm_load_print_meta: freq_scale = 1
llm_load_print_meta: model type = 13B
llm_load_print_meta: model ftype = mostly Q4_0
llm_load_print_meta: model size = 13.02 B
llm_load_print_meta: general.name = LLaMA v2
llm_load_print_meta: general.name = JARVIS v2
llm_load_print_meta: BOS token = 1 '<s>'
llm_load_print_meta: EOS token = 2 '</s>'
llm_load_print_meta: UNK token = 0 '<unk>'
@@ -281,9 +281,9 @@ jarvis_print_timings: total time = 25431.49 ms
</details>
<details>
<summary>Demo of running both LLaMA-7B and whisper.cpp on a single M1 Pro MacBook</summary>
<summary>Demo of running both JARVIS-7B and whisper.cpp on a single M1 Pro MacBook</summary>
And here is another demo of running both LLaMA-7B and [whisper.cpp](https://github.com/ggerganov/whisper.cpp) on a single M1 Pro MacBook:
And here is another demo of running both JARVIS-7B and [whisper.cpp](https://github.com/ggerganov/whisper.cpp) on a single M1 Pro MacBook:
https://user-images.githubusercontent.com/1991296/224442907-7693d4be-acaa-4e01-8b4f-add84093ffff.mp4
@@ -357,7 +357,7 @@ Example usage:
> [!NOTE]
> If you prefer basic usage, please consider using conversation mode instead of interactive mode
In this mode, you can always interrupt generation by pressing Ctrl+C and entering one or more lines of text, which will be converted into tokens and appended to the current context. You can also specify a *reverse prompt* with the parameter `-r "reverse prompt string"`. This will result in user input being prompted whenever the exact tokens of the reverse prompt string are encountered in the generation. A typical use is to use a prompt that makes LLaMA emulate a chat between multiple users, say Alice and Bob, and pass `-r "Alice:"`.
In this mode, you can always interrupt generation by pressing Ctrl+C and entering one or more lines of text, which will be converted into tokens and appended to the current context. You can also specify a *reverse prompt* with the parameter `-r "reverse prompt string"`. This will result in user input being prompted whenever the exact tokens of the reverse prompt string are encountered in the generation. A typical use is to use a prompt that makes JARVIS emulate a chat between multiple users, say Alice and Bob, and pass `-r "Alice:"`.
Here is an example of a few-shot interaction, invoked with the command
@@ -432,10 +432,10 @@ Please refer to [Build jarvis.cpp locally](./docs/build.md)
> [!NOTE]
> You can use the [GGUF-my-repo](https://huggingface.co/spaces/ggml-org/gguf-my-repo) space on Hugging Face to quantise your model weights without any setup too. It is synced from `jarvis.cpp` main every 6 hours.
To obtain the official LLaMA 2 weights please see the <a href="#obtaining-and-using-the-facebook-jarvis-2-model">Obtaining and using the Facebook LLaMA 2 model</a> section. There is also a large selection of pre-quantized `gguf` models available on Hugging Face.
To obtain the official JARVIS 2 weights please see the <a href="#obtaining-and-using-the-facebook-jarvis-2-model">Obtaining and using the Facebook JARVIS 2 model</a> section. There is also a large selection of pre-quantized `gguf` models available on Hugging Face.
Note: `convert.py` has been moved to `examples/convert_legacy_jarvis.py` and shouldn't be used for anything other than `Jarvis/Jarvis2/Mistral` models and their derivatives.
It does not support LLaMA 3, you can use `convert_hf_to_gguf.py` with LLaMA 3 downloaded from Hugging Face.
It does not support JARVIS 3, you can use `convert_hf_to_gguf.py` with JARVIS 3 downloaded from Hugging Face.
To learn more about quantizing model, [read this documentation](./examples/quantize/README.md)
@@ -474,10 +474,10 @@ To learn more how to measure perplexity using jarvis.cpp, [read this documentati
**Seminal papers and background on the models**
If your issue is with model generation quality, then please at least scan the following links and papers to understand the limitations of LLaMA models. This is especially important when choosing an appropriate model size and appreciating both the significant and subtle differences between LLaMA models and ChatGPT:
- LLaMA:
- [Introducing LLaMA: A foundational, 65-billion-parameter large language model](https://ai.facebook.com/blog/large-language-model-jarvis-meta-ai/)
- [LLaMA: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971)
If your issue is with model generation quality, then please at least scan the following links and papers to understand the limitations of JARVIS models. This is especially important when choosing an appropriate model size and appreciating both the significant and subtle differences between JARVIS models and ChatGPT:
- JARVIS:
- [Introducing JARVIS: A foundational, 65-billion-parameter large language model](https://ai.facebook.com/blog/large-language-model-jarvis-meta-ai/)
- [JARVIS: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971)
- GPT-3
- [Language Models are Few-Shot Learners](https://arxiv.org/abs/2005.14165)
- GPT-3.5 / InstructGPT / ChatGPT:

View file

@@ -26,7 +26,7 @@ For maximum security when handling untrusted inputs, you may need to employ the
* Sandboxing: Isolate the environment where the inference happens.
* Pre-analysis: Check how the model performs by default when exposed to prompt injection (e.g. using [fuzzing for prompt injection](https://github.com/FonduAI/awesome-prompt-injection?tab=readme-ov-file#tools)). This will give you leads on how hard you will have to work on the next topics.
* Updates: Keep both LLaMA C++ and your libraries updated with the latest security patches.
* Updates: Keep both JARVIS C++ and your libraries updated with the latest security patches.
* Input Sanitation: Before feeding data to the model, sanitize inputs rigorously. This involves techniques such as:
* Validation: Enforce strict rules on allowed characters and data types.
* Filtering: Remove potentially malicious scripts or code fragments.
@@ -57,7 +57,7 @@ If you intend to run multiple models in parallel with shared memory, it is your
## Reporting a vulnerability
Beware that none of the topics under [Using jarvis.cpp securely](#using-jarviscpp-securely) are considered vulnerabilities of LLaMA C++.
Beware that none of the topics under [Using jarvis.cpp securely](#using-jarviscpp-securely) are considered vulnerabilities of JARVIS C++.
<!-- normal version -->
However, if you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released.

View file

@@ -390,7 +390,7 @@ function gg_run_open_jarvis_7b_v2 {
function gg_sum_open_jarvis_7b_v2 {
gg_printf '### %s\n\n' "${ci}"
gg_printf 'OpenLLaMA 7B-v2:\n'
gg_printf 'OpenJARVIS 7B-v2:\n'
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)"

View file

@@ -4,7 +4,7 @@ libdir=${exec_prefix}/lib
includedir=${prefix}/include
Name: jarvis
Description: Port of Facebook's LLaMA model in C/C++
Description: Port of Facebook's JARVIS model in C/C++
Version: @PROJECT_VERSION@
Libs: -L${libdir} -ljarvis
Cflags: -I${includedir}

View file

@@ -1515,7 +1515,7 @@ class StableLMModel(Model):
raise ValueError(f"Unprocessed norms: {norms}")
@Model.register("LLaMAForCausalLM", "JarvisForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
@Model.register("JARVISForCausalLM", "JarvisForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
class JarvisModel(Model):
model_arch = gguf.MODEL_ARCH.JARVIS

View file

@@ -396,11 +396,11 @@ def handle_args():
parser.add_argument('--desc',
help = 'Set model description')
parser.add_argument('--gqa', type = int, default = 1,
help = 'grouped-query attention factor (use 8 for LLaMA2 70B)')
help = 'grouped-query attention factor (use 8 for JARVIS2 70B)')
parser.add_argument('--eps', default = '5.0e-06',
help = 'RMS norm eps: Use 1e-6 for LLaMA1 and OpenLLaMA, use 1e-5 for LLaMA2')
help = 'RMS norm eps: Use 1e-6 for JARVIS1 and OpenJARVIS, use 1e-5 for JARVIS2')
parser.add_argument('--context-length', '-c', type=int, default = 2048,
help = 'Default max context length: LLaMA1 is typically 2048, LLaMA2 is typically 4096')
help = 'Default max context length: JARVIS1 is typically 2048, JARVIS2 is typically 4096')
parser.add_argument('--model-metadata-dir', '-m', type = Path,
help ='Load HuggingFace/.pth vocab and metadata from the specified directory')
parser.add_argument("--vocab-dir", type=Path,
@@ -417,7 +417,7 @@ def main():
logger.info(f'* Using config: {cfg}')
logger.warning('=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===')
if cfg.model_metadata_dir is None and (cfg.gqa == 1 or cfg.eps == '5.0e-06'):
logger.info('- Note: If converting LLaMA2, specifying "--eps 1e-5" is required. 70B models also need "--gqa 8".')
logger.info('- Note: If converting JARVIS2, specifying "--eps 1e-5" is required. 70B models also need "--gqa 8".')
data = np.memmap(cfg.input, mode = 'r')
model = GGMLModel()
logger.info('* Scanning GGML input file')
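Putting these flags together, a JARVIS2 70B conversion might be invoked as follows (a sketch; the script name and the input/output flags are placeholders for however the converter is exposed locally):

```bash
python convert_jarvis_ggml_to_gguf.py --input models/jarvis2-70b.ggml.bin \
    --output models/jarvis2-70b.gguf --eps 1e-5 --gqa 8
```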

View file

@@ -159,7 +159,7 @@ Building through oneAPI compilers will make avx_vnni instruction set available f
- Using oneAPI docker image:
If you do not want to source the environment vars and install oneAPI manually, you can also build the code using intel docker container: [oneAPI-basekit](https://hub.docker.com/r/intel/oneapi-basekit). Then, you can use the commands given above.
Check [Optimizing and Running LLaMA2 on Intel® CPU](https://www.intel.com/content/www/us/en/content-details/791610/optimizing-and-running-jarvis2-on-intel-cpu.html) for more information.
Check [Optimizing and Running JARVIS2 on Intel® CPU](https://www.intel.com/content/www/us/en/content-details/791610/optimizing-and-running-jarvis2-on-intel-cpu.html) for more information.
### CUDA

View file

@@ -7,7 +7,7 @@
## Images
We have three Docker images available for this project:
1. `ghcr.io/ggerganov/jarvis.cpp:full`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and convert into 4-bit quantization. (platforms: `linux/amd64`, `linux/arm64`)
1. `ghcr.io/ggerganov/jarvis.cpp:full`: This image includes both the main executable file and the tools to convert JARVIS models into ggml and convert into 4-bit quantization. (platforms: `linux/amd64`, `linux/arm64`)
2. `ghcr.io/ggerganov/jarvis.cpp:light`: This image only includes the main executable file. (platforms: `linux/amd64`, `linux/arm64`)
3. `ghcr.io/ggerganov/jarvis.cpp:server`: This image only includes the server executable file. (platforms: `linux/amd64`, `linux/arm64`)
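As a usage sketch (the mounted model path and prompt are placeholders), the light image can be run like:

```bash
docker run -v /path/to/models:/models ghcr.io/ggerganov/jarvis.cpp:light \
    -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512
```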
@@ -74,7 +74,7 @@ The defaults are:
The resulting images are essentially the same as the non-CUDA images:
1. `local/jarvis.cpp:full-cuda`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and convert into 4-bit quantization.
1. `local/jarvis.cpp:full-cuda`: This image includes both the main executable file and the tools to convert JARVIS models into ggml and convert into 4-bit quantization.
2. `local/jarvis.cpp:light-cuda`: This image only includes the main executable file.
3. `local/jarvis.cpp:server-cuda`: This image only includes the server executable file.
@@ -108,7 +108,7 @@ The defaults are:
The resulting images are essentially the same as the non-MUSA images:
1. `local/jarvis.cpp:full-musa`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and convert into 4-bit quantization.
1. `local/jarvis.cpp:full-musa`: This image includes both the main executable file and the tools to convert JARVIS models into ggml and convert into 4-bit quantization.
2. `local/jarvis.cpp:light-musa`: This image only includes the main executable file.
3. `local/jarvis.cpp:server-musa`: This image only includes the server executable file.

View file

@@ -12,10 +12,10 @@ There are 2 modes of operation:
```bash
./jarvis-batched-bench -m model.gguf -c 2048 -b 2048 -ub 512 -npp 128,256,512 -ntg 128,256 -npl 1,2,4,8,16,32 [-pps]
# LLaMA 7B, F16, N_KV_MAX = 16384 (8GB), prompt not shared
# JARVIS 7B, F16, N_KV_MAX = 16384 (8GB), prompt not shared
./jarvis-batched-bench -m ./models/jarvis-7b/ggml-model-f16.gguf -c 16384 -b 2048 -ub 512 -ngl 99
# LLaMA 7B, Q8_0, N_KV_MAX = 16384 (8GB), prompt is shared
# JARVIS 7B, Q8_0, N_KV_MAX = 16384 (8GB), prompt is shared
./jarvis-batched-bench -m ./models/jarvis-7b/ggml-model-q8_0.gguf -c 16384 -b 2048 -ub 512 -ngl 99 -pps
# custom set of batches

View file

@@ -276,7 +276,7 @@ class Params:
rope_finetuned = rope_finetuned,
)
# LLaMA v2 70B params.json
# JARVIS v2 70B params.json
# {"dim": 8192, "multiple_of": 4096, "ffn_dim_multiplier": 1.3, "n_heads": 64, "n_kv_heads": 8, "n_layers": 80, "norm_eps": 1e-05, "vocab_size": -1}
@staticmethod
def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
@@ -288,7 +288,7 @@ class Params:
f_rope_freq_base = None
n_ff = None
# hack to determine LLaMA v1 vs v2 vs CodeJarvis
# hack to determine JARVIS v1 vs v2 vs CodeJarvis
if config.get("moe"):
# Mixtral
n_ctx = 32768
@@ -296,10 +296,10 @@ class Params:
# CodeJarvis
n_ctx = 16384
elif config["norm_eps"] == 1e-05:
# LLaMA v2
# JARVIS v2
n_ctx = 4096
else:
# LLaMA v1
# JARVIS v1
n_ctx = 2048
if "layers.0.feed_forward.w1.weight" in model:
@@ -467,7 +467,7 @@ class ModelPlus:
def merge_sharded(models: list[LazyModel]) -> LazyModel:
# Original LLaMA models have each file contain one part of each tensor.
# Original JARVIS models have each file contain one part of each tensor.
# Use a dict instead of a set to preserve order.
names = {name: None for model in models for name in model}
@@ -772,14 +772,14 @@ class OutputFile:
def add_meta_model(self, params: Params, metadata: gguf.Metadata | None) -> None:
# Metadata About The Model And Its Provenance
name = "LLaMA"
name = "JARVIS"
if metadata is not None and metadata.name is not None:
name = metadata.name
elif params.path_model is not None:
name = params.path_model.name
elif params.n_ctx == 4096:
# Heuristic detection of LLaMA v2 model
name = "LLaMA v2"
# Heuristic detection of JARVIS v2 model
name = "JARVIS v2"
self.gguf.add_name(name)
@@ -1289,7 +1289,7 @@ def main(args_in: list[str] | None = None) -> None:
if np.uint32(1) == np.uint32(1).newbyteorder("<"):
# We currently only support Q8_0 output on little endian systems.
output_choices.append("q8_0")
parser = argparse.ArgumentParser(description="Convert a LLaMA model to a GGML compatible file")
parser = argparse.ArgumentParser(description="Convert a JARVIS model to a GGML compatible file")
parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file")
parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab")
@@ -1366,8 +1366,8 @@ def main(args_in: list[str] | None = None) -> None:
msg = """\
The model doesn't have a context size, and you didn't specify one with --ctx
Please specify one with --ctx:
- LLaMA v1: --ctx 2048
- LLaMA v2: --ctx 4096"""
- JARVIS v1: --ctx 2048
- JARVIS v2: --ctx 4096"""
parser.error(textwrap.dedent(msg))
params.n_ctx = args.ctx
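A v2 conversion satisfying this check might therefore look like (a sketch; the model directory is a placeholder):

```bash
python examples/convert_legacy_jarvis.py models/jarvis-2-7b --ctx 4096
```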

View file

@@ -9,17 +9,17 @@ For further information have a look at the main README.md in jarvis.cpp/example/
## Common Options
In this section, we cover the most commonly used options for running the `infill` program with the LLaMA models:
In this section, we cover the most commonly used options for running the `infill` program with the JARVIS models:
- `-m FNAME, --model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.bin`).
- `-m FNAME, --model FNAME`: Specify the path to the JARVIS model file (e.g., `models/7B/ggml-model.bin`).
- `-i, --interactive`: Run the program in interactive mode, allowing you to provide input directly and receive real-time responses.
- `-n N, --n-predict N`: Set the number of tokens to predict when generating text. Adjusting this value can influence the length of the generated text.
- `-c N, --ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference.
- `-c N, --ctx-size N`: Set the size of the prompt context. The default is 512, but JARVIS models were built with a context of 2048, which will provide better results for longer input/inference.
- `--spm-infill`: Use Suffix/Prefix/Middle pattern for infill (instead of Prefix/Suffix/Middle) as some models prefer this.
## Input Prompts
The `infill` program provides several ways to interact with the LLaMA models using input prompts:
The `infill` program provides several ways to interact with the JARVIS models using input prompts:
- `--in-prefix PROMPT_BEFORE_CURSOR`: Provide the prefix directly as a command-line option.
- `--in-suffix PROMPT_AFTER_CURSOR`: Provide the suffix directly as a command-line option.
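For example, a prefix and suffix around the cursor might be passed like this (a sketch; the binary name, model file, and code fragments are placeholders):

```bash
./jarvis-infill -m models/codejarvis-7b.Q4_K_M.gguf \
    --in-prefix "def helloworld():\n    print(\"h" \
    --in-suffix "\n    print(\"goodbye world\")\n"
```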
@@ -27,7 +27,7 @@ The `infill` program provides several ways to interact with the LLaMA models usi
## Interaction
The `infill` program offers a seamless way to interact with LLaMA models, allowing users to receive real-time infill suggestions. The interactive mode can be triggered using `--interactive`, and `--interactive-first`
The `infill` program offers a seamless way to interact with JARVIS models, allowing users to receive real-time infill suggestions. The interactive mode can be triggered using `--interactive`, and `--interactive-first`
### Interaction Options

View file

@@ -311,10 +311,10 @@ int main(int argc, char ** argv) {
if (params.interactive) {
const char *control_message;
if (params.multiline_input) {
control_message = " - To return control to LLaMA, end your input with '\\'.\n"
control_message = " - To return control to JARVIS, end your input with '\\'.\n"
" - To return control without starting a new line, end your input with '/'.\n";
} else {
control_message = " - Press Return to return control to LLaMA.\n"
control_message = " - Press Return to return control to JARVIS.\n"
" - To return control without starting a new line, end your input with '/'.\n"
" - If you want to submit another line, end your input with '\\'.\n";
}

View file

@@ -30,7 +30,7 @@ git clone https://huggingface.co/mtgv/MobileVLM-1.7B
git clone https://huggingface.co/openai/clip-vit-large-patch14-336
```
2. Use `llava_surgery.py` to split the LLaVA model to LLaMA and multimodal projector constituents:
2. Use `llava_surgery.py` to split the LLaVA model to JARVIS and multimodal projector constituents:
```sh
python ./examples/llava/llava_surgery.py -m path/to/MobileVLM-1.7B
@@ -54,18 +54,18 @@ python ./examples/llava/convert_image_encoder_to_gguf.py \
--projector-type ldpv2
```
4. Use `examples/convert_legacy_jarvis.py` to convert the LLaMA part of LLaVA to GGUF:
4. Use `examples/convert_legacy_jarvis.py` to convert the JARVIS part of LLaVA to GGUF:
```sh
python ./examples/convert_legacy_jarvis.py path/to/MobileVLM-1.7B --skip-unknown
```
5. Use `quantize` to convert LLaMA part's DataType from `fp32` to `q4_k`
5. Use `quantize` to convert JARVIS part's DataType from `fp32` to `q4_k`
```sh
./jarvis-quantize path/to/MobileVLM-1.7B/ggml-model-F32.gguf path/to/MobileVLM-1.7B/ggml-model-q4_k.gguf q4_k_s
```
Now both the LLaMA part and the image encoder are in the `MobileVLM-1.7B` directory.
Now both the JARVIS part and the image encoder are in the `MobileVLM-1.7B` directory.
## Android compile and run
### compile

View file

@@ -38,7 +38,7 @@ git clone https://huggingface.co/openai/clip-vit-large-patch14-336
pip install -r examples/llava/requirements.txt
```
3. Use `llava_surgery.py` to split the LLaVA model to LLaMA and multimodal projector constituents:
3. Use `llava_surgery.py` to split the LLaVA model to JARVIS and multimodal projector constituents:
```sh
python ./examples/llava/llava_surgery.py -m ../llava-v1.5-7b
@@ -50,13 +50,13 @@ python ./examples/llava/llava_surgery.py -m ../llava-v1.5-7b
python ./examples/llava/convert_image_encoder_to_gguf.py -m ../clip-vit-large-patch14-336 --llava-projector ../llava-v1.5-7b/llava.projector --output-dir ../llava-v1.5-7b
```
5. Use `examples/convert_legacy_jarvis.py` to convert the LLaMA part of LLaVA to GGUF:
5. Use `examples/convert_legacy_jarvis.py` to convert the JARVIS part of LLaVA to GGUF:
```sh
python ./examples/convert_legacy_jarvis.py ../llava-v1.5-7b --skip-unknown
```
Now both the LLaMA part and the image encoder are in the `llava-v1.5-7b` directory.
Now both the JARVIS part and the image encoder are in the `llava-v1.5-7b` directory.
## LLaVA 1.6 gguf conversion
1) First clone a LLaVA 1.6 model:

View file

@@ -372,7 +372,7 @@ bool llava_validate_embed_size(const jarvis_context * ctx_jarvis, const clip_ctx
int n_jarvis_embd = jarvis_n_embd(jarvis_get_model(ctx_jarvis));
auto n_image_embd = clip_n_mmproj_embd(ctx_clip);
if (n_image_embd != n_jarvis_embd) {
LOG_ERR("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_image_embd, n_jarvis_embd);
LOG_ERR("%s: embedding dim of the multimodal projector (%d) is not equal to that of JARVIS (%d). Make sure that you use the correct mmproj file.\n", __func__, n_image_embd, n_jarvis_embd);
return false;
}
return true;

View file

@@ -34,5 +34,5 @@ if len(clip_tensors) > 0:
print("Done!")
print(f"Now you can convert {args.model} to a regular LLaMA GGUF file.")
print(f"Now you can convert {args.model} to a regular JARVIS GGUF file.")
print(f"Also, use {args.model}/llava.projector to prepare a llava-encoder.gguf file.")

View file

@@ -155,5 +155,5 @@ if len(projector) > 0:
save_model(projector, f"{args.model}/llava.projector", 'pytorch')
print("Done!")
print(f"Now you can convert {args.model} to a regular LLaMA GGUF file.")
print(f"Now you can convert {args.model} to a regular JARVIS GGUF file.")
print(f"Also, use {args.model}/llava.projector to prepare a llava-encoder.gguf file.")

View file

@@ -41,5 +41,5 @@ tok = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True)
tok.save_pretrained(f"{args.model}/model")
print("Done!")
print(f"Now you can convert {args.model} to a regular LLaMA GGUF file.")
print(f"Now you can convert {args.model} to a regular JARVIS GGUF file.")
print(f"Also, use {args.model}/minicpmv.projector to prepare a minicpmv-encoder.gguf file.")

View file

@@ -1,6 +1,6 @@
# jarvis.cpp/examples/main
This example program allows you to use various LLaMA language models easily and efficiently. It is specifically designed to work with the [jarvis.cpp](https://github.com/ggerganov/jarvis.cpp) project, which provides a plain C/C++ implementation with optional 4-bit quantization support for faster, lower memory inference, and is optimized for desktop CPUs. This program can be used to perform various inference tasks with LLaMA models, including generating text based on user-provided prompts and chat-like interactions with reverse prompts.
This example program allows you to use various JARVIS language models easily and efficiently. It is specifically designed to work with the [jarvis.cpp](https://github.com/ggerganov/jarvis.cpp) project, which provides a plain C/C++ implementation with optional 4-bit quantization support for faster, lower memory inference, and is optimized for desktop CPUs. This program can be used to perform various inference tasks with JARVIS models, including generating text based on user-provided prompts and chat-like interactions with reverse prompts.
## Table of Contents
@@ -60,20 +60,20 @@ jarvis-cli.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --ignore-eos -n -1
## Common Options
In this section, we cover the most commonly used options for running the `jarvis-cli` program with the LLaMA models:
In this section, we cover the most commonly used options for running the `jarvis-cli` program with the JARVIS models:
- `-m FNAME, --model FNAME`: Specify the path to the LLaMA model file (e.g., `models/gemma-1.1-7b-it.Q4_K_M.gguf`; inferred from `--model-url` if set).
- `-m FNAME, --model FNAME`: Specify the path to the JARVIS model file (e.g., `models/gemma-1.1-7b-it.Q4_K_M.gguf`; inferred from `--model-url` if set).
- `-mu MODEL_URL --model-url MODEL_URL`: Specify a remote http url to download the file (e.g [https://huggingface.co/ggml-org/gemma-1.1-7b-it-Q4_K_M-GGUF/resolve/main/gemma-1.1-7b-it.Q4_K_M.gguf?download=true](https://huggingface.co/ggml-org/gemma-1.1-7b-it-Q4_K_M-GGUF/resolve/main/gemma-1.1-7b-it.Q4_K_M.gguf?download=true)).
- `-i, --interactive`: Run the program in interactive mode, allowing you to provide input directly and receive real-time responses.
- `-n N, --n-predict N`: Set the number of tokens to predict when generating text. Adjusting this value can influence the length of the generated text.
- `-c N, --ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference.
- `-c N, --ctx-size N`: Set the size of the prompt context. The default is 512, but JARVIS models were built with a context of 2048, which will provide better results for longer input/inference.
- `-mli, --multiline-input`: Allows you to write or paste multiple lines without ending each in '\'
- `-t N, --threads N`: Set the number of threads to use during generation. For optimal performance, it is recommended to set this value to the number of physical CPU cores your system has.
- `-ngl N, --n-gpu-layers N`: When compiled with GPU support, this option allows offloading some layers to the GPU for computation. Generally results in increased performance.
## Input Prompts
The `jarvis-cli` program provides several ways to interact with the LLaMA models using input prompts:
The `jarvis-cli` program provides several ways to interact with the JARVIS models using input prompts:
- `--prompt PROMPT`: Provide a prompt directly as a command-line option.
- `--file FNAME`: Provide a file containing a prompt or multiple prompts.
@@ -81,9 +81,9 @@ The `jarvis-cli` program provides several ways to interact with the LLaMA models
## Interaction
The `jarvis-cli` program offers a seamless way to interact with LLaMA models, allowing users to engage in real-time conversations or provide instructions for specific tasks. The interactive mode can be triggered using various options, including `--interactive` and `--interactive-first`.
The `jarvis-cli` program offers a seamless way to interact with JARVIS models, allowing users to engage in real-time conversations or provide instructions for specific tasks. The interactive mode can be triggered using various options, including `--interactive` and `--interactive-first`.
In interactive mode, users can participate in text generation by injecting their input during the process. Users can press `Ctrl+C` at any time to interject and type their input, followed by pressing `Return` to submit it to the LLaMA model. To submit additional lines without finalizing input, users can end the current line with a backslash (`\`) and continue typing.
In interactive mode, users can participate in text generation by injecting their input during the process. Users can press `Ctrl+C` at any time to interject and type their input, followed by pressing `Return` to submit it to the JARVIS model. To submit additional lines without finalizing input, users can end the current line with a backslash (`\`) and continue typing.
### Interaction Options
@@ -92,11 +92,11 @@ In interactive mode, users can participate in text generation by injecting their
- `-cnv, --conversation`: Run the program in conversation mode (does not print special tokens and suffix/prefix, use default chat template) (default: false)
- `--color`: Enable colorized output to visually distinguish between prompts, user input, and generated text.
By understanding and utilizing these interaction options, you can create engaging and dynamic experiences with the LLaMA models, tailoring the text generation process to your specific needs.
By understanding and utilizing these interaction options, you can create engaging and dynamic experiences with the JARVIS models, tailoring the text generation process to your specific needs.
### Reverse Prompts
Reverse prompts are a powerful way to create a chat-like experience with a LLaMA model by pausing the text generation when specific text strings are encountered:
Reverse prompts are a powerful way to create a chat-like experience with a JARVIS model by pausing the text generation when specific text strings are encountered:
- `-r PROMPT, --reverse-prompt PROMPT`: Specify one or multiple reverse prompts to pause text generation and switch to interactive mode. For example, `-r "User:"` can be used to jump back into the conversation whenever it's the user's turn to speak. This helps create a more interactive and conversational experience. However, the reverse prompt doesn't work when it ends with a space.
@@ -127,11 +127,11 @@ When --in-prefix or --in-suffix options are enabled the chat template ( --chat-t
## Context Management
During text generation, LLaMA models have a limited context size, which means they can only consider a certain number of tokens from the input and generated text. When the context fills up, the model resets internally, potentially losing some information from the beginning of the conversation or instructions. Context management options help maintain continuity and coherence in these situations.
During text generation, JARVIS models have a limited context size, which means they can only consider a certain number of tokens from the input and generated text. When the context fills up, the model resets internally, potentially losing some information from the beginning of the conversation or instructions. Context management options help maintain continuity and coherence in these situations.
### Context Size
- `-c N, --ctx-size N`: Set the size of the prompt context (default: 0, 0 = loaded from model). The LLaMA models were built with a context of 2048-8192, which will yield the best results on longer input/inference.
- `-c N, --ctx-size N`: Set the size of the prompt context (default: 0, 0 = loaded from model). The JARVIS models were built with a context of 2048-8192, which will yield the best results on longer input/inference.
### Extended Context Size
@@ -145,7 +145,7 @@ The `--keep` option allows users to retain the original prompt when the model ru
- `--keep N`: Specify the number of tokens from the initial prompt to retain when the model resets its internal context. By default, this value is set to 0 (meaning no tokens are kept). Use `-1` to retain all tokens from the initial prompt.
By utilizing context management options like `--ctx-size` and `--keep`, you can maintain a more coherent and consistent interaction with the LLaMA models, ensuring that the generated text remains relevant to the original prompt or conversation.
By utilizing context management options like `--ctx-size` and `--keep`, you can maintain a more coherent and consistent interaction with the JARVIS models, ensuring that the generated text remains relevant to the original prompt or conversation.
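Combining the two, a long chat session might be launched like this (a sketch; the model path, prompt file, and reverse prompt are placeholders):

```bash
./jarvis-cli -m models/7B/ggml-model-q4_0.gguf -c 4096 --keep -1 -i -r "User:" -f prompts/chat-with-bob.txt
```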
## Generation Flags
@@ -286,7 +286,7 @@ The logit bias option allows you to manually adjust the likelihood of specific t
For example, use `--logit-bias 15043+1` to increase the likelihood of the token 'Hello', or `--logit-bias 15043-1` to decrease its likelihood. Using a value of negative infinity, `--logit-bias 15043-inf` ensures that the token `Hello` is never produced.
A more practical use case might be to prevent the generation of `\code{begin}` and `\code{end}` by setting the `\` token (29905) to negative infinity with `-l 29905-inf`. (This is due to the prevalence of LaTeX codes that show up in LLaMA model inference.)
A more practical use case might be to prevent the generation of `\code{begin}` and `\code{end}` by setting the `\` token (29905) to negative infinity with `-l 29905-inf`. (This is due to the prevalence of LaTeX codes that show up in JARVIS model inference.)
Example usage: `--logit-bias 29905-inf`
@@ -298,7 +298,7 @@ The RNG seed is used to initialize the random number generator that influences t
## Performance Tuning and Memory Options
These options help improve the performance and memory usage of the LLaMA models. By adjusting these settings, you can fine-tune the model's behavior to better suit your system's capabilities and achieve optimal performance for your specific use case.
These options help improve the performance and memory usage of the JARVIS models. By adjusting these settings, you can fine-tune the model's behavior to better suit your system's capabilities and achieve optimal performance for your specific use case.
### Number of Threads
@@ -343,7 +343,7 @@ For information about 4-bit quantization, which can significantly improve perfor
## Additional Options
These options provide extra functionality and customization when running the LLaMA models:
These options provide extra functionality and customization when running the JARVIS models:
- `-h, --help`: Display a help message showing all available options and their default values. This is particularly useful for checking the latest options and default values, as they can change frequently, and the information in this document may become outdated.
- `--verbose-prompt`: Print the prompt before generating text.

View file

@@ -16,7 +16,7 @@ The uncertainty is determined empirically by assuming a Gaussian distribution of
More statistics can be obtained by recording the logits from the FP16 version of a model.
To do this, supply `perplexity` with `--kl-divergence-base path/to/logit/binary/file.kld`.
The program will then record all logits and save them to the provided path in binary format.
**The logit file will be very large, 11 GiB for LLaMA 2 or 37 GiB for LLaMA 3 when using the Wikitext-2 test set.**
**The logit file will be very large, 11 GiB for JARVIS 2 or 37 GiB for JARVIS 3 when using the Wikitext-2 test set.**
Once you have the file, supply `perplexity` with the quantized model, the logits file via `--kl-divergence-base`,
and finally the `--kl-divergence` argument to indicate that the program should calculate the so-called Kullback-Leibler divergence.
This is a measure of how similar the FP16 and the quantized logit distributions are, with a value of 0 indicating that the distributions are the same.
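The two-step workflow might look like this (a sketch; the binary name, model files, and test-set path are placeholders):

```bash
# 1. record logits from the FP16 model
./jarvis-perplexity -m ggml-model-f16.gguf -f wiki.test.raw --kl-divergence-base wiki.test.kld
# 2. compare a quantized model against the recorded logits
./jarvis-perplexity -m ggml-model-q4_0.gguf -f wiki.test.raw --kl-divergence-base wiki.test.kld --kl-divergence
```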
@@ -32,7 +32,7 @@ In addition to the KL divergence the following statistics are calculated with `-
* The root mean square of the change in token probabilities. If you were to assume that the quantization simply causes Gaussian noise on the token probabilities then this would be the standard deviation of said noise. The uncertainty on the value is calculated under the assumption that the change in token probabilities follows a Gaussian distribution. Related discussion: https://github.com/ggerganov/jarvis.cpp/discussions/2875.
* Same top p: Percentage of how often the token was assigned the highest probabilities by both models. The uncertainty is calculated from the Gaussian approximation of the binomial distribution.
## LLaMA 3 8b Scoreboard
## JARVIS 3 8b Scoreboard
| Revision | f364eb6f |
|:---------|:-------------------|
@@ -98,7 +98,7 @@ So the "f16" results are to be understood as the difference resulting only from
There seems to be no consistent improvement from using more Wikitext tokens for the importance matrix.
K-quants score better on mean Δp relative to the legacy quants than e.g. KL divergence would suggest.
## LLaMA 2 vs. LLaMA 3 Quantization comparison
## JARVIS 2 vs. JARVIS 3 Quantization comparison
| Revision | f364eb6f |
|:---------|:-------------------|
@@ -124,7 +124,7 @@ K-quants score better on mean Δp than the legacy quants than e.g. KL divergence
| RMS Δp | 9.762 ± 0.053 % | 21.421 ± 0.079 % | 3.252 ± 0.024 % | 5.519 ± 0.050 % | 1.339 ± 0.010 % | 2.295 ± 0.019 % | 0.618 ± 0.011 % | 1.198 ± 0.007 % |
| Same top p | 85.584 ± 0.086 % | 71.138 ± 0.119 % | 94.665 ± 0.055 % | 91.901 ± 0.072 % | 97.520 ± 0.038 % | 96.031 ± 0.051 % | 98.846 ± 0.026 % | 97.674 ± 0.040 % |
## LLaMA 3 BF16 vs. FP16 comparison
## JARVIS 3 BF16 vs. FP16 comparison
| Revision | 83330d8c |
|:---------|:--------------|
@@ -132,7 +132,7 @@ K-quants score better on mean Δp than the legacy quants than e.g. KL divergence
| CPU | AMD Epyc 7742 |
| GPU | N/A |
Results were calculated with LLaMA 3 8b BF16 as `--kl-divergence-base` and LLaMA 3 8b FP16 as the `--model` for comparison.
Results were calculated with JARVIS 3 8b BF16 as `--kl-divergence-base` and JARVIS 3 8b FP16 as the `--model` for comparison.
| Metric | Value |
|--------------------------------|--------------------------|

View file

@@ -7,7 +7,7 @@ Note: It is synced from jarvis.cpp `main` every 6 hours.
Example usage:
```bash
# obtain the official LLaMA model weights and place them in ./models
# obtain the official JARVIS model weights and place them in ./models
ls ./models
jarvis-2-7b tokenizer_checklist.chk tokenizer.model
# [Optional] for models using BPE tokenizers

View file

@@ -1,4 +1,4 @@
# LLaMA.cpp HTTP Server
# JARVIS.cpp HTTP Server
Fast, lightweight, pure C/C++ HTTP server based on [httplib](https://github.com/yhirose/cpp-httplib), [nlohmann::json](https://github.com/nlohmann/json) and **jarvis.cpp**.

View file

@@ -1,4 +1,4 @@
# LLaMA.cpp Server Wild Theme
# JARVIS.cpp Server Wild Theme
Simple themes directory of sample "public" directories. To try any of these add --path to your run like `server --path=wild`.

View file

@@ -1,4 +1,4 @@
# LLaMA.cpp Server Buttons Top Theme
# JARVIS.cpp Server Buttons Top Theme
Simple tweaks to the UI. Chat buttons at the top of the page instead of bottom so you can hit Stop instead of chasing it down the page.

View file

@@ -1,4 +1,4 @@
# LLaMA.cpp Server Wild Theme
# JARVIS.cpp Server Wild Theme
Simple tweaks to the UI. To use simply run server with `--path=themes/wild`

View file

@@ -13,7 +13,7 @@
# Cf. https://jade.fyi/blog/flakes-arent-real/ for a more detailed exposition
# of the relation between Nix and the Nix Flakes.
{
description = "Port of Facebook's LLaMA model in C/C++";
description = "Port of Facebook's JARVIS model in C/C++";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";

View file

@@ -3127,7 +3127,7 @@ static enum ggml_status ggml_metal_graph_compute(
// while these nodes are processing, we start n_cb threads to enqueue the rest of the nodes
// each thread creates its own command buffer and enqueues the ops in parallel
//
// tests on M1 Pro and M2 Ultra using LLaMA models, show that optimal values for n_cb are 1 or 2
// tests on M1 Pro and M2 Ultra using JARVIS models, show that optimal values for n_cb are 1 or 2
@autoreleasepool {
ctx->gf = gf;

View file

@@ -40,7 +40,7 @@
// hardware for performance, and then use whatever resources remain for
// improving numerical accuracy.
//
// [1] J. Tunney, LLaMA Now Goes Faster on CPUs, Mar. 2024. [Online].
// [1] J. Tunney, JARVIS Now Goes Faster on CPUs, Mar. 2024. [Online].
// Available: https://justine.lol/matmul/. [Accessed: 29-Mar-2024].
#if defined(__GNUC__)

View file

@@ -143,9 +143,9 @@ class TestMetadataMethod(unittest.TestCase):
self.assertEqual(gguf.Metadata.get_model_id_components("jondurbin/bagel-dpo-2.8b-v0.2"),
('bagel-dpo-2.8b-v0.2', 'jondurbin', 'bagel-dpo', None, 'v0.2', '2.8B'))
# DPO in name, but can't be used for the finetune to keep 'LLaMA-3' in the basename
self.assertEqual(gguf.Metadata.get_model_id_components("voxmenthe/SFR-Iterative-DPO-LLaMA-3-8B-R-unquantized"),
('SFR-Iterative-DPO-LLaMA-3-8B-R-unquantized', 'voxmenthe', 'SFR-Iterative-DPO-LLaMA-3', 'R-unquantized', None, '8B'))
# DPO in name, but can't be used for the finetune to keep 'JARVIS-3' in the basename
self.assertEqual(gguf.Metadata.get_model_id_components("voxmenthe/SFR-Iterative-DPO-JARVIS-3-8B-R-unquantized"),
('SFR-Iterative-DPO-JARVIS-3-8B-R-unquantized', 'voxmenthe', 'SFR-Iterative-DPO-JARVIS-3', 'R-unquantized', None, '8B'))
# Too ambiguous
# TODO: should "base" be a 'finetune' or 'size_label'?

View file

@@ -67,7 +67,7 @@ extern "C" {
enum jarvis_vocab_type {
JARVIS_VOCAB_TYPE_NONE = 0, // For models without vocab
JARVIS_VOCAB_TYPE_SPM = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback
JARVIS_VOCAB_TYPE_SPM = 1, // JARVIS tokenizer based on byte-level BPE with byte fallback
JARVIS_VOCAB_TYPE_BPE = 2, // GPT-2 tokenizer based on byte-level BPE
JARVIS_VOCAB_TYPE_WPM = 3, // BERT tokenizer based on WordPiece
JARVIS_VOCAB_TYPE_UGM = 4, // T5 tokenizer based on Unigram

View file

@@ -36,7 +36,7 @@ struct jarvis_vocab {
std::map<std::pair<std::string, std::string>, int> bpe_ranks;
// default LLaMA special tokens
// default JARVIS special tokens
// TODO: should we set all of these to JARVIS_TOKEN_NULL?
id special_bos_id = 1;
id special_eos_id = 2;

View file

@@ -5242,7 +5242,7 @@ bool jarvis_model_loader::get_key(const enum llm_kv kid, enum jarvis_pooling_typ
//
// load LLaMA models
// load JARVIS models
//
static const char * jarvis_model_arch_name(llm_arch arch) {
@@ -6489,7 +6489,7 @@ static void llm_load_vocab(
vocab.init_tokenizer();
// determine the newline token: LLaMA "<0x0A>" == 10 == '\n', Falcon 193 == '\n'
// determine the newline token: JARVIS "<0x0A>" == 10 == '\n', Falcon 193 == '\n'
if (vocab.type == JARVIS_VOCAB_TYPE_SPM) {
try {
vocab.linefeed_id = jarvis_byte_to_token_impl(vocab, '\n');

View file

@@ -24,7 +24,7 @@ with open(fname_tok, 'r', encoding='utf-8') as f:
print('\nmain : tokenized in', "{:.3f}".format(1000.0 * (t_end - t_start)), 'ms (py)') # noqa: NP100
with open(fname_out, 'w', encoding='utf-8') as f:
for x in res:
# LLaMA v3 for some reason strips the space for these tokens (and others)
# JARVIS v3 for some reason strips the space for these tokens (and others)
# if x == 662:
# f.write(str(x) + ' \' ' + tokenizer.decode(x) + '\'\n')
# elif x == 1174:

View file

@@ -1,394 +0,0 @@
~ $ cd "C:\Users\Caleb P. Nwokocha\Downloads\jarvis.cpp-master\source"
~/Downloads/jarvis.cpp-master/source $ mak -j 8
sh: mak: not found
~/Downloads/jarvis.cpp-master/source $ make -j 8
I ccache not found. Consider installing it for faster compilation.
I jarvis.cpp build info:
I UNAME_S: Windows_NT
I UNAME_P: unknown
I UNAME_M: x86_64
I CFLAGS: -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -std=c11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -march=native -mtune=native -Xassembler -muse-unaligned-vector-move -fopenmp -Wdouble-promotion
I CXXFLAGS: -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX
I NVCCFLAGS: -std=c++11 -O3 -g
I LDFLAGS:
I CC: cc (GCC) 14.2.0
I CXX: c++ (GCC) 14.2.0
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c ggml/src/jarvisfile/sgemm.cpp -o ggml/src/jarvisfile/sgemm.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c ggml/src/ggml-amx.cpp -o ggml/src/ggml-amx.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c ggml/src/ggml-amx/mmq.cpp -o ggml/src/ggml-amx/mmq.o
cc -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -std=c11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -march=native -mtune=native -Xassembler -muse-unaligned-vector-move -fopenmp -Wdouble-promotion -c ggml/src/ggml.c -o ggml/src/ggml.o
cc -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -std=c11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -march=native -mtune=native -Xassembler -muse-unaligned-vector-move -fopenmp -Wdouble-promotion -c ggml/src/ggml-alloc.c -o ggml/src/ggml-alloc.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c ggml/src/ggml-backend.cpp -o ggml/src/ggml-backend.o
cc -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -std=c11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -march=native -mtune=native -Xassembler -muse-unaligned-vector-move -fopenmp -Wdouble-promotion -c ggml/src/ggml-quants.c -o ggml/src/ggml-quants.o
cc -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -std=c11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -march=native -mtune=native -Xassembler -muse-unaligned-vector-move -fopenmp -Wdouble-promotion -c ggml/src/ggml-aarch64.c -o ggml/src/ggml-aarch64.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=nativeggml/src/ggml.c:87:9: Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c src/jarvis.cpp -o src/jarvis.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wwarning: no-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c src/jarvis-vocab.cpp -o src/jarvis-vocab.o
oreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c src/jarvis-grammar.cpp -o src/jarvis-grammar.o
"GGML_CACHE_ALIGN" redefined
87 | #define c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN3 _WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c src/jarvis-sampling.cpp -o src/jarvis-sampling.o
GGML_CACHE_ALIGN __declspec(align(GGML_CACHE_LINE))
| ^~~~~~~~~~~~~~~~
ggml/src/ggml.c:65:9: note: this is the location of the previous definition
65 | #define GGML_CACHE_ALIGN __attribute__((aligned(GGML_CACHE_LINE)))
| ^~~~~~~~~~~~~~~~
ggml/src/ggml.c: In function 'atomic_store_explicit':
ggml/src/ggml.c:107:76: warning: unused parameter 'mo' [-Wunused-parameter]
  107 | static void atomic_store_explicit(atomic_int * ptr, LONG val, memory_order mo) {
      |                                                                ~~~~~~~~~~~~~^~
ggml/src/ggml.c: In function 'atomic_load_explicit':
ggml/src/ggml.c:114:65: warning: unused parameter 'mo' [-Wunused-parameter]
  114 | static LONG atomic_load_explicit(atomic_int * ptr, memory_order mo) {
      |                                                     ~~~~~~~~~~~~~^~
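These Win32 shims accept a memory_order argument only to mirror the C11 signatures; the Interlocked primitives they wrap are always sequentially consistent, so 'mo' is never read. A sketch of how the -Wunused-parameter noise can be silenced without touching any caller (assuming the shim wraps InterlockedExchange, as the excerpt suggests):

    // Sketch: discard the unused memory_order argument explicitly.
    static void atomic_store_explicit(atomic_int * ptr, LONG val, memory_order mo) {
        (void) mo; // Win32 InterlockedExchange is always seq_cst
        InterlockedExchange(ptr, val);
    }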
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c src/unicode.cpp -o src/unicode.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c src/unicode-data.cpp -o src/unicode-data.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c common/common.cpp -o common/common.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c common/arg.cpp -o common/arg.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c common/log.cpp -o common/log.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c common/console.cpp -o common/console.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c common/ngram-cache.cpp -o common/ngram-cache.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c common/sampling.cpp -o common/sampling.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c common/train.cpp -o common/train.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c common/json-schema-to-grammar.cpp -o common/json-schema-to-grammar.o
fatal: not a git repository (or any of the parent directories): .git
src/jarvis.cpp: In member function 'std::string jarvis_file::GetErrorMessageWin32(DWORD) const':
src/jarvis.cpp:1717:46: warning: format '%s' expects argument of type 'char*', but argument 2 has type 'DWORD' {aka 'long unsigned int'} [-Wformat=]
 1717 |             ret = format("Win32 error code: %s", error_code);
      |                                             ~^   ~~~~~~~~~~
      |                                              |   |
      |                                              char*   DWORD {aka long unsigned int}
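The warning at src/jarvis.cpp:1717 is a real bug, not just noise: the format string promises a char* for %s but receives a numeric DWORD, which is undefined behavior at runtime. DWORD is a 32-bit unsigned long on Windows, so %lu is the matching conversion. A sketch of the corrected call, assuming format() forwards to a printf-style formatter:

    // Sketch: print the Win32 error code as an unsigned integer, not a string.
    ret = format("Win32 error code: %lu", (unsigned long) error_code);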
cc -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -std=c11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -march=native -mtune=native -Xassembler -muse-unaligned-vector-move -fopenmp -Wdouble-promotion -Iexamples/gguf-hash/deps -c examples/gguf-hash/deps/sha1/sha1.c -o examples/gguf-hash/deps/sha1/sha1.o
cc -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -std=c11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -march=native -mtune=native -Xassembler -muse-unaligned-vector-move -fopenmp -Wdouble-promotion -Iexamples/gguf-hash/deps -c examples/gguf-hash/deps/xxhash/xxhash.c -o examples/gguf-hash/deps/xxhash/xxhash.o
cc -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -std=c11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -march=native -mtune=native -Xassembler -muse-unaligned-vector-move -fopenmp -Wdouble-promotion -Iexamples/gguf-hash/deps -c examples/gguf-hash/deps/sha256/sha256.c -o examples/gguf-hash/deps/sha256/sha256.o
ggml/src/ggml.c: In function 'atomic_fetch_add_explicit':
ggml/src/ggml.c:121:80: warning: unused parameter 'mo' [-Wunused-parameter]
  121 | static LONG atomic_fetch_add_explicit(atomic_int * ptr, LONG inc, memory_order mo) {
      |                                                                    ~~~~~~~~~~~~~^~
ggml/src/ggml.c:125: warning: type qualifiers ignored on function return type [-Wignored-qualifiers]
  125 | static atomic_bool atomic_flag_test_and_set(atomic_flag * ptr) {
      |        ^~~~~~~~~~~
ggml/src/ggml.c: In function 'atomic_thread_fence':
ggml/src/ggml.c:131: warning: unused parameter 'mo' [-Wunused-parameter]
  131 | static void atomic_thread_fence(memory_order mo) {
      |                                 ~~~~~~~~~~~~~^~
src/jarvis.cpp: In constructor 'jarvis_mmap::jarvis_mmap(jarvis_file*, size_t, bool)':
src/jarvis.cpp:2055: warning: cast between incompatible function types from 'long long int (*)()' to 'int (*)(void*, long long unsigned int, _WIN32_MEMORY_RANGE_ENTRY*, long unsigned int)' [-Wcast-function-type]
 2055 |             pPrefetchVirtualMemory = reinterpret_cast<decltype(pPrefetchVirtualMemory)> (GetProcAddress(hKernel32, "PrefetchVirtualMemory"));
      |                                      ^~~~~~~~~~~~~
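-Wcast-function-type fires because GetProcAddress returns the generic FARPROC type and the code converts it straight to the PrefetchVirtualMemory signature. GCC documents void (*)(void) as a wildcard function-pointer type for exactly this case, so a two-step cast keeps the behavior while acknowledging the mismatch. A sketch, assuming pPrefetchVirtualMemory and hKernel32 are declared as in the surrounding code:

    // Sketch: casting through void (*)(void) tells GCC the conversion is intentional.
    typedef void (*generic_fn_t)(void);
    pPrefetchVirtualMemory = reinterpret_cast<decltype(pPrefetchVirtualMemory)>(
            reinterpret_cast<generic_fn_t>(GetProcAddress(hKernel32, "PrefetchVirtualMemory")));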
In function 'SHA1Update',
    inlined from 'SHA1Final' at examples/gguf-hash/deps/sha1/sha1.c:265:5:
examples/gguf-hash/deps/sha1/sha1.c:219:13: warning: 'SHA1Transform' reading 64 bytes from a region of size 0 [-Wstringop-overread]
  219 |             SHA1Transform(context->state, &data[i]);
      |             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
examples/gguf-hash/deps/sha1/sha1.c: In function 'SHA1Final':
examples/gguf-hash/deps/sha1/sha1.c:54:6: note: referencing argument 2 of type 'const unsigned char[64]'
examples/gguf-hash/deps/sha1/sha1.c:54:6: note: in a call to function 'SHA1Transform'
   54 | void SHA1Transform(
      |      ^~~~~~~~~~~~~
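The -Wstringop-overread report is triggered by SHA1Transform declaring its second parameter as const unsigned char buffer[64]: once SHA1Update is inlined into SHA1Final, GCC decides &data[i] points at too small an object and flags the 64-byte read. In C the array bound in a parameter declaration is documentation only, so one workaround (a sketch, not necessarily the upstream fix) is to drop the bound and let a comment carry the contract:

    /* Sketch: a plain pointer makes no 64-byte size claim for GCC to check.
       The function still reads exactly 64 bytes, the SHA-1 block size. */
    void SHA1Transform(uint32_t state[5], const unsigned char *buffer);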
ggml/src/ggml.c: At top level:
ggml/src/ggml.c:19445:6: warning: no previous prototype for 'ggml_thread_apply_affinity' [-Wmissing-prototypes]
19445 | bool ggml_thread_apply_affinity(
      |      ^~~~~~~~~~~~~~~~~~~~~~~~~~
cc -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -std=c11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -march=native -mtune=native -Xassembler -muse-unaligned-vector-move -fopenmp -Wdouble-promotion -c tests/test-c.c -o tests/test-c.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/deprecation-warning/deprecation-warning.cpp -o examples/deprecation-warning/deprecation-warning.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c common/build-info.cpp -o common/build-info.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX examples/deprecation-warning/deprecation-warning.o -o main
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX examples/deprecation-warning/deprecation-warning.o -o server
NOTICE: The 'main' binary is deprecated. Please use 'jarvis-cli' instead.
In file included from src/jarvis.cpp:1:
src/jarvis.cpp: In function 'void jarvis_lora_adapter_init_internal(jarvis_model*, const char*, jarvis_lora_adapter&)':
src/jarvis.cpp:18918:20: warning: format '%ld' expects argument of type 'long int', but argument 4 has type 'std::unordered_map<std::__cxx11::basic_string<char>, jarvis_lora_weight>::size_type' {aka 'long long unsigned int'} [-Wformat=]
18918 |     JARVIS_LOG_INFO("%s: loaded %ld tensors from lora file\n", __func__, adapter.ab_map.size()*2);
      |                                                                          ~~~~~~~~~~~~~~~~~~~~~~~
      |                                                                          |
      |                                                                          std::unordered_map<std::__cxx11::basic_string<char>, jarvis_lora_weight>::size_type {aka long long unsigned int}
src/jarvis-impl.h:28:71: note: in definition of macro 'JARVIS_LOG_INFO'
28 | #define JARVIS_LOG_INFO(...) jarvis_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
| ^~~~~~~~~~~
src/jarvis.cpp:18918:34: note: format string is defined here
18918 | JARVIS_LOG_INFO("%s: loaded %ld tensors from lora file\n", __func__, adapter.ab_map.size()*2);
| ~~^
| |
| long int
| %lld
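Every -Wformat= warning of this shape in the log has the same root cause: Windows is an LLP64 platform, so long stays 32 bits while size_t and the STL size_types are 64 bits, which makes %ld and %lu describe the wrong width (the identical pattern repeats below in jarvis-bench, export-lora, gguf-split, and retrieval). GCC's suggested %lld with an explicit cast is the portable fix; a sketch of the line above, assuming JARVIS_LOG_INFO forwards to a printf-style function:

    // Sketch: cast to long long so the argument width matches %lld on LLP64 and LP64 alike.
    JARVIS_LOG_INFO("%s: loaded %lld tensors from lora file\n",
                    __func__, (long long) (adapter.ab_map.size()*2));

On C runtimes with C99 printf support, %zu with a size_t cast is the more idiomatic spelling.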
src/jarvis.cpp: In function 'float* jarvis_get_logits_ith(jarvis_context*, int32_t)':
src/jarvis.cpp:21259:65: warning: format '%lu' expects argument of type 'long unsigned int', but argument 2 has type 'std::vector<int>::size_type' {aka 'long long unsigned int'} [-Wformat=]
21259 | throw std::runtime_error(format("out of range [0, %lu)", ctx->output_ids.size()));
| ~~^ ~~~~~~~~~~~~~~~~~~~~~~
| | |
| long unsigned int std::vector<int>::size_type {aka long long unsigned int}
| %llu
src/jarvis.cpp: In function 'float* jarvis_get_embeddings_ith(jarvis_context*, int32_t)':
src/jarvis.cpp:21309:65: warning: format '%lu' expects argument of type 'long unsigned int', but argument 2 has type 'std::vector<int>::size_type' {aka 'long long unsigned int'} [-Wformat=]
21309 | throw std::runtime_error(format("out of range [0, %lu)", ctx->output_ids.size()));
| ~~^ ~~~~~~~~~~~~~~~~~~~~~~
| | |
| long unsigned int std::vector<int>::size_type {aka long long unsigned int}
| %llu
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/gguf/gguf.cpp -o examples/gguf/gguf.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c pocs/vdot/q8dot.cpp -o pocs/vdot/q8dot.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c pocs/vdot/vdot.cpp -o pocs/vdot/vdot.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o examples/gguf/gguf.o -o jarvis-gguf
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/ggml.o ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o pocs/vdot/q8dot.o -o jarvis-q8dot
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/ggml.o ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o pocs/vdot/vdot.o -o jarvis-vdot
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -static -fPIC -c examples/llava/llava.cpp -o libllava.a -Wno-cast-qual
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/baby-jarvis/baby-jarvis.cpp -o examples/baby-jarvis/baby-jarvis.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/batched/batched.cpp -o examples/batched/batched.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/batched-bench/batched-bench.cpp -o examples/batched-bench/batched-bench.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/jarvis-bench/jarvis-bench.cpp -o examples/jarvis-bench/jarvis-bench.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/main/main.cpp -o examples/main/main.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/convert-jarvis2c-to-ggml/convert-jarvis2c-to-ggml.cpp -o examples/convert-jarvis2c-to-ggml/convert-jarvis2c-to-ggml.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/baby-jarvis/baby-jarvis.o -o jarvis-baby-jarvis
In file included from examples/main/main.cpp:4:
examples/main/main.cpp: In function 'void print_usage(int, char**)':
examples/main/main.cpp:48:9: warning: too many arguments for format [-Wformat-extra-args]
   48 |         LOG("\n text generation: -m your_model.gguf -p \"I believe the meaning of life is\" -n 128\n", argv[0]);
      |             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
common/log.h:75:56: note: in definition of macro 'LOG_TMPL'
   75 |     common_log_add(common_log_main(), (level), __VA_ARGS__); \
      |                                                ^~~~~~~~~~~
examples/main/main.cpp:48:5: note: in expansion of macro 'LOG'
   48 |         LOG("\n text generation: -m your_model.gguf -p \"I believe the meaning of life is\" -n 128\n", argv[0]);
      |         ^~~
examples/main/main.cpp:49:9: warning: too many arguments for format [-Wformat-extra-args]
   49 |         LOG("\n chat (conversation): -m your_model.gguf -p \"You are a helpful assistant\" -cnv\n", argv[0]);
      |             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
common/log.h:75:56: note: in definition of macro 'LOG_TMPL'
   75 |     common_log_add(common_log_main(), (level), __VA_ARGS__); \
      |                                                ^~~~~~~~~~~
examples/main/main.cpp:49:5: note: in expansion of macro 'LOG'
   49 |         LOG("\n chat (conversation): -m your_model.gguf -p \"You are a helpful assistant\" -cnv\n", argv[0]);
      |         ^~~
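-Wformat-extra-args is the mirror-image problem: the usage strings contain no conversion at all, yet argv[0] is passed, so the program name is silently discarded. Judging by the sentence shape, a %s was probably intended; a sketch of that reading (hedged, since simply dropping argv[0] would also silence the warning):

    // Sketch: give argv[0] a %s so the usage line actually shows the program name.
    LOG("\n text generation: %s -m your_model.gguf -p \"I believe the meaning of life is\" -n 128\n", argv[0]);
    LOG("\n chat (conversation): %s -m your_model.gguf -p \"You are a helpful assistant\" -cnv\n", argv[0]);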
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/batched/batched.o -o jarvis-batched
examples/jarvis-bench/jarvis-bench.cpp: In constructor 'test::test(const cmd_params_instance&, const jarvis_model*, const jarvis_context*)':
examples/jarvis-bench/jarvis-bench.cpp:911:43: warning: unknown conversion type character 'F' in format [-Wformat=]
  911 |         std::strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&t));
      |                                           ^
examples/jarvis-bench/jarvis-bench.cpp:911:46: warning: unknown conversion type character 'T' in format [-Wformat=]
  911 |         std::strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&t));
      |                                              ^
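%F and %T are C99/POSIX strftime conversions (equivalent to %Y-%m-%d and %H:%M:%S); GCC warns because the MinGW target's C runtime is not guaranteed to implement them, so the timestamp can come out empty at runtime. Spelling the fields out avoids the dependency; a minimal sketch:

    // Sketch: expand %F and %T so the ISO-8601 timestamp works on pre-C99 runtimes.
    std::strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", gmtime(&t));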
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/batched-bench/batched-bench.o -o jarvis-batched-bench
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/embedding/embedding.cpp -o examples/embedding/embedding.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/eval-callback/eval-callback.cpp -o examples/eval-callback/eval-callback.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/export-lora/export-lora.cpp -o examples/export-lora/export-lora.o
examples/jarvis-bench/jarvis-bench.cpp: In function 'int main(int, char**)':
examples/jarvis-bench/jarvis-bench.cpp:1544:58: warning: format '%ld' expects argument of type 'long int', but argument 4 has type 'size_t' {aka 'long long unsigned int'} [-Wformat=]
 1544 |         fprintf(stderr, "jarvis-bench: benchmark %d/%ld: starting\n", params_idx, params_count);
      |                                                     ~~^               ~~~~~~~~~~~~
      |                                                       |               |
      |                                                       long int        size_t {aka long long unsigned int}
      |                                                       %lld
examples/jarvis-bench/jarvis-bench.cpp:1596:62: warning: format '%ld' expects argument of type 'long int', but argument 4 has type 'size_t' {aka 'long long unsigned int'} [-Wformat=]
 1596 |         fprintf(stderr, "jarvis-bench: benchmark %d/%ld: warmup prompt run\n", params_idx, params_count);
      |                                                     ~~^                        ~~~~~~~~~~~~
      |                                                       |                        |
      |                                                       long int                 size_t {aka long long unsigned int}
      |                                                       %lld
examples/jarvis-bench/jarvis-bench.cpp:1603:62: warning: format '%ld' expects argument of type 'long int', but argument 4 has type 'size_t' {aka 'long long unsigned int'} [-Wformat=]
 1603 |         fprintf(stderr, "jarvis-bench: benchmark %d/%ld: warmup generation run\n", params_idx, params_count);
      |                                                     ~~^                            ~~~~~~~~~~~~
      |                                                       |                            |
      |                                                       long int                     size_t {aka long long unsigned int}
      |                                                       %lld
examples/jarvis-bench/jarvis-bench.cpp:1615: warning: format '%ld' expects argument of type 'long int', but argument 4 has type 'size_t' {aka 'long long unsigned int'} [-Wformat=]
 1615 |             fprintf(stderr, "jarvis-bench: benchmark %d/%ld: prompt run %d/%d\n", params_idx, params_count, i + 1, params.reps);
      |                                                         ~~^                       ~~~~~~~~~~~~
      |                                                           |                       |
      |                                                           long int                size_t {aka long long unsigned int}
      |                                                           %lld
examples/jarvis-bench/jarvis-bench.cpp:1621:66: warning: format '%ld' expects argument of type 'long int', but argument 4 has type 'size_t' {aka 'long long unsigned int'} [-Wformat=]
 1621 |             fprintf(stderr, "jarvis-bench: benchmark %d/%ld: generation run %d/%d\n", params_idx, params_count, i + 1, params.reps);
      |                                                         ~~^                           ~~~~~~~~~~~~
      |                                                           |                           |
      |                                                           long int                    size_t {aka long long unsigned int}
      |                                                           %lld
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/convert-jarvis2c-to-ggml/convert-jarvis2c-to-ggml.o -o jarvis-convert-jarvis2c-to-ggml
examples/export-lora/export-lora.cpp: In member function 'void lora_merge_ctx::run_merge()':
examples/export-lora/export-lora.cpp:268:31: warning: format '%ld' expects argument of type 'long int', but argument 3 has type 'size_t' {aka 'long long unsigned int'} [-Wformat=]
  268 |         printf("%s : merged %ld tensors with lora adapters\n", __func__, n_merged);
      |                             ~~^                                          ~~~~~~~~
      |                               |                                          |
      |                               long int                                   size_t {aka long long unsigned int}
      |                               %lld
examples/export-lora/export-lora.cpp:269:30: warning: format '%ld' expects argument of type 'long int', but argument 3 has type 'std::vector<tensor_transformation>::size_type' {aka 'long long unsigned int'} [-Wformat=]
  269 |         printf("%s : wrote %ld tensors to output file\n", __func__, trans.size());
      |                            ~~^                                      ~~~~~~~~~~~~
      |                              |                                      |
      |                              long int                               std::vector<tensor_transformation>::size_type {aka long long unsigned int}
      |                              %lld
examples/export-lora/export-lora.cpp: In member function 'void lora_merge_ctx::merge_tensor(ggml_tensor*, ggml_tensor*)':
examples/export-lora/export-lora.cpp:355:57: warning: format '%ld' expects argument of type 'long int', but argument 3 has type 'size_t' {aka 'long long unsigned int'} [-Wformat=]
  355 |             printf("%s : + merging from adapter[%ld] type=%s\n", __func__, i, ggml_type_name(inp_a[i]->type));
      |                                                 ~~^                         ~
      |                                                   |                         |
      |                                                   long int                  size_t {aka long long unsigned int}
      |                                                   %lld
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/main/main.o -o jarvis-cli
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/embedding/embedding.o -o jarvis-embedding
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/eval-callback/eval-callback.o -o jarvis-eval-callback
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/gbnf-validator/gbnf-validator.cpp -o examples/gbnf-validator/gbnf-validator.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -Iexamples/gguf-hash/deps -c examples/gguf-hash/gguf-hash.cpp -o examples/gguf-hash/gguf-hash.o
==== CLI ./jarvis-cli -h for help. ====
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/gbnf-validator/gbnf-validator.o -o jarvis-gbnf-validator
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/export-lora/export-lora.o -o jarvis-export-lora
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/gguf-split/gguf-split.cpp -o examples/gguf-split/gguf-split.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX examples/gguf-hash/deps/sha1/sha1.o examples/gguf-hash/deps/xxhash/xxhash.o examples/gguf-hash/deps/sha256/sha256.o ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/gguf-hash/gguf-hash.o -o jarvis-gguf-hash
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/gritlm/gritlm.cpp -o examples/gritlm/gritlm.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/imatrix/imatrix.cpp -o examples/imatrix/imatrix.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/infill/infill.cpp -o examples/infill/infill.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/gritlm/gritlm.o -o jarvis-gritlm
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX examples/llava/llava-cli.cpp examples/llava/llava.cpp examples/llava/clip.cpp ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o -o jarvis-llava-cli -Wno-cast-qual
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX examples/llava/minicpmv-cli.cpp examples/llava/llava.cpp examples/llava/clip.cpp ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o -o jarvis-minicpmv-cli -Wno-cast-qual
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/lookahead/lookahead.cpp -o examples/lookahead/lookahead.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/lookup/lookup.cpp -o examples/lookup/lookup.o
examples/gguf-split/gguf-split.cpp: In member function 'void split_strategy::print_info()':
examples/gguf-split/gguf-split.cpp:290:28: warning: format '%ld' expects argument of type 'long int', but argument 2 has type 'std::vector<gguf_context*>::size_type' {aka 'long long unsigned int'} [-Wformat=]
  290 |         printf("n_split: %ld\n", ctx_outs.size());
      |                          ~~^     ~~~~~~~~~~~~~~~
      |                            |     |
      |                            long int   std::vector<gguf_context*>::size_type {aka long long unsigned int}
      |                            %lld
examples/gguf-split/gguf-split.cpp:300:64: warning: format '%ld' expects argument of type 'long int', but argument 4 has type 'size_t' {aka 'long long unsigned int'} [-Wformat=]
  300 |         printf("split %05d: n_tensors = %d, total_size = %ldM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
      |                                                          ~~^                                                ~~~~~~~~~~
      |                                                            |                                                |
      |                                                            long int                                         size_t {aka long long unsigned int}
      |                                                            %lld
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/imatrix/imatrix.o -o jarvis-imatrix
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/lookahead/lookahead.o -o jarvis-lookahead
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/lookup/lookup.o -o jarvis-lookup
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/jarvis-bench/jarvis-bench.o -o jarvis-bench
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/lookup/lookup-create.cpp -o examples/lookup/lookup-create.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/lookup/lookup-merge.cpp -o examples/lookup/lookup-merge.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/lookup/lookup-stats.cpp -o examples/lookup/lookup-stats.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/lookup/lookup-create.o -o jarvis-lookup-create
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/lookup/lookup-merge.o -o jarvis-lookup-merge
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/parallel/parallel.cpp -o examples/parallel/parallel.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/lookup/lookup-stats.o -o jarvis-lookup-stats
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/passkey/passkey.cpp -o examples/passkey/passkey.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/perplexity/perplexity.cpp -o examples/perplexity/perplexity.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/quantize/quantize.cpp -o examples/quantize/quantize.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/quantize-stats/quantize-stats.cpp -o examples/quantize-stats/quantize-stats.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/parallel/parallel.o -o jarvis-parallel
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/passkey/passkey.o -o jarvis-passkey
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/perplexity/perplexity.o -o jarvis-perplexity
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/gguf-split/gguf-split.o -o jarvis-gguf-split
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/quantize/quantize.o -o jarvis-quantize
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/retrieval/retrieval.cpp -o examples/retrieval/retrieval.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/save-load-state/save-load-state.cpp -o examples/save-load-state/save-load-state.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/server/server.cpp -o examples/server/server.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/simple/simple.cpp -o examples/simple/simple.o
In file included from examples/retrieval/retrieval.cpp:3:
examples/retrieval/retrieval.cpp: In function 'int main(int, char**)':
examples/retrieval/retrieval.cpp:146:13: warning: format '%ld' expects argument of type 'long int', but argument 4 has type 'std::vector<chunk>::size_type' {aka 'long long unsigned int'} [-Wformat=]
  146 |     LOG_INF("Number of chunks: %ld\n", chunks.size());
      |                                ~~^     ~~~~~~~~~~~~~
      |                                  |                 |
      |                                  long int          std::vector<chunk>::size_type {aka long long unsigned int}
      |                                %lld
common/log.h:75:56: note: in definition of macro 'LOG_TMPL'
   75 |     common_log_add(common_log_main(), (level), __VA_ARGS__); \
      |                                                        ^~~~~~~~~~~
examples/retrieval/retrieval.cpp:146:5: note: in expansion of macro 'LOG_INF'
  146 |     LOG_INF("Number of chunks: %ld\n", chunks.size());
      |     ^~~~~~~
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/quantize-stats/quantize-stats.o -o jarvis-quantize-stats
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/save-load-state/save-load-state.o -o jarvis-save-load-state
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/simple/simple.o -o jarvis-simple
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/speculative/speculative.cpp -o examples/speculative/speculative.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/tokenize/tokenize.cpp -o examples/tokenize/tokenize.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/cvector-generator/cvector-generator.cpp -o examples/cvector-generator/cvector-generator.o
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX -c examples/gen-docs/gen-docs.cpp -o examples/gen-docs/gen-docs.o
In file included from examples/cvector-generator/cvector-generator.cpp:5:
examples/cvector-generator/pca.hpp: In function 'void PCA::run_pca(pca_params&, const std::vector<ggml_tensor*>&, const std::vector<ggml_tensor*>&)':
examples/cvector-generator/pca.hpp:305:49: warning: format '%ld' expects argument of type 'long int', but argument 3 has type 'size_t' {aka 'long long unsigned int'} [-Wformat=]
  305 |         ggml_format_name(ctrl_out, "direction.%ld", il+1);
      |                                               ~~^   ~~~~
      |                                                 |      |
      |                                                 long int   size_t {aka long long unsigned int}
      |                                               %lld
examples/tokenize/tokenize.cpp: In function 'int main(int, char**)':
examples/tokenize/tokenize.cpp:397: warning: format '%ld' expects argument of type 'long int', but argument 2 has type 'std::vector<int>::size_type' {aka 'long long unsigned int'} [-Wformat=]
  397 |     printf("Total number of tokens: %ld\n",
      |                                     ~~^
      |                                       |
      |                                       long int
      |                                     %lld
In file included from examples/cvector-generator/cvector-generator.cpp:6:
examples/cvector-generator/mean.hpp: In function 'void mean::run(const std::vector<ggml_tensor*>&, const std::vector<ggml_tensor*>&)':
examples/cvector-generator/mean.hpp:18:49: warning: format '%ld' expects argument of type 'long int', but argument 3 has type 'size_t' {aka 'long long unsigned int'} [-Wformat=]
   18 |         ggml_format_name(ctrl_out, "direction.%ld", il+1);
      |                                               ~~^   ~~~~
      |                                                 |      |
      |                                                 long int   size_t {aka long long unsigned int}
      |                                               %lld
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/gen-docs/gen-docs.o -o jarvis-gen-docs
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/tokenize/tokenize.o -o jarvis-tokenize
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/speculative/speculative.o -o jarvis-speculative
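All four warnings above are the same LLP64 portability issue: on this Windows target 'long' is 32 bits, while size_t and the vector size_type are 64-bit unsigned, so the '%ld' conversion does not match. A minimal sketch of the two usual fixes, assuming plain printf-style formatting as the diagnostics indicate; the standalone program and its 'chunks' variable are illustrative, not taken from the sources:

    // Hypothetical standalone demo of fixing the -Wformat mismatches above.
    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<int> chunks = {1, 2, 3};

        // Option 1: cast explicitly and use a matching 64-bit conversion.
        std::printf("Number of chunks: %llu\n",
                    (unsigned long long) chunks.size());

        // Option 2: the C99/C++11 '%zu' length modifier for size_t, provided
        // the C runtime honors it (MinGW-w64 does with __USE_MINGW_ANSI_STDIO).
        std::printf("Number of chunks: %zu\n", chunks.size());

        return 0;
    }

GCC's suggested '%lld' also silences the warning, but the arguments here are unsigned, so '%llu' (or '%zu') is the more precise match.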
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/retrieval/retrieval.o -o jarvis-retrieval
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o examples/cvector-generator/cvector-generator.o -o jarvis-cvector-generator
c++ -std=c++11 -fPIC -O3 -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -Xassembler -muse-unaligned-vector-move -fopenmp -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -D_XOPEN_SOURCE=600 -DNDEBUG -D_WIN32_WINNT=0x602 -DGGML_USE_OPENMP -DGGML_USE_JARVISFILE -DGGML_USE_AMX ggml/src/jarvisfile/sgemm.o ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o ggml/src/ggml.o ggml/src/ggml-alloc.o ggml/src/ggml-backend.o ggml/src/ggml-quants.o ggml/src/ggml-aarch64.o src/jarvis.o src/jarvis-vocab.o src/jarvis-grammar.o src/jarvis-sampling.o src/unicode.o src/unicode-data.o common/common.o common/arg.o common/log.o common/console.o common/ngram-cache.o common/sampling.o common/train.o common/build-info.o common/json-schema-to-grammar.o -Iexamples/server examples/server/server.o -o jarvis-server -lws2_32
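The server link line is the only one that adds -lws2_32: Windows sockets live in ws2_32.dll, and a program using them must also initialize Winsock before the first socket call. A generic sketch of that boilerplate, assuming nothing about the server's own code:

    // Why -lws2_32 is required: generic Winsock setup/teardown.
    // Build (hypothetical file name): c++ winsock_demo.cpp -lws2_32
    #include <winsock2.h>
    #include <cstdio>

    int main() {
        WSADATA wsa;
        if (WSAStartup(MAKEWORD(2, 2), &wsa) != 0) {  // request Winsock 2.2
            std::fprintf(stderr, "WSAStartup failed\n");
            return 1;
        }
        // socket(), bind(), listen(), ... would go here
        WSACleanup();  // release the Winsock DLL
        return 0;
    }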
~/Downloads/jarvis.cpp-master/source $