Prefix all example bins w/ llama-

Olivier Chafik 2024-06-08 13:42:01 +01:00
parent 23d0df5bd5
commit ab5efbb3b6
52 changed files with 288 additions and 254 deletions
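The user-visible effect is uniform across the examples: every example binary is now built and invoked with a `llama-` prefix. A minimal sketch of the new invocation style, reusing commands from the READMEs changed below (model paths are placeholders):

```bash
# build as usual from the repository root
make -j

# example binaries now carry the llama- prefix
./llama-batched-bench -m model.gguf -c 2048 -b 2048 -ub 512 -npp 128,256,512 -ntg 128,256 -npl 1,2,4,8,16,32
./llama-embedding -m ./path/to/model --log-disable -p "Hello World!" 2>/dev/null
```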

View file

@@ -1,4 +1,4 @@
set(TARGET baby-llama)
set(TARGET llama-baby)
add_executable(${TARGET} baby-llama.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})

View file

@@ -1,4 +1,4 @@
set(TARGET batched-bench)
set(TARGET llama-batched-bench)
add_executable(${TARGET} batched-bench.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})

View file

@@ -10,16 +10,16 @@ There are 2 modes of operation:
- `prompt is shared` - there is a common prompt of size `PP` used by all batches (i.e. `N_KV = PP + B*TG`)
```bash
./batched-bench -m model.gguf -c 2048 -b 2048 -ub 512 -npp 128,256,512 -ntg 128,256 -npl 1,2,4,8,16,32 [-pps]
./llama-batched-bench -m model.gguf -c 2048 -b 2048 -ub 512 -npp 128,256,512 -ntg 128,256 -npl 1,2,4,8,16,32 [-pps]
# LLaMA 7B, F16, N_KV_MAX = 16384 (8GB), prompt not shared
./batched-bench -m ./models/llama-7b/ggml-model-f16.gguf -c 16384 -b 2048 -ub 512 -ngl 99
./llama-batched-bench -m ./models/llama-7b/ggml-model-f16.gguf -c 16384 -b 2048 -ub 512 -ngl 99
# LLaMA 7B, Q8_0, N_KV_MAX = 16384 (8GB), prompt is shared
./batched-bench -m ./models/llama-7b/ggml-model-q8_0.gguf -c 16384 -b 2048 -ub 512 -ngl 99 -pps
./llama-batched-bench -m ./models/llama-7b/ggml-model-q8_0.gguf -c 16384 -b 2048 -ub 512 -ngl 99 -pps
# custom set of batches
./batched-bench -m ./models/llama-7b/ggml-model-q8_0.gguf -c 2048 -b 512 -ub 512 -ngl 999 -npp 128,256,512 -ntg 128,256 -npl 1,2,4,8,16,32
./llama-batched-bench -m ./models/llama-7b/ggml-model-q8_0.gguf -c 2048 -b 512 -ub 512 -ngl 999 -npp 128,256,512 -ntg 128,256 -npl 1,2,4,8,16,32
```
## Sample results

View file

@@ -1,6 +1,6 @@
.PHONY: build
build:
xcodebuild -scheme batched_swift -destination "generic/platform=macOS" -derivedDataPath build
rm -f ./batched_swift
ln -s ./build/Build/Products/Debug/batched_swift ./batched_swift
xcodebuild -scheme llama-batched-swift -destination "generic/platform=macOS" -derivedDataPath build
rm -f ./llama-batched-swift
ln -s ./build/Build/Products/Debug/llama-batched-swift ./llama-batched-swift

View file

@@ -4,7 +4,7 @@
import PackageDescription
let package = Package(
name: "batched_swift",
name: "llama-batched-swift",
platforms: [.macOS(.v12)],
dependencies: [
.package(name: "llama", path: "../../"),
@@ -13,7 +13,7 @@ let package = Package(
// Targets are the basic building blocks of a package, defining a module or a test suite.
// Targets can depend on other targets in this package and products from dependencies.
.executableTarget(
name: "batched_swift",
name: "llama-batched-swift",
dependencies: ["llama"],
path: "Sources",
linkerSettings: [.linkedFramework("Foundation"), .linkedFramework("AppKit")]

View file

@@ -1,4 +1,4 @@
This is a swift clone of `examples/batched`.
$ `make`
$ `./batched_swift MODEL_PATH [PROMPT] [PARALLEL]`
$ `./llama-batched-swift MODEL_PATH [PROMPT] [PARALLEL]`

View file

@@ -0,0 +1 @@
./build/Build/Products/Debug/llama-batched-swift

View file

@@ -1,4 +1,4 @@
set(TARGET batched)
set(TARGET llama-batched)
add_executable(${TARGET} batched.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})

View file

@@ -3,7 +3,7 @@
The example demonstrates batched generation from a given prompt
```bash
./batched -m ./models/llama-7b-v2/ggml-model-f16.gguf -p "Hello my name is" -np 4
./llama-batched -m ./models/llama-7b-v2/ggml-model-f16.gguf -p "Hello my name is" -np 4
...

View file

@@ -1,4 +1,4 @@
set(TARGET benchmark)
set(TARGET llama-bench-matmult)
add_executable(${TARGET} benchmark-matmult.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE llama build_info ${CMAKE_THREAD_LIBS_INIT})

View file

@@ -1,4 +1,4 @@
set(TARGET convert-llama2c-to-ggml)
set(TARGET llama-convert-llama2c-to-ggml)
add_executable(${TARGET} convert-llama2c-to-ggml.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})

View file

@@ -8,7 +8,7 @@ To convert the model first download the models from the [llama2.c](https://githu
After successful compilation, the following usage options are available:
```
usage: ./convert-llama2c-to-ggml [options]
usage: ./llama-convert-llama2c-to-ggml [options]
options:
-h, --help show this help message and exit
@@ -19,7 +19,7 @@ options:
An example command using a model from [karpathy/tinyllamas](https://huggingface.co/karpathy/tinyllamas) is as follows:
`$ ./convert-llama2c-to-ggml --copy-vocab-from-model llama-2-7b-chat.gguf.q2_K.bin --llama2c-model stories42M.bin --llama2c-output-model stories42M.gguf.bin`
`$ ./llama-convert-llama2c-to-ggml --copy-vocab-from-model llama-2-7b-chat.gguf.q2_K.bin --llama2c-model stories42M.bin --llama2c-output-model stories42M.gguf.bin`
Note: The vocabulary for `stories260K.bin` should be its own tokenizer `tok512.bin` found in [karpathy/tinyllamas/stories260K](https://huggingface.co/karpathy/tinyllamas/tree/main/stories260K).
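For the `stories260K` case in the note above, a hedged sketch (file paths are illustrative, and it is assumed that `--copy-vocab-from-model` accepts the llama2.c tokenizer file directly, as the note suggests):

```bash
# assumption: tok512.bin has been downloaded next to stories260K.bin
./llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model ./stories260K.bin --llama2c-output-model ./stories260K.gguf.bin
```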

View file

@@ -1,4 +1,4 @@
set(TARGET embedding)
set(TARGET llama-embedding)
add_executable(${TARGET} embedding.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})

View file

@@ -9,13 +9,13 @@ To get started right away, run the following command, making sure to use the cor
### Unix-based systems (Linux, macOS, etc.):
```bash
./embedding -m ./path/to/model --log-disable -p "Hello World!" 2>/dev/null
./llama-embedding -m ./path/to/model --log-disable -p "Hello World!" 2>/dev/null
```
### Windows:
```powershell
embedding.exe -m ./path/to/model --log-disable -p "Hello World!" 2>$null
llama-embedding.exe -m ./path/to/model --log-disable -p "Hello World!" 2>$null
```
The above command will output space-separated float values.
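Since the output is just space-separated floats, the embedding size can be sanity-checked by counting them; a small sketch reusing the Unix command above (the model path is a placeholder):

```bash
# the word count equals the number of embedding values printed for the prompt
./llama-embedding -m ./path/to/model --log-disable -p "Hello World!" 2>/dev/null | wc -w
```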

View file

@@ -1,4 +1,4 @@
set(TARGET eval-callback)
set(TARGET llama-eval-callback)
add_executable(${TARGET} eval-callback.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})

View file

@@ -1,4 +1,4 @@
set(TARGET export-lora)
set(TARGET llama-export-lora)
add_executable(${TARGET} export-lora.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})

View file

@@ -17,7 +17,7 @@ options:
For example:
```bash
./bin/export-lora \
./bin/llama-export-lora \
-m open-llama-3b-v2-q8_0.gguf \
-o open-llama-3b-v2-q8_0-english2tokipona-chat.gguf \
-l lora-open-llama-3b-v2-q8_0-english2tokipona-chat-LATEST.bin

View file

@@ -1,4 +1,4 @@
set(TARGET finetune)
set(TARGET llama-finetune)
add_executable(${TARGET} finetune.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})

View file

@@ -7,7 +7,7 @@ Basic usage instructions:
wget https://raw.githubusercontent.com/brunoklein99/deep-learning-notes/master/shakespeare.txt
# finetune LORA adapter
./bin/finetune \
./bin/llama-finetune \
--model-base open-llama-3b-v2-q8_0.gguf \
--checkpoint-in chk-lora-open-llama-3b-v2-q8_0-shakespeare-LATEST.gguf \
--checkpoint-out chk-lora-open-llama-3b-v2-q8_0-shakespeare-ITERATION.gguf \

View file

@@ -2,7 +2,7 @@
cd `dirname $0`
cd ../..
EXE="./finetune"
EXE="./llama-finetune"
if [[ ! $LLAMA_MODEL_DIR ]]; then LLAMA_MODEL_DIR="./models"; fi
if [[ ! $LLAMA_TRAINING_DIR ]]; then LLAMA_TRAINING_DIR="."; fi

View file

@@ -1,4 +1,4 @@
set(TARGET gbnf-validator)
set(TARGET llama-gbnf-validator)
add_executable(${TARGET} gbnf-validator.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common grammar-parser llama ${CMAKE_THREAD_LIBS_INIT})

View file

@@ -1,4 +1,4 @@
set(TARGET gguf-split)
set(TARGET llama-gguf-split)
add_executable(${TARGET} gguf-split.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})

View file

@@ -1,4 +1,4 @@
set(TARGET gguf)
set(TARGET llama-gguf)
add_executable(${TARGET} gguf.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE ggml ${CMAKE_THREAD_LIBS_INIT})

View file

@@ -1,4 +1,4 @@
set(TARGET gritlm)
set(TARGET llama-gritlm)
add_executable(${TARGET} gritlm.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})

View file

@@ -26,7 +26,7 @@ $ scripts/hf.sh --repo cohesionet/GritLM-7B_gguf --file gritlm-7b_q4_1.gguf --ou
Run the example using the downloaded model:
```console
$ ./gritlm -m models/gritlm-7b_q4_1.gguf
$ ./llama-gritlm -m models/gritlm-7b_q4_1.gguf
Cosine similarity between "Bitcoin: A Peer-to-Peer Electronic Cash System" and "A purely peer-to-peer version of electronic cash w" is: 0.605
Cosine similarity between "Bitcoin: A Peer-to-Peer Electronic Cash System" and "All text-based language problems can be reduced to" is: 0.103

View file

@@ -1,4 +1,4 @@
set(TARGET imatrix)
set(TARGET llama-imatrix)
add_executable(${TARGET} imatrix.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})

View file

@@ -6,7 +6,7 @@ More information is available here: https://github.com/ggerganov/llama.cpp/pull/
## Usage
```
./imatrix \
./llama-imatrix \
-m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] [--verbosity 1] \
[--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \
[--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]
@@ -28,8 +28,8 @@ For faster computation, make sure to use GPU offloading via the `-ngl` argument
LLAMA_CUDA=1 make -j
# generate importance matrix (imatrix.dat)
./imatrix -m ggml-model-f16.gguf -f train-data.txt -ngl 99
./llama-imatrix -m ggml-model-f16.gguf -f train-data.txt -ngl 99
# use the imatrix to perform a Q4_K_M quantization
./quantize --imatrix imatrix.dat ggml-model-f16.gguf ./ggml-model-q4_k_m.gguf q4_k_m
./llama-quantize --imatrix imatrix.dat ggml-model-f16.gguf ./ggml-model-q4_k_m.gguf q4_k_m
```

View file

@@ -1,4 +1,4 @@
set(TARGET infill)
set(TARGET llama-infill)
add_executable(${TARGET} infill.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})

View file

@@ -42,5 +42,5 @@ scripts/hf.sh --repo TheBloke/CodeLlama-13B-GGUF --file codellama-13b.Q5_K_S.ggu
```
```bash
./infill -t 10 -ngl 0 -m models/codellama-13b.Q5_K_S.gguf -c 4096 --temp 0.7 --repeat_penalty 1.1 -n 20 --in-prefix "def helloworld():\n print(\"hell" --in-suffix "\n print(\"goodbye world\")\n "
./llama-infill -t 10 -ngl 0 -m models/codellama-13b.Q5_K_S.gguf -c 4096 --temp 0.7 --repeat_penalty 1.1 -n 20 --in-prefix "def helloworld():\n print(\"hell" --in-suffix "\n print(\"goodbye world\")\n "
```

View file

@@ -32,6 +32,7 @@ endif()
set(TARGET llava-cli)
add_executable(llava-cli llava-cli.cpp)
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-llava)
install(TARGETS llava-cli RUNTIME)
target_link_libraries(llava-cli PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(llava PRIVATE cxx_std_11)

View file

@@ -9,12 +9,12 @@ The implementation is based on llava, and is compatible with llava and mobileVLM
Notice: The overall model-inference process is the same for both **MobileVLM** and **MobileVLM_V2**, but the model-conversion process differs slightly. Therefore, the differing conversion step is shown below using **MobileVLM-1.7B** as an example.
## Usage
Build with cmake or run `make llava-cli` to build it.
Build with cmake or run `make llama-llava` to build it.
After building, run: `./llava-cli` to see the usage. For example:
After building, run: `./llama-llava` to see the usage. For example:
```sh
./llava-cli -m MobileVLM-1.7B/ggml-model-q4_k.gguf \
./llama-llava -m MobileVLM-1.7B/ggml-model-q4_k.gguf \
--mmproj MobileVLM-1.7B/mmproj-model-f16.gguf \
--image path/to/an/image.jpg \
-p "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\nWho is the author of this book? Answer the question using a single word or phrase. ASSISTANT:"
@@ -62,7 +62,7 @@ python ./examples/convert-legacy-llama.py path/to/MobileVLM-1.7B
5. Use `quantize` to convert LLaMA part's DataType from `fp16` to `q4_k`
```sh
./quantize path/to/MobileVLM-1.7B/ggml-model-f16.gguf path/to/MobileVLM-1.7B/ggml-model-q4_k.gguf q4_k_s
./llama-quantize path/to/MobileVLM-1.7B/ggml-model-f16.gguf path/to/MobileVLM-1.7B/ggml-model-q4_k.gguf q4_k_s
```
Now both the LLaMA part and the image encoder are in the `MobileVLM-1.7B` directory.
@@ -82,7 +82,7 @@ refer to `android/adb_run.sh`, modify resources' `name` and `path`
### case 1
**input**
```sh
/data/local/tmp/llava-cli \
/data/local/tmp/llama-llava \
-m /data/local/tmp/ggml-model-q4_k.gguf \
--mmproj /data/local/tmp/mmproj-model-f16.gguf \
-t 4 \
@@ -102,7 +102,7 @@ llama_print_timings: total time = 34731.93 ms
### case 2
**input**
```sh
/data/local/tmp/llava-cli \
/data/local/tmp/llama-llava \
-m /data/local/tmp/ggml-model-q4_k.gguf \
--mmproj /data/local/tmp/mmproj-model-f16.gguf \
-t 4 \
@@ -126,7 +126,7 @@ llama_print_timings: total time = 34570.79 ms
#### llava-cli release-b2005
**input**
```sh
/data/local/tmp/llava-cli \
/data/local/tmp/llama-llava \
-m /data/local/tmp/ggml-model-q4_k.gguf \
--mmproj /data/local/tmp/mmproj-model-f16.gguf \
-t 4 \
@@ -200,7 +200,7 @@ make LLAMA_CUDA=1 CUDA_DOCKER_ARCH=sm_87 LLAMA_CUDA_F16=1 -j 32
### case 1
**input**
```sh
./llava-cli \
./llama-llava \
-m /data/local/tmp/ggml-model-q4_k.gguf \
--mmproj /data/local/tmp/mmproj-model-f16.gguf \
--image /data/local/tmp/demo.jpeg \
@@ -224,7 +224,7 @@ llama_print_timings: total time = 1352.63 ms / 252 tokens
### case 2
**input**
```sh
./llava-cli \
./llama-llava \
-m /data/local/tmp/ggml-model-q4_k.gguf \
--mmproj /data/local/tmp/mmproj-model-f16.gguf \
-p "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\nWhat is in the image? ASSISTANT:" \

View file

@@ -11,12 +11,12 @@ For llava-1.6 a variety of prepared gguf models are available as well [7b-34b](h
After the API is confirmed, more models will be supported / uploaded.
## Usage
Build with cmake or run `make llava-cli` to build it.
Build with cmake or run `make llama-llava` to build it.
After building, run: `./llava-cli` to see the usage. For example:
After building, run: `./llama-llava` to see the usage. For example:
```sh
./llava-cli -m ../llava-v1.5-7b/ggml-model-f16.gguf --mmproj ../llava-v1.5-7b/mmproj-model-f16.gguf --image path/to/an/image.jpg
./llama-llava -m ../llava-v1.5-7b/ggml-model-f16.gguf --mmproj ../llava-v1.5-7b/mmproj-model-f16.gguf --image path/to/an/image.jpg
```
**note**: A lower temperature like 0.1 is recommended for better quality; add `--temp 0.1` to the command to do so.
@@ -95,9 +95,9 @@ python ./examples/llava/convert-image-encoder-to-gguf.py -m vit --llava-projecto
python ./examples/convert-legacy-llama.py ../llava-v1.6-vicuna-7b/ --skip-unknown
```
7) And finally we can run the llava-cli using the 1.6 model version:
7) And finally we can run the llava cli using the 1.6 model version:
```console
./llava-cli -m ../llava-v1.6-vicuna-7b/ggml-model-f16.gguf --mmproj vit/mmproj-model-f16.gguf --image some-image.jpg -c 4096
./llama-llava -m ../llava-v1.6-vicuna-7b/ggml-model-f16.gguf --mmproj vit/mmproj-model-f16.gguf --image some-image.jpg -c 4096
```
**note**: llava-1.6 needs more context than llava-1.5; at least 3000 is needed (just run it at -c 4096)

View file

@@ -10,7 +10,7 @@ prompt="A chat between a curious user and an artificial intelligence assistant.
# prompt="A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\nWhat is in the image? ASSISTANT:"
program_dir="build_64/bin"
binName="llava-cli"
binName="llama-llava"
n_threads=4

View file

@@ -8,5 +8,5 @@ See the following PRs for more info:
### Usage
```bash
make -j && ./passkey -m ./models/llama-7b-v2/ggml-model-f16.gguf --junk 250
make -j && ./llama-passkey -m ./models/llama-7b-v2/ggml-model-f16.gguf --junk 250
```

View file

@@ -15,7 +15,7 @@ https://github.com/ggerganov/llama.cpp/pull/6193
`retrieval` example can be tested as follows:
```bash
make -j && ./retrieval --model ./models/bge-base-en-v1.5-f16.gguf --top-k 3 --context-file README.md --context-file License --chunk-size 100 --chunk-separator .
make -j && ./llama-retrieval --model ./models/bge-base-en-v1.5-f16.gguf --top-k 3 --context-file README.md --context-file License --chunk-size 100 --chunk-separator .
```
This chunks and embeds all given files and starts a loop requesting query inputs:

View file

@@ -7,7 +7,7 @@ Basic usage instructions:
wget https://raw.githubusercontent.com/brunoklein99/deep-learning-notes/master/shakespeare.txt
# train
./bin/train-text-from-scratch \
./bin/llama-train-text-from-scratch \
--vocab-model ../models/ggml-vocab-llama.gguf \
--ctx 64 --embd 256 --head 8 --layer 16 \
--checkpoint-in chk-shakespeare-256x16-LATEST.gguf \