build: rename main → llama-cli, server → llama-server, llava-cli → llama-llava-cli, etc... (#7809)
* `main`/`server`: rename to `llama` / `llama-server` for consistency w/ homebrew
* server: update refs -> llama-server
gitignore llama-server
* server: simplify nix package
* main: update refs -> llama
fix examples/main ref
* main/server: fix targets
* update more names
* Update build.yml
* rm accidentally checked in bins
* update straggling refs
* Update .gitignore
* Update server-llm.sh
* main: target name -> llama-cli
* Prefix all example bins w/ llama-
* fix main refs
* rename {main->llama}-cmake-pkg binary
* prefix more cmake targets w/ llama-
* add/fix gbnf-validator subfolder to cmake
* sort cmake example subdirs
* rm bin files
* fix llama-lookup-* Makefile rules
* gitignore /llama-*
* rename Dockerfiles
* rename llama|main -> llama-cli; consistent RPM bin prefixes
* fix some missing -cli suffixes
* rename dockerfile w/ llama-cli
* rename(make): llama-baby-llama
* update dockerfile refs
* more llama-cli(.exe)
* fix test-eval-callback
* rename: llama-cli-cmake-pkg(.exe)
* address gbnf-validator unused fread warning (switched to C++ / ifstream)
* add two missing llama- prefixes
* Updating docs for eval-callback binary to use new `llama-` prefix.
* Updating a few lingering doc references for rename of main to llama-cli
* Updating `run-with-preset.py` to use new binary names.
Updating docs around `perplexity` binary rename.
* Updating documentation references for lookup-merge and export-lora
* Updating two small `main` references missed earlier in the finetune docs.
* Update apps.nix
* update grammar/README.md w/ new llama-* names
* update llama-rpc-server bin name + doc
* Revert "update llama-rpc-server bin name + doc"
This reverts commit e474ef1df4.
* add hot topic notice to README.md
* Update README.md
* Update README.md
* rename gguf-split & quantize bins refs in **/tests.sh
---------
Co-authored-by: HanClinto <hanclinto@gmail.com>
This commit is contained in:
parent
963552903f
commit
1c641e6aac
128 changed files with 578 additions and 578 deletions
|
@ -15,7 +15,7 @@ node('x86_runner1'){ // Running on x86 runner containing latest vecto
|
|||
stage('Running llama.cpp'){
|
||||
sh'''#!/bin/bash
|
||||
module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc
|
||||
qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./main -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
|
||||
qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
|
||||
cat llama_log.txt # Printing results
|
||||
'''
|
||||
}
|
||||
|
|
|
@ -23,13 +23,13 @@ ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
|||
# Enable CUDA
|
||||
ENV LLAMA_CUDA=1
|
||||
|
||||
RUN make -j$(nproc) main
|
||||
RUN make -j$(nproc) llama-cli
|
||||
|
||||
FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y libgomp1
|
||||
|
||||
COPY --from=build /app/main /main
|
||||
COPY --from=build /app/llama-cli /llama-cli
|
||||
|
||||
ENTRYPOINT [ "/main" ]
|
||||
ENTRYPOINT [ "/llama-cli" ]
|
|
@ -15,12 +15,12 @@ RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
|
|||
export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
|
||||
fi && \
|
||||
cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
|
||||
cmake --build build --config Release --target main
|
||||
cmake --build build --config Release --target llama-cli
|
||||
|
||||
FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
|
||||
|
||||
COPY --from=build /app/build/bin/main /main
|
||||
COPY --from=build /app/build/bin/llama-cli /llama-cli
|
||||
|
||||
ENV LC_ALL=C.utf8
|
||||
|
||||
ENTRYPOINT [ "/main" ]
|
||||
ENTRYPOINT [ "/llama-cli" ]
|
|
@ -40,6 +40,6 @@ ENV LLAMA_HIPBLAS=1
|
|||
ENV CC=/opt/rocm/llvm/bin/clang
|
||||
ENV CXX=/opt/rocm/llvm/bin/clang++
|
||||
|
||||
RUN make -j$(nproc) main
|
||||
RUN make -j$(nproc) llama-cli
|
||||
|
||||
ENTRYPOINT [ "/app/main" ]
|
||||
ENTRYPOINT [ "/app/llama-cli" ]
|
|
@ -15,13 +15,13 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
|
|||
WORKDIR /app
|
||||
COPY . .
|
||||
RUN cmake -B build -DLLAMA_VULKAN=1 && \
|
||||
cmake --build build --config Release --target main
|
||||
cmake --build build --config Release --target llama-cli
|
||||
|
||||
# Clean up
|
||||
WORKDIR /
|
||||
RUN cp /app/build/bin/main /main && \
|
||||
RUN cp /app/build/bin/llama-cli /llama-cli && \
|
||||
rm -rf /app
|
||||
|
||||
ENV LC_ALL=C.utf8
|
||||
|
||||
ENTRYPOINT [ "/main" ]
|
||||
ENTRYPOINT [ "/llama-cli" ]
|
|
@ -9,15 +9,15 @@ WORKDIR /app
|
|||
|
||||
COPY . .
|
||||
|
||||
RUN make -j$(nproc) main
|
||||
RUN make -j$(nproc) llama-cli
|
||||
|
||||
FROM ubuntu:$UBUNTU_VERSION as runtime
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y libgomp1
|
||||
|
||||
COPY --from=build /app/main /main
|
||||
COPY --from=build /app/llama-cli /llama-cli
|
||||
|
||||
ENV LC_ALL=C.utf8
|
||||
|
||||
ENTRYPOINT [ "/main" ]
|
||||
ENTRYPOINT [ "/llama-cli" ]
|
|
@ -36,9 +36,9 @@ make -j LLAMA_CLBLAST=1
|
|||
|
||||
%install
|
||||
mkdir -p %{buildroot}%{_bindir}/
|
||||
cp -p main %{buildroot}%{_bindir}/llamaclblast
|
||||
cp -p server %{buildroot}%{_bindir}/llamaclblastserver
|
||||
cp -p simple %{buildroot}%{_bindir}/llamaclblastsimple
|
||||
cp -p llama-cli %{buildroot}%{_bindir}/llama-clblast-cli
|
||||
cp -p llama-server %{buildroot}%{_bindir}/llama-clblast-server
|
||||
cp -p llama-simple %{buildroot}%{_bindir}/llama-clblast-simple
|
||||
|
||||
mkdir -p %{buildroot}/usr/lib/systemd/system
|
||||
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamaclblast.service
|
||||
|
@ -49,7 +49,7 @@ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.t
|
|||
[Service]
|
||||
Type=simple
|
||||
EnvironmentFile=/etc/sysconfig/llama
|
||||
ExecStart=/usr/bin/llamaclblastserver $LLAMA_ARGS
|
||||
ExecStart=/usr/bin/llama-clblast-server $LLAMA_ARGS
|
||||
ExecReload=/bin/kill -s HUP $MAINPID
|
||||
Restart=never
|
||||
|
||||
|
@ -67,9 +67,9 @@ rm -rf %{buildroot}
|
|||
rm -rf %{_builddir}/*
|
||||
|
||||
%files
|
||||
%{_bindir}/llamaclblast
|
||||
%{_bindir}/llamaclblastserver
|
||||
%{_bindir}/llamaclblastsimple
|
||||
%{_bindir}/llama-clblast-cli
|
||||
%{_bindir}/llama-clblast-server
|
||||
%{_bindir}/llama-clblast-simple
|
||||
/usr/lib/systemd/system/llamaclblast.service
|
||||
%config /etc/sysconfig/llama
|
||||
|
||||
|
|
|
@ -36,9 +36,9 @@ make -j LLAMA_CUDA=1
|
|||
|
||||
%install
|
||||
mkdir -p %{buildroot}%{_bindir}/
|
||||
cp -p main %{buildroot}%{_bindir}/llamacppcuda
|
||||
cp -p server %{buildroot}%{_bindir}/llamacppcudaserver
|
||||
cp -p simple %{buildroot}%{_bindir}/llamacppcudasimple
|
||||
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
|
||||
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
|
||||
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
|
||||
|
||||
mkdir -p %{buildroot}/usr/lib/systemd/system
|
||||
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
|
||||
|
@ -49,7 +49,7 @@ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.t
|
|||
[Service]
|
||||
Type=simple
|
||||
EnvironmentFile=/etc/sysconfig/llama
|
||||
ExecStart=/usr/bin/llamacppcudaserver $LLAMA_ARGS
|
||||
ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
|
||||
ExecReload=/bin/kill -s HUP $MAINPID
|
||||
Restart=never
|
||||
|
||||
|
@ -67,9 +67,9 @@ rm -rf %{buildroot}
|
|||
rm -rf %{_builddir}/*
|
||||
|
||||
%files
|
||||
%{_bindir}/llamacppcuda
|
||||
%{_bindir}/llamacppcudaserver
|
||||
%{_bindir}/llamacppcudasimple
|
||||
%{_bindir}/llama-cuda-cli
|
||||
%{_bindir}/llama-cuda-server
|
||||
%{_bindir}/llama-cuda-simple
|
||||
/usr/lib/systemd/system/llamacuda.service
|
||||
%config /etc/sysconfig/llama
|
||||
|
||||
|
|
|
@ -38,9 +38,9 @@ make -j
|
|||
|
||||
%install
|
||||
mkdir -p %{buildroot}%{_bindir}/
|
||||
cp -p main %{buildroot}%{_bindir}/llama
|
||||
cp -p server %{buildroot}%{_bindir}/llamaserver
|
||||
cp -p simple %{buildroot}%{_bindir}/llamasimple
|
||||
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
|
||||
cp -p llama-server %{buildroot}%{_bindir}/llama-server
|
||||
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
|
||||
|
||||
mkdir -p %{buildroot}/usr/lib/systemd/system
|
||||
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
|
||||
|
@ -51,7 +51,7 @@ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.t
|
|||
[Service]
|
||||
Type=simple
|
||||
EnvironmentFile=/etc/sysconfig/llama
|
||||
ExecStart=/usr/bin/llamaserver $LLAMA_ARGS
|
||||
ExecStart=/usr/bin/llama-server $LLAMA_ARGS
|
||||
ExecReload=/bin/kill -s HUP $MAINPID
|
||||
Restart=never
|
||||
|
||||
|
@ -69,9 +69,9 @@ rm -rf %{buildroot}
|
|||
rm -rf %{_builddir}/*
|
||||
|
||||
%files
|
||||
%{_bindir}/llama
|
||||
%{_bindir}/llamaserver
|
||||
%{_bindir}/llamasimple
|
||||
%{_bindir}/llama-cli
|
||||
%{_bindir}/llama-server
|
||||
%{_bindir}/llama-simple
|
||||
/usr/lib/systemd/system/llama.service
|
||||
%config /etc/sysconfig/llama
|
||||
|
||||
|
|
|
@ -25,13 +25,13 @@ ENV LLAMA_CUDA=1
|
|||
# Enable cURL
|
||||
ENV LLAMA_CURL=1
|
||||
|
||||
RUN make -j$(nproc) server
|
||||
RUN make -j$(nproc) llama-server
|
||||
|
||||
FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y libcurl4-openssl-dev libgomp1
|
||||
|
||||
COPY --from=build /app/server /server
|
||||
COPY --from=build /app/llama-server /llama-server
|
||||
|
||||
ENTRYPOINT [ "/server" ]
|
||||
ENTRYPOINT [ "/llama-server" ]
|
|
@ -15,15 +15,15 @@ RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
|
|||
export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
|
||||
fi && \
|
||||
cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
|
||||
cmake --build build --config Release --target server
|
||||
cmake --build build --config Release --target llama-server
|
||||
|
||||
FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y libcurl4-openssl-dev
|
||||
|
||||
COPY --from=build /app/build/bin/server /server
|
||||
COPY --from=build /app/build/bin/llama-server /llama-server
|
||||
|
||||
ENV LC_ALL=C.utf8
|
||||
|
||||
ENTRYPOINT [ "/server" ]
|
||||
ENTRYPOINT [ "/llama-server" ]
|
|
@ -45,6 +45,6 @@ ENV LLAMA_CURL=1
|
|||
RUN apt-get update && \
|
||||
apt-get install -y libcurl4-openssl-dev
|
||||
|
||||
RUN make -j$(nproc)
|
||||
RUN make -j$(nproc) llama-server
|
||||
|
||||
ENTRYPOINT [ "/app/server" ]
|
||||
ENTRYPOINT [ "/app/llama-server" ]
|
|
@ -19,13 +19,13 @@ RUN apt-get update && \
|
|||
WORKDIR /app
|
||||
COPY . .
|
||||
RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
|
||||
cmake --build build --config Release --target server
|
||||
cmake --build build --config Release --target llama-server
|
||||
|
||||
# Clean up
|
||||
WORKDIR /
|
||||
RUN cp /app/build/bin/server /server && \
|
||||
RUN cp /app/build/bin/llama-server /llama-server && \
|
||||
rm -rf /app
|
||||
|
||||
ENV LC_ALL=C.utf8
|
||||
|
||||
ENTRYPOINT [ "/server" ]
|
||||
ENTRYPOINT [ "/llama-server" ]
|
|
@ -11,15 +11,15 @@ COPY . .
|
|||
|
||||
ENV LLAMA_CURL=1
|
||||
|
||||
RUN make -j$(nproc) server
|
||||
RUN make -j$(nproc) llama-server
|
||||
|
||||
FROM ubuntu:$UBUNTU_VERSION as runtime
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y libcurl4-openssl-dev libgomp1
|
||||
|
||||
COPY --from=build /app/server /server
|
||||
COPY --from=build /app/llama-server /llama-server
|
||||
|
||||
ENV LC_ALL=C.utf8
|
||||
|
||||
ENTRYPOINT [ "/server" ]
|
||||
ENTRYPOINT [ "/llama-server" ]
|
|
@ -6,11 +6,11 @@
|
|||
let
|
||||
inherit (config.packages) default;
|
||||
binaries = [
|
||||
"llama"
|
||||
"llama-cli"
|
||||
"llama-embedding"
|
||||
"llama-server"
|
||||
"quantize"
|
||||
"train-text-from-scratch"
|
||||
"llama-quantize"
|
||||
"llama-train-text-from-scratch"
|
||||
];
|
||||
mkApp = name: {
|
||||
type = "app";
|
||||
|
|
|
@ -243,8 +243,6 @@ effectiveStdenv.mkDerivation (
|
|||
# TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
|
||||
# if they haven't been added yet.
|
||||
postInstall = ''
|
||||
mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix}
|
||||
mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix}
|
||||
mkdir -p $out/include
|
||||
cp $src/llama.h $out/include/
|
||||
'';
|
||||
|
@ -294,7 +292,7 @@ effectiveStdenv.mkDerivation (
|
|||
license = lib.licenses.mit;
|
||||
|
||||
# Accommodates `nix run` and `lib.getExe`
|
||||
mainProgram = "llama";
|
||||
mainProgram = "llama-cli";
|
||||
|
||||
# These people might respond, on the best effort basis, if you ping them
|
||||
# in case of Nix-specific regressions or for reviewing Nix-specific PRs.
|
||||
|
|
|
@ -10,11 +10,11 @@ shift
|
|||
if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
|
||||
python3 ./convert-hf-to-gguf.py "$@"
|
||||
elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
|
||||
./quantize "$@"
|
||||
./llama-quantize "$@"
|
||||
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
|
||||
./main "$@"
|
||||
./llama-cli "$@"
|
||||
elif [[ "$arg1" == '--finetune' || "$arg1" == '-f' ]]; then
|
||||
./finetune "$@"
|
||||
./llama-finetune "$@"
|
||||
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
|
||||
echo "Converting PTH to GGML..."
|
||||
for i in `ls $1/$2/ggml-model-f16.bin*`; do
|
||||
|
@ -22,11 +22,11 @@ elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
|
|||
echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
|
||||
else
|
||||
echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
|
||||
./quantize "$i" "${i/f16/q4_0}" q4_0
|
||||
./llama-quantize "$i" "${i/f16/q4_0}" q4_0
|
||||
fi
|
||||
done
|
||||
elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
|
||||
./server "$@"
|
||||
./llama-server "$@"
|
||||
else
|
||||
echo "Unknown command: $arg1"
|
||||
echo "Available commands: "
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue