From f298cc63d2cec6cfa72446b8e7f4ec5448f3fd54 Mon Sep 17 00:00:00 2001
From: Olivier Chafik
Date: Thu, 6 Jun 2024 15:39:23 +0100
Subject: [PATCH] server: update refs -> llama-server

gitignore llama-server
---
 .devops/server-cuda.Dockerfile                |  6 ++---
 .devops/server-intel.Dockerfile               |  4 ++--
 .devops/server-rocm.Dockerfile                |  4 ++--
 .devops/server-vulkan.Dockerfile              |  4 ++--
 .devops/server.Dockerfile                     |  6 ++---
 .devops/tools.sh                              |  2 +-
 .gitignore                                    |  2 +-
 examples/json-schema-pydantic-example.py      |  2 +-
 examples/server-llama2-13B.sh                 |  2 +-
 examples/server/README.md                     | 22 +++++++++----------
 examples/server/bench/README.md               |  2 +-
 examples/server/bench/bench.py                |  2 +-
 examples/server/public_simplechat/readme.md   |  4 ++--
 examples/server/tests/README.md               |  2 +-
 examples/server/tests/features/steps/steps.py |  4 ++--
 grammars/README.md                            |  2 +-
 16 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/.devops/server-cuda.Dockerfile b/.devops/server-cuda.Dockerfile
index 4e9747b82..0010ffd4c 100644
--- a/.devops/server-cuda.Dockerfile
+++ b/.devops/server-cuda.Dockerfile
@@ -25,13 +25,13 @@ ENV LLAMA_CUDA=1
 # Enable cURL
 ENV LLAMA_CURL=1
 
-RUN make -j$(nproc) server
+RUN make -j$(nproc) llama-server
 
 FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
 
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1
 
-COPY --from=build /app/server /server
+COPY --from=build /app/llama-server /llama-server
 
-ENTRYPOINT [ "/server" ]
+ENTRYPOINT [ "/llama-server" ]
diff --git a/.devops/server-intel.Dockerfile b/.devops/server-intel.Dockerfile
index 13d00b737..fc132dfce 100644
--- a/.devops/server-intel.Dockerfile
+++ b/.devops/server-intel.Dockerfile
@@ -38,8 +38,8 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev
 
-COPY --from=build /app/build/bin/server /server
+COPY --from=build /app/build/bin/llama-server /llama-server
 
 ENV LC_ALL=C.utf8
 
-ENTRYPOINT [ "/server" ]
+ENTRYPOINT [ "/llama-server" ]
diff --git a/.devops/server-rocm.Dockerfile b/.devops/server-rocm.Dockerfile
index a6b76dee8..f88cf20e5 100644
--- a/.devops/server-rocm.Dockerfile
+++ b/.devops/server-rocm.Dockerfile
@@ -45,6 +45,6 @@ ENV LLAMA_CURL=1
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev
 
-RUN make -j$(nproc)
+RUN make -j$(nproc) llama-server
 
-ENTRYPOINT [ "/app/server" ]
+ENTRYPOINT [ "/app/llama-server" ]
diff --git a/.devops/server-vulkan.Dockerfile b/.devops/server-vulkan.Dockerfile
index 6e757e171..6cf7fd763 100644
--- a/.devops/server-vulkan.Dockerfile
+++ b/.devops/server-vulkan.Dockerfile
@@ -23,9 +23,9 @@ RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
 
 # Clean up
 WORKDIR /
-RUN cp /app/build/bin/server /server && \
+RUN cp /app/build/bin/llama-server /llama-server && \
     rm -rf /app
 
 ENV LC_ALL=C.utf8
 
-ENTRYPOINT [ "/server" ]
+ENTRYPOINT [ "/llama-server" ]
diff --git a/.devops/server.Dockerfile b/.devops/server.Dockerfile
index bee63b966..aa93369be 100644
--- a/.devops/server.Dockerfile
+++ b/.devops/server.Dockerfile
@@ -11,15 +11,15 @@ COPY . .
 
 ENV LLAMA_CURL=1
 
-RUN make -j$(nproc) server
+RUN make -j$(nproc) llama-server
 
 FROM ubuntu:$UBUNTU_VERSION as runtime
 
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1
 
-COPY --from=build /app/server /server
+COPY --from=build /app/llama-server /llama-server
 
 ENV LC_ALL=C.utf8
 
-ENTRYPOINT [ "/server" ]
+ENTRYPOINT [ "/llama-server" ]
diff --git a/.devops/tools.sh b/.devops/tools.sh
index 97424c3aa..4c0c732e7 100755
--- a/.devops/tools.sh
+++ b/.devops/tools.sh
@@ -26,7 +26,7 @@ elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
         fi
     done
 elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
-    ./server "$@"
+    ./llama-server "$@"
 else
     echo "Unknown command: $arg1"
     echo "Available commands: "
diff --git a/.gitignore b/.gitignore
index 5223c6963..5312c7fdf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -76,7 +76,7 @@ models-mnt
 /quantize-stats
 /result
 /save-load-state
-/server
+/llama-server
 /simple
 /batched
 /batched-bench
diff --git a/examples/json-schema-pydantic-example.py b/examples/json-schema-pydantic-example.py
index 69ebfd409..cc64e572b 100644
--- a/examples/json-schema-pydantic-example.py
+++ b/examples/json-schema-pydantic-example.py
@@ -1,5 +1,5 @@
 # Usage:
-#! ./server -m some-model.gguf &
+#! ./llama-server -m some-model.gguf &
 #! pip install pydantic
 #! python json-schema-pydantic-example.py
 
diff --git a/examples/server-llama2-13B.sh b/examples/server-llama2-13B.sh
index 17fedc2b1..4ce79b7fa 100755
--- a/examples/server-llama2-13B.sh
+++ b/examples/server-llama2-13B.sh
@@ -16,7 +16,7 @@ GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 4096 --batch-size 1024}"
 
 
 # shellcheck disable=SC2086 # Intended splitting of GEN_OPTIONS
-./server $GEN_OPTIONS \
+./llama-server $GEN_OPTIONS \
     --model "$MODEL" \
     --threads "$N_THREAD" \
     --rope-freq-scale 1.0 \
diff --git a/examples/server/README.md b/examples/server/README.md
index 0c3db8c84..d3284c998 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -80,26 +80,26 @@ The project is under active development, and we are [looking for feedback and co
 
 ## Build
 
-`server` is built alongside everything else from the root of the project
+`llama-server` is built alongside everything else from the root of the project
 
 - Using `make`:
 
   ```bash
-  make server
+  make llama-server
   ```
 
 - Using `CMake`:
 
   ```bash
   cmake -B build
-  cmake --build build --config Release -t server
+  cmake --build build --config Release -t llama-server
   ```
 
-  Binary is at `./build/bin/server`
+  Binary is at `./build/bin/llama-server`
 
 ## Build with SSL
 
-`server` can also be built with SSL support using OpenSSL 3
+`llama-server` can also be built with SSL support using OpenSSL 3
 
 - Using `make`:
 
@@ -107,14 +107,14 @@ The project is under active development, and we are [looking for feedback and co
   # NOTE: For non-system openssl, use the following:
   #   CXXFLAGS="-I /path/to/openssl/include"
   #   LDFLAGS="-L /path/to/openssl/lib"
-  make LLAMA_SERVER_SSL=true server
+  make LLAMA_SERVER_SSL=true llama-server
   ```
 
 - Using `CMake`:
 
   ```bash
   cmake -B build -DLLAMA_SERVER_SSL=ON
-  cmake --build build --config Release -t server
+  cmake --build build --config Release -t llama-server
   ```
 
 ## Quick Start
@@ -124,13 +124,13 @@ To get started right away, run the following command, making sure to use the cor
 ### Unix-based systems (Linux, macOS, etc.)
 
 ```bash
-./server -m models/7B/ggml-model.gguf -c 2048
+./llama-server -m models/7B/ggml-model.gguf -c 2048
 ```
 
 ### Windows
 
 ```powershell
-server.exe -m models\7B\ggml-model.gguf -c 2048
+llama-server.exe -m models\7B\ggml-model.gguf -c 2048
 ```
 
 The above command will start a server that by default listens on `127.0.0.1:8080`.
@@ -629,11 +629,11 @@ bash chat.sh
 
 ### OAI-like API
 
-The HTTP `server` supports an OAI-like API: https://github.com/openai/openai-openapi
+The HTTP `llama-server` supports an OAI-like API: https://github.com/openai/openai-openapi
 
 ### API errors
 
-`server` returns errors in the same format as OAI: https://github.com/openai/openai-openapi
+`llama-server` returns errors in the same format as OAI: https://github.com/openai/openai-openapi
 
 Example of an error:
 
diff --git a/examples/server/bench/README.md b/examples/server/bench/README.md
index 23a3ec975..0f18ca396 100644
--- a/examples/server/bench/README.md
+++ b/examples/server/bench/README.md
@@ -99,7 +99,7 @@ The `bench.py` script does several steps:
 It aims to be used in the CI, but you can run it manually:
 
 ```shell
-LLAMA_SERVER_BIN_PATH=../../../cmake-build-release/bin/server python bench.py \
+LLAMA_SERVER_BIN_PATH=../../../cmake-build-release/bin/llama-server python bench.py \
     --runner-label local \
     --name local \
     --branch `git rev-parse --abbrev-ref HEAD` \
diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py
index 86c5de101..4fbbb2032 100644
--- a/examples/server/bench/bench.py
+++ b/examples/server/bench/bench.py
@@ -245,7 +245,7 @@ def start_server(args):
 
 def start_server_background(args):
     # Start the server
-    server_path = '../../../build/bin/server'
+    server_path = '../../../build/bin/llama-server'
     if 'LLAMA_SERVER_BIN_PATH' in os.environ:
         server_path = os.environ['LLAMA_SERVER_BIN_PATH']
     server_args = [
diff --git a/examples/server/public_simplechat/readme.md b/examples/server/public_simplechat/readme.md
index 36a46885d..2dc177825 100644
--- a/examples/server/public_simplechat/readme.md
+++ b/examples/server/public_simplechat/readme.md
@@ -44,12 +44,12 @@ http module.
 
 ### running using examples/server
 
-bin/server -m path/model.gguf --path ../examples/server/public_simplechat [--port PORT]
+./llama-server -m path/model.gguf --path examples/server/public_simplechat [--port PORT]
 
 ### running using python3's server module
 
 first run examples/server
-* bin/server -m path/model.gguf
+* ./llama-server -m path/model.gguf
 
 next run this web front end in examples/server/public_simplechat
 * cd ../examples/server/public_simplechat
diff --git a/examples/server/tests/README.md b/examples/server/tests/README.md
index 83c0208f3..aca22822c 100644
--- a/examples/server/tests/README.md
+++ b/examples/server/tests/README.md
@@ -40,7 +40,7 @@ It's possible to override some scenario steps values with environment variables:
 | variable                 | description                                                                                     |
 |--------------------------|-------------------------------------------------------------------------------------------------|
 | `PORT`                   | `context.server_port` to set the listening port of the server during scenario, default: `8080` |
-| `LLAMA_SERVER_BIN_PATH`  | to change the server binary path, default: `../../../build/bin/server`                         |
+| `LLAMA_SERVER_BIN_PATH`  | to change the server binary path, default: `../../../build/bin/llama-server`                   |
 | `DEBUG`                  | "ON" to enable steps and server verbose mode `--verbose`                                        |
 | `SERVER_LOG_FORMAT_JSON` | if set switch server logs to json format                                                       |
 | `N_GPU_LAYERS`           | number of model layers to offload to VRAM `-ngl --n-gpu-layers`                                |
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index 26d9359d7..7b5dabb01 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -1272,9 +1272,9 @@ def context_text(context):
 
 def start_server_background(context):
     if os.name == 'nt':
-        context.server_path = '../../../build/bin/Release/server.exe'
+        context.server_path = '../../../build/bin/Release/llama-server.exe'
     else:
-        context.server_path = '../../../build/bin/server'
+        context.server_path = '../../../build/bin/llama-server'
     if 'LLAMA_SERVER_BIN_PATH' in os.environ:
         context.server_path = os.environ['LLAMA_SERVER_BIN_PATH']
     server_listen_addr = context.server_fqdn
diff --git a/grammars/README.md b/grammars/README.md
index 3ffc7cec0..b8a5f4aa7 100644
--- a/grammars/README.md
+++ b/grammars/README.md
@@ -1,6 +1,6 @@
 # GBNF Guide
 
-GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis. GBNF grammars are supported in various ways in `examples/main` and `examples/server`.
+GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis. GBNF grammars are supported in various ways in `examples/main` and `examples/llama-server`.
 
 ## Background
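
---

As a quick reviewer-side smoke test of the rename (a sketch, not part of the patch: the model path is a placeholder, and the default listen address assumes no `--host`/`--port` override):

```bash
# Build just the renamed target from the repository root.
make -j$(nproc) llama-server

# Start the server with any local GGUF model (placeholder path).
./llama-server -m models/7B/ggml-model.gguf -c 2048 &

# llama-server listens on 127.0.0.1:8080 by default; once the model has
# loaded, the OAI-like endpoint documented in examples/server/README.md
# should respond.
curl http://127.0.0.1:8080/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"messages": [{"role": "user", "content": "Say hello"}]}'
```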