add LLAMA_ARG_HOST to server dockerfile
This commit is contained in:
parent
3748c734bf
commit
b3eed89478
7 changed files with 15 additions and 1 deletions
|
@ -24,6 +24,8 @@ ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
||||||
ENV GGML_CUDA=1
|
ENV GGML_CUDA=1
|
||||||
# Enable cURL
|
# Enable cURL
|
||||||
ENV LLAMA_CURL=1
|
ENV LLAMA_CURL=1
|
||||||
|
# Must be set to 0.0.0.0 so the server can listen to requests from the host machine
|
||||||
|
ENV LLAMA_ARG_HOST=0.0.0.0
|
||||||
|
|
||||||
RUN make -j$(nproc) llama-server
|
RUN make -j$(nproc) llama-server
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,8 @@ RUN apt-get update && \
|
||||||
COPY --from=build /app/build/bin/llama-server /llama-server
|
COPY --from=build /app/build/bin/llama-server /llama-server
|
||||||
|
|
||||||
ENV LC_ALL=C.utf8
|
ENV LC_ALL=C.utf8
|
||||||
|
# Must be set to 0.0.0.0 so the server can listen to requests from the host machine
|
||||||
|
ENV LLAMA_ARG_HOST=0.0.0.0
|
||||||
|
|
||||||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||||
|
|
||||||
|
|
|
@ -39,6 +39,8 @@ ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
|
||||||
ENV GGML_HIPBLAS=1
|
ENV GGML_HIPBLAS=1
|
||||||
ENV CC=/opt/rocm/llvm/bin/clang
|
ENV CC=/opt/rocm/llvm/bin/clang
|
||||||
ENV CXX=/opt/rocm/llvm/bin/clang++
|
ENV CXX=/opt/rocm/llvm/bin/clang++
|
||||||
|
# Must be set to 0.0.0.0 so the server can listen to requests from the host machine
|
||||||
|
ENV LLAMA_ARG_HOST=0.0.0.0
|
||||||
|
|
||||||
# Enable cURL
|
# Enable cURL
|
||||||
ENV LLAMA_CURL=1
|
ENV LLAMA_CURL=1
|
||||||
|
|
|
@ -23,6 +23,8 @@ RUN cp /app/build/bin/llama-server /llama-server && \
|
||||||
rm -rf /app
|
rm -rf /app
|
||||||
|
|
||||||
ENV LC_ALL=C.utf8
|
ENV LC_ALL=C.utf8
|
||||||
|
# Must be set to 0.0.0.0 so the server can listen to requests from the host machine
|
||||||
|
ENV LLAMA_ARG_HOST=0.0.0.0
|
||||||
|
|
||||||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,8 @@ RUN apt-get update && \
|
||||||
COPY --from=build /app/llama-server /llama-server
|
COPY --from=build /app/llama-server /llama-server
|
||||||
|
|
||||||
ENV LC_ALL=C.utf8
|
ENV LC_ALL=C.utf8
|
||||||
|
# Must be set to 0.0.0.0 so the server can listen to requests from the host machine
|
||||||
|
ENV LLAMA_ARG_HOST=0.0.0.0
|
||||||
|
|
||||||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||||
|
|
||||||
|
|
|
@ -327,6 +327,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
|
||||||
void gpt_params_parse_from_env(gpt_params & params) {
|
void gpt_params_parse_from_env(gpt_params & params) {
|
||||||
// we only care about server-related params for now
|
// we only care about server-related params for now
|
||||||
get_env("LLAMA_ARG_MODEL", params.model);
|
get_env("LLAMA_ARG_MODEL", params.model);
|
||||||
|
get_env("LLAMA_ARG_MODEL_URL", params.model_url);
|
||||||
|
get_env("LLAMA_ARG_MODEL_ALIAS", params.model_alias);
|
||||||
get_env("LLAMA_ARG_HF_REPO", params.hf_repo);
|
get_env("LLAMA_ARG_HF_REPO", params.hf_repo);
|
||||||
get_env("LLAMA_ARG_HF_FILE", params.hf_file);
|
get_env("LLAMA_ARG_HF_FILE", params.hf_file);
|
||||||
get_env("LLAMA_ARG_THREADS", params.n_threads);
|
get_env("LLAMA_ARG_THREADS", params.n_threads);
|
||||||
|
|
|
@ -252,6 +252,8 @@ Available environment variables (if specified, these variables will override par
|
||||||
- `LLAMA_CACHE`: cache directory, used by `--hf-repo`
|
- `LLAMA_CACHE`: cache directory, used by `--hf-repo`
|
||||||
- `HF_TOKEN`: Hugging Face access token, used when accessing a gated model with `--hf-repo`
|
- `HF_TOKEN`: Hugging Face access token, used when accessing a gated model with `--hf-repo`
|
||||||
- `LLAMA_ARG_MODEL`: equivalent to `-m`
|
- `LLAMA_ARG_MODEL`: equivalent to `-m`
|
||||||
|
- `LLAMA_ARG_MODEL_URL`: equivalent to `-mu`
|
||||||
|
- `LLAMA_ARG_MODEL_ALIAS`: equivalent to `-a`
|
||||||
- `LLAMA_ARG_HF_REPO`: equivalent to `--hf-repo`
|
- `LLAMA_ARG_HF_REPO`: equivalent to `--hf-repo`
|
||||||
- `LLAMA_ARG_HF_FILE`: equivalent to `--hf-file`
|
- `LLAMA_ARG_HF_FILE`: equivalent to `--hf-file`
|
||||||
- `LLAMA_ARG_THREADS`: equivalent to `-t`
|
- `LLAMA_ARG_THREADS`: equivalent to `-t`
|
||||||
|
@ -282,11 +284,11 @@ services:
|
||||||
volumes:
|
volumes:
|
||||||
- ./models:/models
|
- ./models:/models
|
||||||
environment:
|
environment:
|
||||||
|
# alternatively, you can use "LLAMA_ARG_MODEL_URL" to download the model
|
||||||
LLAMA_ARG_MODEL: /models/my_model.gguf
|
LLAMA_ARG_MODEL: /models/my_model.gguf
|
||||||
LLAMA_ARG_CTX_SIZE: 4096
|
LLAMA_ARG_CTX_SIZE: 4096
|
||||||
LLAMA_ARG_N_PARALLEL: 2
|
LLAMA_ARG_N_PARALLEL: 2
|
||||||
LLAMA_ARG_ENDPOINT_METRICS: 1 # to disable, either remove or set to 0
|
LLAMA_ARG_ENDPOINT_METRICS: 1 # to disable, either remove or set to 0
|
||||||
LLAMA_ARG_HOST: 0.0.0.0
|
|
||||||
LLAMA_ARG_PORT: 8080
|
LLAMA_ARG_PORT: 8080
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue