diff --git a/common/arg.cpp b/common/arg.cpp
index e9770e0e4..6880117ed 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -691,7 +691,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
[](gpt_params & params) {
params.ctx_shift = false;
}
- ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}));
+ ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
add_opt(llama_arg(
{"--chunks"}, "N",
format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks),
diff --git a/examples/server/README.md b/examples/server/README.md
index 21dfc0809..dfca07f98 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -127,7 +127,7 @@ The project is under active development, and we are [looking for feedback and co
| Argument | Explanation |
| -------- | ----------- |
-| `--no-context-shift` | disables context shift on inifinite text generation (default: disabled) |
+| `--no-context-shift` | disables context shift on inifinite text generation (default: disabled)
(env: LLAMA_ARG_NO_CONTEXT_SHIFT) |
| `-sp, --special` | special tokens output enabled (default: false) |
| `--spm-infill` | use Suffix/Prefix/Middle pattern for infill (instead of Prefix/Suffix/Middle) as some models prefer this. (default: disabled) |
| `--pooling {none,mean,cls,last}` | pooling type for embeddings, use model default if unspecified
(env: LLAMA_ARG_POOLING) |