add changes to README.md
parent a2c2d98c16 · commit eb08201227
2 changed files with 12 additions and 4 deletions
```diff
@@ -24,6 +24,8 @@ Command line options:
 - `--port`: Set the port to listen. Default: `8080`.
 - `--path`: path from which to serve static files (default examples/server/public)
 - `--embedding`: Enable embedding extraction, Default: disabled.
+- `-np N`, `--parallel N`: Set the number of slots for processing requests (default: 1)
+- `-cb`, `--cont-batching`: enable continuous batching (a.k.a. dynamic batching) (default: disabled)
 
 ## Build
 
```
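For reference, a minimal sketch of launching the server with the two new flags added above; the binary name and model path are placeholders, not taken from this commit:

```sh
# Sketch only: binary name and model file are assumptions, not part of this commit.
# Start the server with 4 parallel slots and continuous batching enabled.
./server -m models/7B/model.gguf --port 8080 -np 4 -cb
```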
```diff
@@ -158,6 +160,12 @@ node index.js
 
     `n_probs`: If greater than 0, the response also contains the probabilities of top N tokens for each generated token (default: 0)
 
+    `slot_id`: Assign the completion task to a specific slot. If -1, the task will be assigned to an idle slot (default: -1)
+
+    `cache_prompt`: Save the prompt and generation to avoid reprocessing the entire prompt when only part of it has changed (default: false)
+
+    `system_prompt`: Change the system prompt (the initial prompt of all slots); this is useful for chat applications.
+
 - **POST** `/tokenize`: Tokenize a given text.
 
     *Options:*
```
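A hedged example of a completion request using the new fields documented above, assuming the completion endpoint described earlier in the README; the prompt text and field values are made up for illustration:

```sh
# Illustrative request only: endpoint path, prompt text, and values are assumptions.
curl --request POST http://localhost:8080/completion \
     --header "Content-Type: application/json" \
     --data '{
       "prompt": "Building a website can be done in 10 simple steps:",
       "n_predict": 64,
       "n_probs": 2,
       "slot_id": 0,
       "cache_prompt": true
     }'
```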
```diff
@@ -78,8 +78,8 @@ enum slot_command {
 
 struct slot_params {
     bool stream = true;
     uint32_t seed = -1;         // RNG seed
     int32_t n_predict = -1;     // new tokens to predict
     std::string grammar = "";   // optional BNF-like grammar to constrain sampling
     bool cache_prompt = false;  // remember the prompt to avoid reprocessing the entire prompt
     std::vector<std::string> antiprompt;
```
```diff
@@ -563,7 +563,7 @@ struct llama_server_context
     }
 
     void processSystemPromptData(json sys_props) {
-        system_prompt = sys_props.value("system_prompt", "");
+        system_prompt = sys_props.value("prompt", "");
         user_name = sys_props.value("anti_prompt", "");
         assistant_name = sys_props.value("assistant_name", "");
         notifySystemPromptChanged();
```
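Judging from the keys read in `processSystemPromptData` above (`prompt`, `anti_prompt`, `assistant_name`), the `system_prompt` request field appears to expect an object of that shape. A hedged sketch with made-up values:

```sh
# Sketch only: key names mirror processSystemPromptData above; endpoint and values are assumptions.
curl --request POST http://localhost:8080/completion \
     --header "Content-Type: application/json" \
     --data '{
       "prompt": "Hello",
       "system_prompt": {
         "prompt": "You are a helpful assistant.",
         "anti_prompt": "User:",
         "assistant_name": "Assistant:"
       }
     }'
```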
```diff
@@ -872,7 +872,7 @@ struct llama_server_context
             return true;
         }
 
-        // context shift
+        // context shift takes effect only when there is a single slot
         if (slots.size() == 1) {
             llama_client_slot slot = slots[0];
             if (slot.cache_tokens.size() >= (size_t)n_ctx)
```